summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/audio_core/audio_core.cpp16
-rw-r--r--src/audio_core/hle/filter.h2
-rw-r--r--src/audio_core/hle/source.cpp44
-rw-r--r--src/audio_core/hle/source.h2
-rw-r--r--src/audio_core/interpolate.h2
-rw-r--r--src/audio_core/null_sink.h6
-rw-r--r--src/audio_core/sdl2_sink.cpp30
-rw-r--r--src/audio_core/sdl2_sink.h5
-rw-r--r--src/audio_core/sink.h9
-rw-r--r--src/audio_core/sink_details.cpp19
-rw-r--r--src/audio_core/sink_details.h2
-rw-r--r--src/audio_core/time_stretch.h2
-rw-r--r--src/citra/citra.cpp20
-rw-r--r--src/citra/config.cpp16
-rw-r--r--src/citra/default_ini.h20
-rw-r--r--src/citra/emu_window/emu_window_sdl2.cpp4
-rw-r--r--src/citra_qt/CMakeLists.txt1
-rw-r--r--src/citra_qt/bootmanager.cpp4
-rw-r--r--src/citra_qt/config.cpp37
-rw-r--r--src/citra_qt/configure_audio.cpp33
-rw-r--r--src/citra_qt/configure_audio.h3
-rw-r--r--src/citra_qt/configure_audio.ui15
-rw-r--r--src/citra_qt/configure_general.ui2
-rw-r--r--src/citra_qt/configure_input.cpp1
-rw-r--r--src/citra_qt/configure_system.cpp1
-rw-r--r--src/citra_qt/debugger/graphics/graphics_cmdlists.cpp36
-rw-r--r--src/citra_qt/debugger/graphics/graphics_surface.cpp54
-rw-r--r--src/citra_qt/debugger/graphics/graphics_tracing.cpp5
-rw-r--r--src/citra_qt/debugger/graphics/graphics_tracing.h6
-rw-r--r--src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp14
-rw-r--r--src/citra_qt/debugger/graphics/graphics_vertex_shader.h3
-rw-r--r--src/citra_qt/debugger/profiler.cpp111
-rw-r--r--src/citra_qt/debugger/profiler.h40
-rw-r--r--src/citra_qt/debugger/profiler.ui33
-rw-r--r--src/citra_qt/game_list.cpp60
-rw-r--r--src/citra_qt/game_list.h6
-rw-r--r--src/citra_qt/game_list_p.h8
-rw-r--r--src/citra_qt/hotkeys.h2
-rw-r--r--src/citra_qt/main.cpp242
-rw-r--r--src/citra_qt/main.h30
-rw-r--r--src/citra_qt/main.ui65
-rw-r--r--src/citra_qt/ui_settings.h1
-rw-r--r--src/common/CMakeLists.txt30
-rw-r--r--src/common/bit_set.h33
-rw-r--r--src/common/common_paths.h1
-rw-r--r--src/common/file_util.cpp4
-rw-r--r--src/common/hash.cpp8
-rw-r--r--src/common/hash.h5
-rw-r--r--src/common/logging/backend.cpp2
-rw-r--r--src/common/logging/log.h2
-rw-r--r--src/common/profiler.cpp101
-rw-r--r--src/common/profiler_reporting.h83
-rw-r--r--src/common/scm_rev.cpp.in2
-rw-r--r--src/common/scm_rev.h1
-rw-r--r--src/common/synchronized_wrapper.h43
-rw-r--r--src/common/x64/abi.cpp350
-rw-r--r--src/common/x64/abi.h58
-rw-r--r--src/common/x64/cpu_detect.cpp12
-rw-r--r--src/common/x64/emitter.cpp2583
-rw-r--r--src/common/x64/emitter.h1206
-rw-r--r--src/core/CMakeLists.txt27
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic.cpp31
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic.h2
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_cp15.cpp88
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_cp15.h32
-rw-r--r--src/core/arm/dyncom/arm_dyncom_interpreter.cpp8
-rw-r--r--src/core/core.cpp19
-rw-r--r--src/core/core.h9
-rw-r--r--src/core/file_sys/archive_extsavedata.cpp2
-rw-r--r--src/core/file_sys/archive_extsavedata.h2
-rw-r--r--src/core/file_sys/archive_romfs.cpp43
-rw-r--r--src/core/file_sys/archive_sdmc.cpp12
-rw-r--r--src/core/file_sys/archive_selfncch.cpp257
-rw-r--r--src/core/file_sys/archive_selfncch.h (renamed from src/core/file_sys/archive_romfs.h)23
-rw-r--r--src/core/file_sys/errors.h10
-rw-r--r--src/core/file_sys/savedata_archive.cpp12
-rw-r--r--src/core/frontend/camera/blank_camera.cpp31
-rw-r--r--src/core/frontend/camera/blank_camera.h28
-rw-r--r--src/core/frontend/camera/factory.cpp32
-rw-r--r--src/core/frontend/camera/factory.h41
-rw-r--r--src/core/frontend/camera/interface.cpp11
-rw-r--r--src/core/frontend/camera/interface.h61
-rw-r--r--src/core/frontend/emu_window.cpp19
-rw-r--r--src/core/gdbstub/gdbstub.cpp1
-rw-r--r--src/core/hle/applets/applet.cpp5
-rw-r--r--src/core/hle/applets/mint.cpp72
-rw-r--r--src/core/hle/applets/mint.h29
-rw-r--r--src/core/hle/config_mem.cpp13
-rw-r--r--src/core/hle/ipc.h83
-rw-r--r--src/core/hle/ipc_helpers.h275
-rw-r--r--src/core/hle/kernel/server_session.h4
-rw-r--r--src/core/hle/kernel/thread.h1
-rw-r--r--src/core/hle/kernel/timer.cpp41
-rw-r--r--src/core/hle/kernel/timer.h8
-rw-r--r--src/core/hle/result.h4
-rw-r--r--src/core/hle/service/apt/apt.cpp103
-rw-r--r--src/core/hle/service/apt/apt.h40
-rw-r--r--src/core/hle/service/apt/apt_a.cpp4
-rw-r--r--src/core/hle/service/apt/apt_s.cpp4
-rw-r--r--src/core/hle/service/apt/apt_u.cpp4
-rw-r--r--src/core/hle/service/cam/cam.cpp1024
-rw-r--r--src/core/hle/service/cam/cam.h358
-rw-r--r--src/core/hle/service/cam/cam_u.cpp32
-rw-r--r--src/core/hle/service/cfg/cfg.cpp105
-rw-r--r--src/core/hle/service/err_f.cpp2
-rw-r--r--src/core/hle/service/fs/archive.h2
-rw-r--r--src/core/hle/service/fs/fs_user.cpp27
-rw-r--r--src/core/hle/service/gsp_gpu.cpp37
-rw-r--r--src/core/hle/service/hid/hid.cpp7
-rw-r--r--src/core/hle/service/hid/hid.h4
-rw-r--r--src/core/hle/service/ir/ir.cpp94
-rw-r--r--src/core/hle/service/ir/ir.h57
-rw-r--r--src/core/hle/service/ir/ir_rst.cpp37
-rw-r--r--src/core/hle/service/ir/ir_rst.h3
-rw-r--r--src/core/hle/service/ir/ir_u.cpp2
-rw-r--r--src/core/hle/service/ir/ir_user.cpp112
-rw-r--r--src/core/hle/service/ir/ir_user.h3
-rw-r--r--src/core/hle/service/ldr_ro/cro_helper.h4
-rw-r--r--src/core/hle/service/ldr_ro/ldr_ro.cpp1
-rw-r--r--src/core/hle/service/mic_u.cpp8
-rw-r--r--src/core/hle/service/nfc/nfc.cpp105
-rw-r--r--src/core/hle/service/nfc/nfc.h122
-rw-r--r--src/core/hle/service/nfc/nfc_m.cpp21
-rw-r--r--src/core/hle/service/nfc/nfc_u.cpp21
-rw-r--r--src/core/hle/service/nim/nim.cpp2
-rw-r--r--src/core/hle/service/service.h1
-rw-r--r--src/core/hle/service/y2r_u.cpp47
-rw-r--r--src/core/hle/svc.cpp25
-rw-r--r--src/core/hw/aes/arithmetic128.cpp47
-rw-r--r--src/core/hw/aes/arithmetic128.h17
-rw-r--r--src/core/hw/aes/ccm.cpp95
-rw-r--r--src/core/hw/aes/ccm.h40
-rw-r--r--src/core/hw/aes/key.cpp173
-rw-r--r--src/core/hw/aes/key.h35
-rw-r--r--src/core/hw/gpu.cpp41
-rw-r--r--src/core/hw/gpu.h2
-rw-r--r--src/core/hw/hw.cpp2
-rw-r--r--src/core/loader/3dsx.cpp6
-rw-r--r--src/core/loader/loader.h2
-rw-r--r--src/core/loader/ncch.cpp6
-rw-r--r--src/core/perf_stats.cpp105
-rw-r--r--src/core/perf_stats.h83
-rw-r--r--src/core/settings.cpp2
-rw-r--r--src/core/settings.h6
-rw-r--r--src/video_core/CMakeLists.txt60
-rw-r--r--src/video_core/command_processor.cpp133
-rw-r--r--src/video_core/debug_utils/debug_utils.cpp301
-rw-r--r--src/video_core/debug_utils/debug_utils.h43
-rw-r--r--src/video_core/pica.cpp489
-rw-r--r--src/video_core/pica.h1405
-rw-r--r--src/video_core/pica_state.h6
-rw-r--r--src/video_core/primitive_assembly.cpp20
-rw-r--r--src/video_core/primitive_assembly.h14
-rw-r--r--src/video_core/rasterizer.h20
-rw-r--r--src/video_core/regs.cpp488
-rw-r--r--src/video_core/regs.h142
-rw-r--r--src/video_core/regs_framebuffer.h284
-rw-r--r--src/video_core/regs_lighting.h294
-rw-r--r--src/video_core/regs_pipeline.h230
-rw-r--r--src/video_core/regs_rasterizer.h129
-rw-r--r--src/video_core/regs_shader.h104
-rw-r--r--src/video_core/regs_texturing.h328
-rw-r--r--src/video_core/renderer_base.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp354
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h56
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp32
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h20
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp105
-rw-r--r--src/video_core/renderer_opengl/pica_to_gl.h20
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp17
-rw-r--r--src/video_core/shader/shader.cpp141
-rw-r--r--src/video_core/shader/shader.h123
-rw-r--r--src/video_core/shader/shader_interpreter.cpp49
-rw-r--r--src/video_core/shader/shader_interpreter.h26
-rw-r--r--src/video_core/shader/shader_jit_x64.cpp890
-rw-r--r--src/video_core/shader/shader_jit_x64.h115
-rw-r--r--src/video_core/shader/shader_jit_x64_compiler.cpp897
-rw-r--r--src/video_core/shader/shader_jit_x64_compiler.h125
-rw-r--r--src/video_core/swrasterizer/clipper.cpp (renamed from src/video_core/clipper.cpp)37
-rw-r--r--src/video_core/swrasterizer/clipper.h (renamed from src/video_core/clipper.h)0
-rw-r--r--src/video_core/swrasterizer/framebuffer.cpp358
-rw-r--r--src/video_core/swrasterizer/framebuffer.h29
-rw-r--r--src/video_core/swrasterizer/rasterizer.cpp (renamed from src/video_core/rasterizer.cpp)732
-rw-r--r--src/video_core/swrasterizer/rasterizer.h48
-rw-r--r--src/video_core/swrasterizer/swrasterizer.cpp (renamed from src/video_core/swrasterizer.cpp)4
-rw-r--r--src/video_core/swrasterizer/swrasterizer.h (renamed from src/video_core/swrasterizer.h)0
-rw-r--r--src/video_core/swrasterizer/texturing.cpp228
-rw-r--r--src/video_core/swrasterizer/texturing.h28
-rw-r--r--src/video_core/texture/etc1.cpp122
-rw-r--r--src/video_core/texture/etc1.h16
-rw-r--r--src/video_core/texture/texture_decode.cpp227
-rw-r--r--src/video_core/texture/texture_decode.h60
-rw-r--r--src/video_core/vertex_loader.cpp22
-rw-r--r--src/video_core/vertex_loader.h12
194 files changed, 9433 insertions, 9904 deletions
diff --git a/src/audio_core/audio_core.cpp b/src/audio_core/audio_core.cpp
index ba6acf28e..84f9c03a7 100644
--- a/src/audio_core/audio_core.cpp
+++ b/src/audio_core/audio_core.cpp
@@ -56,20 +56,8 @@ void AddAddressSpace(Kernel::VMManager& address_space) {
56} 56}
57 57
58void SelectSink(std::string sink_id) { 58void SelectSink(std::string sink_id) {
59 auto iter = 59 const SinkDetails& sink_details = GetSinkDetails(sink_id);
60 std::find_if(g_sink_details.begin(), g_sink_details.end(), 60 DSP::HLE::SetSink(sink_details.factory());
61 [sink_id](const auto& sink_detail) { return sink_detail.id == sink_id; });
62
63 if (sink_id == "auto" || iter == g_sink_details.end()) {
64 if (sink_id != "auto") {
65 LOG_ERROR(Audio, "AudioCore::SelectSink given invalid sink_id %s", sink_id.c_str());
66 }
67 // Auto-select.
68 // g_sink_details is ordered in terms of desirability, with the best choice at the front.
69 iter = g_sink_details.begin();
70 }
71
72 DSP::HLE::SetSink(iter->factory());
73} 61}
74 62
75void EnableStretching(bool enable) { 63void EnableStretching(bool enable) {
diff --git a/src/audio_core/hle/filter.h b/src/audio_core/hle/filter.h
index 4281a5898..5350e2857 100644
--- a/src/audio_core/hle/filter.h
+++ b/src/audio_core/hle/filter.h
@@ -27,7 +27,7 @@ public:
27 * See also: SourceConfiguration::Configuration::simple_filter_enabled, 27 * See also: SourceConfiguration::Configuration::simple_filter_enabled,
28 * SourceConfiguration::Configuration::biquad_filter_enabled. 28 * SourceConfiguration::Configuration::biquad_filter_enabled.
29 * @param simple If true, enables the simple filter. If false, disables it. 29 * @param simple If true, enables the simple filter. If false, disables it.
30 * @param simple If true, enables the biquad filter. If false, disables it. 30 * @param biquad If true, enables the biquad filter. If false, disables it.
31 */ 31 */
32 void Enable(bool simple, bool biquad); 32 void Enable(bool simple, bool biquad);
33 33
diff --git a/src/audio_core/hle/source.cpp b/src/audio_core/hle/source.cpp
index 2bbf7146e..92484c526 100644
--- a/src/audio_core/hle/source.cpp
+++ b/src/audio_core/hle/source.cpp
@@ -158,6 +158,14 @@ void Source::ParseConfig(SourceConfiguration::Configuration& config,
158 static_cast<size_t>(state.mono_or_stereo)); 158 static_cast<size_t>(state.mono_or_stereo));
159 } 159 }
160 160
161 u32_dsp play_position = {};
162 if (config.play_position_dirty && config.play_position != 0) {
163 config.play_position_dirty.Assign(0);
164 play_position = config.play_position;
165 // play_position applies only to the embedded buffer, and defaults to 0 w/o a dirty bit
166 // This will be the starting sample for the first time the buffer is played.
167 }
168
161 if (config.embedded_buffer_dirty) { 169 if (config.embedded_buffer_dirty) {
162 config.embedded_buffer_dirty.Assign(0); 170 config.embedded_buffer_dirty.Assign(0);
163 state.input_queue.emplace(Buffer{ 171 state.input_queue.emplace(Buffer{
@@ -171,9 +179,18 @@ void Source::ParseConfig(SourceConfiguration::Configuration& config,
171 state.mono_or_stereo, 179 state.mono_or_stereo,
172 state.format, 180 state.format,
173 false, 181 false,
182 play_position,
183 false,
174 }); 184 });
175 LOG_TRACE(Audio_DSP, "enqueuing embedded addr=0x%08x len=%u id=%hu", 185 LOG_TRACE(Audio_DSP, "enqueuing embedded addr=0x%08x len=%u id=%hu start=%u",
176 config.physical_address, config.length, config.buffer_id); 186 config.physical_address, config.length, config.buffer_id,
187 static_cast<u32>(config.play_position));
188 }
189
190 if (config.loop_related_dirty && config.loop_related != 0) {
191 config.loop_related_dirty.Assign(0);
192 LOG_WARNING(Audio_DSP, "Unhandled complex loop with loop_related=0x%08x",
193 static_cast<u32>(config.loop_related));
177 } 194 }
178 195
179 if (config.buffer_queue_dirty) { 196 if (config.buffer_queue_dirty) {
@@ -192,6 +209,8 @@ void Source::ParseConfig(SourceConfiguration::Configuration& config,
192 state.mono_or_stereo, 209 state.mono_or_stereo,
193 state.format, 210 state.format,
194 true, 211 true,
212 {}, // 0 in u32_dsp
213 false,
195 }); 214 });
196 LOG_TRACE(Audio_DSP, "enqueuing queued %zu addr=0x%08x len=%u id=%hu", i, 215 LOG_TRACE(Audio_DSP, "enqueuing queued %zu addr=0x%08x len=%u id=%hu", i,
197 b.physical_address, b.length, b.buffer_id); 216 b.physical_address, b.length, b.buffer_id);
@@ -247,18 +266,18 @@ bool Source::DequeueBuffer() {
247 if (state.input_queue.empty()) 266 if (state.input_queue.empty())
248 return false; 267 return false;
249 268
250 const Buffer buf = state.input_queue.top(); 269 Buffer buf = state.input_queue.top();
251 state.input_queue.pop(); 270
271 // if we're in a loop, the current sound keeps playing afterwards, so leave the queue alone
272 if (!buf.is_looping) {
273 state.input_queue.pop();
274 }
252 275
253 if (buf.adpcm_dirty) { 276 if (buf.adpcm_dirty) {
254 state.adpcm_state.yn1 = buf.adpcm_yn[0]; 277 state.adpcm_state.yn1 = buf.adpcm_yn[0];
255 state.adpcm_state.yn2 = buf.adpcm_yn[1]; 278 state.adpcm_state.yn2 = buf.adpcm_yn[1];
256 } 279 }
257 280
258 if (buf.is_looping) {
259 LOG_ERROR(Audio_DSP, "Looped buffers are unimplemented at the moment");
260 }
261
262 const u8* const memory = Memory::GetPhysicalPointer(buf.physical_address); 281 const u8* const memory = Memory::GetPhysicalPointer(buf.physical_address);
263 if (memory) { 282 if (memory) {
264 const unsigned num_channels = buf.mono_or_stereo == MonoOrStereo::Stereo ? 2 : 1; 283 const unsigned num_channels = buf.mono_or_stereo == MonoOrStereo::Stereo ? 2 : 1;
@@ -305,10 +324,13 @@ bool Source::DequeueBuffer() {
305 break; 324 break;
306 } 325 }
307 326
308 state.current_sample_number = 0; 327 // the first playthrough starts at play_position, loops start at the beginning of the buffer
309 state.next_sample_number = 0; 328 state.current_sample_number = (!buf.has_played) ? buf.play_position : 0;
329 state.next_sample_number = state.current_sample_number;
310 state.current_buffer_id = buf.buffer_id; 330 state.current_buffer_id = buf.buffer_id;
311 state.buffer_update = buf.from_queue; 331 state.buffer_update = buf.from_queue && !buf.has_played;
332
333 buf.has_played = true;
312 334
313 LOG_TRACE(Audio_DSP, "source_id=%zu buffer_id=%hu from_queue=%s current_buffer.size()=%zu", 335 LOG_TRACE(Audio_DSP, "source_id=%zu buffer_id=%hu from_queue=%s current_buffer.size()=%zu",
314 source_id, buf.buffer_id, buf.from_queue ? "true" : "false", 336 source_id, buf.buffer_id, buf.from_queue ? "true" : "false",
diff --git a/src/audio_core/hle/source.h b/src/audio_core/hle/source.h
index 3d725f2a3..ccb7f064f 100644
--- a/src/audio_core/hle/source.h
+++ b/src/audio_core/hle/source.h
@@ -76,6 +76,8 @@ private:
76 Format format; 76 Format format;
77 77
78 bool from_queue; 78 bool from_queue;
79 u32_dsp play_position; // = 0;
80 bool has_played; // = false;
79 }; 81 };
80 82
81 struct BufferOrder { 83 struct BufferOrder {
diff --git a/src/audio_core/interpolate.h b/src/audio_core/interpolate.h
index dd06fdda9..19a7b66cb 100644
--- a/src/audio_core/interpolate.h
+++ b/src/audio_core/interpolate.h
@@ -21,6 +21,7 @@ struct State {
21 21
22/** 22/**
23 * No interpolation. This is equivalent to a zero-order hold. There is a two-sample predelay. 23 * No interpolation. This is equivalent to a zero-order hold. There is a two-sample predelay.
24 * @param state Interpolation state.
24 * @param input Input buffer. 25 * @param input Input buffer.
25 * @param rate_multiplier Stretch factor. Must be a positive non-zero value. 26 * @param rate_multiplier Stretch factor. Must be a positive non-zero value.
26 * rate_multiplier > 1.0 performs decimation and rate_multipler < 1.0 27 * rate_multiplier > 1.0 performs decimation and rate_multipler < 1.0
@@ -31,6 +32,7 @@ StereoBuffer16 None(State& state, const StereoBuffer16& input, float rate_multip
31 32
32/** 33/**
33 * Linear interpolation. This is equivalent to a first-order hold. There is a two-sample predelay. 34 * Linear interpolation. This is equivalent to a first-order hold. There is a two-sample predelay.
35 * @param state Interpolation state.
34 * @param input Input buffer. 36 * @param input Input buffer.
35 * @param rate_multiplier Stretch factor. Must be a positive non-zero value. 37 * @param rate_multiplier Stretch factor. Must be a positive non-zero value.
36 * rate_multiplier > 1.0 performs decimation and rate_multipler < 1.0 38 * rate_multiplier > 1.0 performs decimation and rate_multipler < 1.0
diff --git a/src/audio_core/null_sink.h b/src/audio_core/null_sink.h
index e7668438c..c732926a2 100644
--- a/src/audio_core/null_sink.h
+++ b/src/audio_core/null_sink.h
@@ -23,6 +23,12 @@ public:
23 size_t SamplesInQueue() const override { 23 size_t SamplesInQueue() const override {
24 return 0; 24 return 0;
25 } 25 }
26
27 void SetDevice(int device_id) override {}
28
29 std::vector<std::string> GetDeviceList() const override {
30 return {};
31 }
26}; 32};
27 33
28} // namespace AudioCore 34} // namespace AudioCore
diff --git a/src/audio_core/sdl2_sink.cpp b/src/audio_core/sdl2_sink.cpp
index 4b66cd826..933c5f16d 100644
--- a/src/audio_core/sdl2_sink.cpp
+++ b/src/audio_core/sdl2_sink.cpp
@@ -4,12 +4,12 @@
4 4
5#include <list> 5#include <list>
6#include <numeric> 6#include <numeric>
7#include <vector>
8#include <SDL.h> 7#include <SDL.h>
9#include "audio_core/audio_core.h" 8#include "audio_core/audio_core.h"
10#include "audio_core/sdl2_sink.h" 9#include "audio_core/sdl2_sink.h"
11#include "common/assert.h" 10#include "common/assert.h"
12#include "common/logging/log.h" 11#include "common/logging/log.h"
12#include "core/settings.h"
13 13
14namespace AudioCore { 14namespace AudioCore {
15 15
@@ -42,10 +42,24 @@ SDL2Sink::SDL2Sink() : impl(std::make_unique<Impl>()) {
42 SDL_AudioSpec obtained_audiospec; 42 SDL_AudioSpec obtained_audiospec;
43 SDL_zero(obtained_audiospec); 43 SDL_zero(obtained_audiospec);
44 44
45 impl->audio_device_id = 45 int device_count = SDL_GetNumAudioDevices(0);
46 SDL_OpenAudioDevice(nullptr, false, &desired_audiospec, &obtained_audiospec, 0); 46 device_list.clear();
47 for (int i = 0; i < device_count; ++i) {
48 device_list.push_back(SDL_GetAudioDeviceName(i, 0));
49 }
50
51 const char* device = nullptr;
52
53 if (device_count >= 1 && Settings::values.audio_device_id != "auto" &&
54 !Settings::values.audio_device_id.empty()) {
55 device = Settings::values.audio_device_id.c_str();
56 }
57
58 impl->audio_device_id = SDL_OpenAudioDevice(device, false, &desired_audiospec,
59 &obtained_audiospec, SDL_AUDIO_ALLOW_ANY_CHANGE);
47 if (impl->audio_device_id <= 0) { 60 if (impl->audio_device_id <= 0) {
48 LOG_CRITICAL(Audio_Sink, "SDL_OpenAudioDevice failed with: %s", SDL_GetError()); 61 LOG_CRITICAL(Audio_Sink, "SDL_OpenAudioDevice failed with code %d for device \"%s\"",
62 impl->audio_device_id, Settings::values.audio_device_id.c_str());
49 return; 63 return;
50 } 64 }
51 65
@@ -69,6 +83,10 @@ unsigned int SDL2Sink::GetNativeSampleRate() const {
69 return impl->sample_rate; 83 return impl->sample_rate;
70} 84}
71 85
86std::vector<std::string> SDL2Sink::GetDeviceList() const {
87 return device_list;
88}
89
72void SDL2Sink::EnqueueSamples(const s16* samples, size_t sample_count) { 90void SDL2Sink::EnqueueSamples(const s16* samples, size_t sample_count) {
73 if (impl->audio_device_id <= 0) 91 if (impl->audio_device_id <= 0)
74 return; 92 return;
@@ -96,6 +114,10 @@ size_t SDL2Sink::SamplesInQueue() const {
96 return total_size; 114 return total_size;
97} 115}
98 116
117void SDL2Sink::SetDevice(int device_id) {
118 this->device_id = device_id;
119}
120
99void SDL2Sink::Impl::Callback(void* impl_, u8* buffer, int buffer_size_in_bytes) { 121void SDL2Sink::Impl::Callback(void* impl_, u8* buffer, int buffer_size_in_bytes) {
100 Impl* impl = reinterpret_cast<Impl*>(impl_); 122 Impl* impl = reinterpret_cast<Impl*>(impl_);
101 123
diff --git a/src/audio_core/sdl2_sink.h b/src/audio_core/sdl2_sink.h
index ccd0f7c7e..bcc725369 100644
--- a/src/audio_core/sdl2_sink.h
+++ b/src/audio_core/sdl2_sink.h
@@ -21,9 +21,14 @@ public:
21 21
22 size_t SamplesInQueue() const override; 22 size_t SamplesInQueue() const override;
23 23
24 std::vector<std::string> GetDeviceList() const override;
25 void SetDevice(int device_id) override;
26
24private: 27private:
25 struct Impl; 28 struct Impl;
26 std::unique_ptr<Impl> impl; 29 std::unique_ptr<Impl> impl;
30 int device_id;
31 std::vector<std::string> device_list;
27}; 32};
28 33
29} // namespace AudioCore 34} // namespace AudioCore
diff --git a/src/audio_core/sink.h b/src/audio_core/sink.h
index 08f3bab5b..c69cb2c74 100644
--- a/src/audio_core/sink.h
+++ b/src/audio_core/sink.h
@@ -31,6 +31,15 @@ public:
31 31
32 /// Samples enqueued that have not been played yet. 32 /// Samples enqueued that have not been played yet.
33 virtual std::size_t SamplesInQueue() const = 0; 33 virtual std::size_t SamplesInQueue() const = 0;
34
35 /**
36 * Sets the desired output device.
37 * @param device_id ID of the desired device.
38 */
39 virtual void SetDevice(int device_id) = 0;
40
41 /// Returns the list of available devices.
42 virtual std::vector<std::string> GetDeviceList() const = 0;
34}; 43};
35 44
36} // namespace 45} // namespace
diff --git a/src/audio_core/sink_details.cpp b/src/audio_core/sink_details.cpp
index 95ccc9e9d..6972395af 100644
--- a/src/audio_core/sink_details.cpp
+++ b/src/audio_core/sink_details.cpp
@@ -2,6 +2,7 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
5#include <memory> 6#include <memory>
6#include <vector> 7#include <vector>
7#include "audio_core/null_sink.h" 8#include "audio_core/null_sink.h"
@@ -9,6 +10,7 @@
9#ifdef HAVE_SDL2 10#ifdef HAVE_SDL2
10#include "audio_core/sdl2_sink.h" 11#include "audio_core/sdl2_sink.h"
11#endif 12#endif
13#include "common/logging/log.h"
12 14
13namespace AudioCore { 15namespace AudioCore {
14 16
@@ -20,4 +22,21 @@ const std::vector<SinkDetails> g_sink_details = {
20 {"null", []() { return std::make_unique<NullSink>(); }}, 22 {"null", []() { return std::make_unique<NullSink>(); }},
21}; 23};
22 24
25const SinkDetails& GetSinkDetails(std::string sink_id) {
26 auto iter =
27 std::find_if(g_sink_details.begin(), g_sink_details.end(),
28 [sink_id](const auto& sink_detail) { return sink_detail.id == sink_id; });
29
30 if (sink_id == "auto" || iter == g_sink_details.end()) {
31 if (sink_id != "auto") {
32 LOG_ERROR(Audio, "AudioCore::SelectSink given invalid sink_id %s", sink_id.c_str());
33 }
34 // Auto-select.
35 // g_sink_details is ordered in terms of desirability, with the best choice at the front.
36 iter = g_sink_details.begin();
37 }
38
39 return *iter;
40}
41
23} // namespace AudioCore 42} // namespace AudioCore
diff --git a/src/audio_core/sink_details.h b/src/audio_core/sink_details.h
index 4b30cf835..9d3735171 100644
--- a/src/audio_core/sink_details.h
+++ b/src/audio_core/sink_details.h
@@ -24,4 +24,6 @@ struct SinkDetails {
24 24
25extern const std::vector<SinkDetails> g_sink_details; 25extern const std::vector<SinkDetails> g_sink_details;
26 26
27const SinkDetails& GetSinkDetails(std::string sink_id);
28
27} // namespace AudioCore 29} // namespace AudioCore
diff --git a/src/audio_core/time_stretch.h b/src/audio_core/time_stretch.h
index e3e4dc353..c98b16705 100644
--- a/src/audio_core/time_stretch.h
+++ b/src/audio_core/time_stretch.h
@@ -25,7 +25,7 @@ public:
25 /** 25 /**
26 * Add samples to be processed. 26 * Add samples to be processed.
27 * @param sample_buffer Buffer of samples in interleaved stereo PCM16 format. 27 * @param sample_buffer Buffer of samples in interleaved stereo PCM16 format.
28 * @param num_sample Number of samples. 28 * @param num_samples Number of samples.
29 */ 29 */
30 void AddSamples(const s16* sample_buffer, size_t num_samples); 30 void AddSamples(const s16* sample_buffer, size_t num_samples);
31 31
diff --git a/src/citra/citra.cpp b/src/citra/citra.cpp
index 99c096ac7..76f5caeb1 100644
--- a/src/citra/citra.cpp
+++ b/src/citra/citra.cpp
@@ -141,6 +141,26 @@ int main(int argc, char** argv) {
141 case Core::System::ResultStatus::ErrorLoader: 141 case Core::System::ResultStatus::ErrorLoader:
142 LOG_CRITICAL(Frontend, "Failed to load ROM!"); 142 LOG_CRITICAL(Frontend, "Failed to load ROM!");
143 return -1; 143 return -1;
144 case Core::System::ResultStatus::ErrorLoader_ErrorEncrypted:
145 LOG_CRITICAL(Frontend, "The game that you are trying to load must be decrypted before "
146 "being used with Citra. \n\n For more information on dumping and "
147 "decrypting games, please refer to: "
148 "https://citra-emu.org/wiki/Dumping-Game-Cartridges");
149 return -1;
150 case Core::System::ResultStatus::ErrorLoader_ErrorInvalidFormat:
151 LOG_CRITICAL(Frontend, "Error while loading ROM: The ROM format is not supported.");
152 return -1;
153 case Core::System::ResultStatus::ErrorNotInitialized:
154 LOG_CRITICAL(Frontend, "CPUCore not initialized");
155 return -1;
156 case Core::System::ResultStatus::ErrorSystemMode:
157 LOG_CRITICAL(Frontend, "Failed to determine system mode!");
158 return -1;
159 case Core::System::ResultStatus::ErrorVideoCore:
160 LOG_CRITICAL(Frontend, "VideoCore not initialized");
161 return -1;
162 case Core::System::ResultStatus::Success:
163 break; // Expected case
144 } 164 }
145 165
146 while (emu_window->IsOpen()) { 166 while (emu_window->IsOpen()) {
diff --git a/src/citra/config.cpp b/src/citra/config.cpp
index 1d0faf193..fac1c9a0e 100644
--- a/src/citra/config.cpp
+++ b/src/citra/config.cpp
@@ -82,6 +82,7 @@ void Config::ReadValues() {
82 Settings::values.sink_id = sdl2_config->Get("Audio", "output_engine", "auto"); 82 Settings::values.sink_id = sdl2_config->Get("Audio", "output_engine", "auto");
83 Settings::values.enable_audio_stretching = 83 Settings::values.enable_audio_stretching =
84 sdl2_config->GetBoolean("Audio", "enable_audio_stretching", true); 84 sdl2_config->GetBoolean("Audio", "enable_audio_stretching", true);
85 Settings::values.audio_device_id = sdl2_config->Get("Audio", "output_device", "auto");
85 86
86 // Data Storage 87 // Data Storage
87 Settings::values.use_virtual_sd = 88 Settings::values.use_virtual_sd =
@@ -92,6 +93,21 @@ void Config::ReadValues() {
92 Settings::values.region_value = 93 Settings::values.region_value =
93 sdl2_config->GetInteger("System", "region_value", Settings::REGION_VALUE_AUTO_SELECT); 94 sdl2_config->GetInteger("System", "region_value", Settings::REGION_VALUE_AUTO_SELECT);
94 95
96 // Camera
97 using namespace Service::CAM;
98 Settings::values.camera_name[OuterRightCamera] =
99 sdl2_config->Get("Camera", "camera_outer_right_name", "blank");
100 Settings::values.camera_config[OuterRightCamera] =
101 sdl2_config->Get("Camera", "camera_outer_right_config", "");
102 Settings::values.camera_name[InnerCamera] =
103 sdl2_config->Get("Camera", "camera_inner_name", "blank");
104 Settings::values.camera_config[InnerCamera] =
105 sdl2_config->Get("Camera", "camera_inner_config", "");
106 Settings::values.camera_name[OuterLeftCamera] =
107 sdl2_config->Get("Camera", "camera_outer_left_name", "blank");
108 Settings::values.camera_config[OuterLeftCamera] =
109 sdl2_config->Get("Camera", "camera_outer_left_config", "");
110
95 // Miscellaneous 111 // Miscellaneous
96 Settings::values.log_filter = sdl2_config->Get("Miscellaneous", "log_filter", "*:Info"); 112 Settings::values.log_filter = sdl2_config->Get("Miscellaneous", "log_filter", "*:Info");
97 113
diff --git a/src/citra/default_ini.h b/src/citra/default_ini.h
index 7996813b4..435ba6f00 100644
--- a/src/citra/default_ini.h
+++ b/src/citra/default_ini.h
@@ -91,6 +91,10 @@ output_engine =
91# 0: No, 1 (default): Yes 91# 0: No, 1 (default): Yes
92enable_audio_stretching = 92enable_audio_stretching =
93 93
94# Which audio device to use.
95# auto (default): Auto-select
96output_device =
97
94[Data Storage] 98[Data Storage]
95# Whether to create a virtual SD card. 99# Whether to create a virtual SD card.
96# 1 (default): Yes, 0: No 100# 1 (default): Yes, 0: No
@@ -105,6 +109,22 @@ is_new_3ds =
105# -1: Auto-select (default), 0: Japan, 1: USA, 2: Europe, 3: Australia, 4: China, 5: Korea, 6: Taiwan 109# -1: Auto-select (default), 0: Japan, 1: USA, 2: Europe, 3: Australia, 4: China, 5: Korea, 6: Taiwan
106region_value = 110region_value =
107 111
112[Camera]
113# Which camera engine to use for the right outer camera
114# blank (default): a dummy camera that always returns black image
115camera_outer_right_name =
116
117# A config string for the right outer camera. Its meaning is defined by the camera engine
118camera_outer_right_config =
119
120# ... for the left outer camera
121camera_outer_left_name =
122camera_outer_left_config =
123
124# ... for the inner camera
125camera_inner_name =
126camera_inner_config =
127
108[Miscellaneous] 128[Miscellaneous]
109# A filter which removes logs below a certain logging level. 129# A filter which removes logs below a certain logging level.
110# Examples: *:Debug Kernel.SVC:Trace Service.*:Critical 130# Examples: *:Debug Kernel.SVC:Trace Service.*:Critical
diff --git a/src/citra/emu_window/emu_window_sdl2.cpp b/src/citra/emu_window/emu_window_sdl2.cpp
index 81a3abe3f..00d00905a 100644
--- a/src/citra/emu_window/emu_window_sdl2.cpp
+++ b/src/citra/emu_window/emu_window_sdl2.cpp
@@ -79,8 +79,8 @@ EmuWindow_SDL2::EmuWindow_SDL2() {
79 SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 8); 79 SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 8);
80 SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 0); 80 SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 0);
81 81
82 std::string window_title = 82 std::string window_title = Common::StringFromFormat("Citra %s| %s-%s ", Common::g_build_name,
83 Common::StringFromFormat("Citra | %s-%s", Common::g_scm_branch, Common::g_scm_desc); 83 Common::g_scm_branch, Common::g_scm_desc);
84 render_window = SDL_CreateWindow( 84 render_window = SDL_CreateWindow(
85 window_title.c_str(), 85 window_title.c_str(),
86 SDL_WINDOWPOS_UNDEFINED, // x position 86 SDL_WINDOWPOS_UNDEFINED, // x position
diff --git a/src/citra_qt/CMakeLists.txt b/src/citra_qt/CMakeLists.txt
index d4460bf01..15a6ccf9a 100644
--- a/src/citra_qt/CMakeLists.txt
+++ b/src/citra_qt/CMakeLists.txt
@@ -69,7 +69,6 @@ set(HEADERS
69set(UIS 69set(UIS
70 debugger/callstack.ui 70 debugger/callstack.ui
71 debugger/disassembler.ui 71 debugger/disassembler.ui
72 debugger/profiler.ui
73 debugger/registers.ui 72 debugger/registers.ui
74 configure.ui 73 configure.ui
75 configure_audio.ui 74 configure_audio.ui
diff --git a/src/citra_qt/bootmanager.cpp b/src/citra_qt/bootmanager.cpp
index 948db384d..69d18cf0c 100644
--- a/src/citra_qt/bootmanager.cpp
+++ b/src/citra_qt/bootmanager.cpp
@@ -101,8 +101,8 @@ private:
101GRenderWindow::GRenderWindow(QWidget* parent, EmuThread* emu_thread) 101GRenderWindow::GRenderWindow(QWidget* parent, EmuThread* emu_thread)
102 : QWidget(parent), child(nullptr), keyboard_id(0), emu_thread(emu_thread) { 102 : QWidget(parent), child(nullptr), keyboard_id(0), emu_thread(emu_thread) {
103 103
104 std::string window_title = 104 std::string window_title = Common::StringFromFormat("Citra %s| %s-%s", Common::g_build_name,
105 Common::StringFromFormat("Citra | %s-%s", Common::g_scm_branch, Common::g_scm_desc); 105 Common::g_scm_branch, Common::g_scm_desc);
106 setWindowTitle(QString::fromStdString(window_title)); 106 setWindowTitle(QString::fromStdString(window_title));
107 107
108 keyboard_id = KeyMap::NewDeviceId(); 108 keyboard_id = KeyMap::NewDeviceId();
diff --git a/src/citra_qt/config.cpp b/src/citra_qt/config.cpp
index 8021667d0..5fe57dfa2 100644
--- a/src/citra_qt/config.cpp
+++ b/src/citra_qt/config.cpp
@@ -63,6 +63,24 @@ void Config::ReadValues() {
63 Settings::values.sink_id = qt_config->value("output_engine", "auto").toString().toStdString(); 63 Settings::values.sink_id = qt_config->value("output_engine", "auto").toString().toStdString();
64 Settings::values.enable_audio_stretching = 64 Settings::values.enable_audio_stretching =
65 qt_config->value("enable_audio_stretching", true).toBool(); 65 qt_config->value("enable_audio_stretching", true).toBool();
66 Settings::values.audio_device_id =
67 qt_config->value("output_device", "auto").toString().toStdString();
68 qt_config->endGroup();
69
70 using namespace Service::CAM;
71 qt_config->beginGroup("Camera");
72 Settings::values.camera_name[OuterRightCamera] =
73 qt_config->value("camera_outer_right_name", "blank").toString().toStdString();
74 Settings::values.camera_config[OuterRightCamera] =
75 qt_config->value("camera_outer_right_config", "").toString().toStdString();
76 Settings::values.camera_name[InnerCamera] =
77 qt_config->value("camera_inner_name", "blank").toString().toStdString();
78 Settings::values.camera_config[InnerCamera] =
79 qt_config->value("camera_inner_config", "").toString().toStdString();
80 Settings::values.camera_name[OuterLeftCamera] =
81 qt_config->value("camera_outer_left_name", "blank").toString().toStdString();
82 Settings::values.camera_config[OuterLeftCamera] =
83 qt_config->value("camera_outer_left_config", "").toString().toStdString();
66 qt_config->endGroup(); 84 qt_config->endGroup();
67 85
68 qt_config->beginGroup("Data Storage"); 86 qt_config->beginGroup("Data Storage");
@@ -128,6 +146,7 @@ void Config::ReadValues() {
128 146
129 UISettings::values.single_window_mode = qt_config->value("singleWindowMode", true).toBool(); 147 UISettings::values.single_window_mode = qt_config->value("singleWindowMode", true).toBool();
130 UISettings::values.display_titlebar = qt_config->value("displayTitleBars", true).toBool(); 148 UISettings::values.display_titlebar = qt_config->value("displayTitleBars", true).toBool();
149 UISettings::values.show_status_bar = qt_config->value("showStatusBar", true).toBool();
131 UISettings::values.confirm_before_closing = qt_config->value("confirmClose", true).toBool(); 150 UISettings::values.confirm_before_closing = qt_config->value("confirmClose", true).toBool();
132 UISettings::values.first_start = qt_config->value("firstStart", true).toBool(); 151 UISettings::values.first_start = qt_config->value("firstStart", true).toBool();
133 152
@@ -169,6 +188,23 @@ void Config::SaveValues() {
169 qt_config->beginGroup("Audio"); 188 qt_config->beginGroup("Audio");
170 qt_config->setValue("output_engine", QString::fromStdString(Settings::values.sink_id)); 189 qt_config->setValue("output_engine", QString::fromStdString(Settings::values.sink_id));
171 qt_config->setValue("enable_audio_stretching", Settings::values.enable_audio_stretching); 190 qt_config->setValue("enable_audio_stretching", Settings::values.enable_audio_stretching);
191 qt_config->setValue("output_device", QString::fromStdString(Settings::values.audio_device_id));
192 qt_config->endGroup();
193
194 using namespace Service::CAM;
195 qt_config->beginGroup("Camera");
196 qt_config->setValue("camera_outer_right_name",
197 QString::fromStdString(Settings::values.camera_name[OuterRightCamera]));
198 qt_config->setValue("camera_outer_right_config",
199 QString::fromStdString(Settings::values.camera_config[OuterRightCamera]));
200 qt_config->setValue("camera_inner_name",
201 QString::fromStdString(Settings::values.camera_name[InnerCamera]));
202 qt_config->setValue("camera_inner_config",
203 QString::fromStdString(Settings::values.camera_config[InnerCamera]));
204 qt_config->setValue("camera_outer_left_name",
205 QString::fromStdString(Settings::values.camera_name[OuterLeftCamera]));
206 qt_config->setValue("camera_outer_left_config",
207 QString::fromStdString(Settings::values.camera_config[OuterLeftCamera]));
172 qt_config->endGroup(); 208 qt_config->endGroup();
173 209
174 qt_config->beginGroup("Data Storage"); 210 qt_config->beginGroup("Data Storage");
@@ -217,6 +253,7 @@ void Config::SaveValues() {
217 253
218 qt_config->setValue("singleWindowMode", UISettings::values.single_window_mode); 254 qt_config->setValue("singleWindowMode", UISettings::values.single_window_mode);
219 qt_config->setValue("displayTitleBars", UISettings::values.display_titlebar); 255 qt_config->setValue("displayTitleBars", UISettings::values.display_titlebar);
256 qt_config->setValue("showStatusBar", UISettings::values.show_status_bar);
220 qt_config->setValue("confirmClose", UISettings::values.confirm_before_closing); 257 qt_config->setValue("confirmClose", UISettings::values.confirm_before_closing);
221 qt_config->setValue("firstStart", UISettings::values.first_start); 258 qt_config->setValue("firstStart", UISettings::values.first_start);
222 259
diff --git a/src/citra_qt/configure_audio.cpp b/src/citra_qt/configure_audio.cpp
index 3cdd4c780..3ddcf9232 100644
--- a/src/citra_qt/configure_audio.cpp
+++ b/src/citra_qt/configure_audio.cpp
@@ -2,6 +2,9 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <memory>
6#include "audio_core/audio_core.h"
7#include "audio_core/sink.h"
5#include "audio_core/sink_details.h" 8#include "audio_core/sink_details.h"
6#include "citra_qt/configure_audio.h" 9#include "citra_qt/configure_audio.h"
7#include "core/settings.h" 10#include "core/settings.h"
@@ -18,6 +21,8 @@ ConfigureAudio::ConfigureAudio(QWidget* parent)
18 } 21 }
19 22
20 this->setConfiguration(); 23 this->setConfiguration();
24 connect(ui->output_sink_combo_box, SIGNAL(currentIndexChanged(int)), this,
25 SLOT(updateAudioDevices(int)));
21} 26}
22 27
23ConfigureAudio::~ConfigureAudio() {} 28ConfigureAudio::~ConfigureAudio() {}
@@ -33,6 +38,19 @@ void ConfigureAudio::setConfiguration() {
33 ui->output_sink_combo_box->setCurrentIndex(new_sink_index); 38 ui->output_sink_combo_box->setCurrentIndex(new_sink_index);
34 39
35 ui->toggle_audio_stretching->setChecked(Settings::values.enable_audio_stretching); 40 ui->toggle_audio_stretching->setChecked(Settings::values.enable_audio_stretching);
41
42 // The device list cannot be pre-populated (nor listed) until the output sink is known.
43 updateAudioDevices(new_sink_index);
44
45 int new_device_index = -1;
46 for (int index = 0; index < ui->audio_device_combo_box->count(); index++) {
47 if (ui->audio_device_combo_box->itemText(index).toStdString() ==
48 Settings::values.audio_device_id) {
49 new_device_index = index;
50 break;
51 }
52 }
53 ui->audio_device_combo_box->setCurrentIndex(new_device_index);
36} 54}
37 55
38void ConfigureAudio::applyConfiguration() { 56void ConfigureAudio::applyConfiguration() {
@@ -40,5 +58,20 @@ void ConfigureAudio::applyConfiguration() {
40 ui->output_sink_combo_box->itemText(ui->output_sink_combo_box->currentIndex()) 58 ui->output_sink_combo_box->itemText(ui->output_sink_combo_box->currentIndex())
41 .toStdString(); 59 .toStdString();
42 Settings::values.enable_audio_stretching = ui->toggle_audio_stretching->isChecked(); 60 Settings::values.enable_audio_stretching = ui->toggle_audio_stretching->isChecked();
61 Settings::values.audio_device_id =
62 ui->audio_device_combo_box->itemText(ui->audio_device_combo_box->currentIndex())
63 .toStdString();
43 Settings::Apply(); 64 Settings::Apply();
44} 65}
66
67void ConfigureAudio::updateAudioDevices(int sink_index) {
68 ui->audio_device_combo_box->clear();
69 ui->audio_device_combo_box->addItem("auto");
70
71 std::string sink_id = ui->output_sink_combo_box->itemText(sink_index).toStdString();
72 std::vector<std::string> device_list =
73 AudioCore::GetSinkDetails(sink_id).factory()->GetDeviceList();
74 for (const auto& device : device_list) {
75 ui->audio_device_combo_box->addItem(device.c_str());
76 }
77}
diff --git a/src/citra_qt/configure_audio.h b/src/citra_qt/configure_audio.h
index 51df2e27b..8190e694f 100644
--- a/src/citra_qt/configure_audio.h
+++ b/src/citra_qt/configure_audio.h
@@ -20,6 +20,9 @@ public:
20 20
21 void applyConfiguration(); 21 void applyConfiguration();
22 22
23public slots:
24 void updateAudioDevices(int sink_index);
25
23private: 26private:
24 void setConfiguration(); 27 void setConfiguration();
25 28
diff --git a/src/citra_qt/configure_audio.ui b/src/citra_qt/configure_audio.ui
index 3e2b4635f..dd870eb61 100644
--- a/src/citra_qt/configure_audio.ui
+++ b/src/citra_qt/configure_audio.ui
@@ -35,6 +35,21 @@
35 </property> 35 </property>
36 </widget> 36 </widget>
37 </item> 37 </item>
38 <item>
39 <layout class="QHBoxLayout">
40 <item>
41 <widget class="QLabel">
42 <property name="text">
43 <string>Audio Device:</string>
44 </property>
45 </widget>
46 </item>
47 <item>
48 <widget class="QComboBox" name="audio_device_combo_box">
49 </widget>
50 </item>
51 </layout>
52 </item>
38 </layout> 53 </layout>
39 </widget> 54 </widget>
40 </item> 55 </item>
diff --git a/src/citra_qt/configure_general.ui b/src/citra_qt/configure_general.ui
index 0f3352a1d..c739605a4 100644
--- a/src/citra_qt/configure_general.ui
+++ b/src/citra_qt/configure_general.ui
@@ -27,7 +27,7 @@
27 <item> 27 <item>
28 <widget class="QCheckBox" name="toggle_deepscan"> 28 <widget class="QCheckBox" name="toggle_deepscan">
29 <property name="text"> 29 <property name="text">
30 <string>Recursive scan for game folder</string> 30 <string>Search sub-directories for games</string>
31 </property> 31 </property>
32 </widget> 32 </widget>
33 </item> 33 </item>
diff --git a/src/citra_qt/configure_input.cpp b/src/citra_qt/configure_input.cpp
index 3e6803b8a..c29652f32 100644
--- a/src/citra_qt/configure_input.cpp
+++ b/src/citra_qt/configure_input.cpp
@@ -17,7 +17,6 @@ static QString getKeyName(Qt::Key key_code) {
17 case Qt::Key_Alt: 17 case Qt::Key_Alt:
18 return QObject::tr("Alt"); 18 return QObject::tr("Alt");
19 case Qt::Key_Meta: 19 case Qt::Key_Meta:
20 case -1:
21 return ""; 20 return "";
22 default: 21 default:
23 return QKeySequence(key_code).toString(); 22 return QKeySequence(key_code).toString();
diff --git a/src/citra_qt/configure_system.cpp b/src/citra_qt/configure_system.cpp
index eb1276ef3..040185e82 100644
--- a/src/citra_qt/configure_system.cpp
+++ b/src/citra_qt/configure_system.cpp
@@ -4,6 +4,7 @@
4 4
5#include "citra_qt/configure_system.h" 5#include "citra_qt/configure_system.h"
6#include "citra_qt/ui_settings.h" 6#include "citra_qt/ui_settings.h"
7#include "core/core.h"
7#include "core/hle/service/cfg/cfg.h" 8#include "core/hle/service/cfg/cfg.h"
8#include "core/hle/service/fs/archive.h" 9#include "core/hle/service/fs/archive.h"
9#include "ui_configure_system.h" 10#include "ui_configure_system.h"
diff --git a/src/citra_qt/debugger/graphics/graphics_cmdlists.cpp b/src/citra_qt/debugger/graphics/graphics_cmdlists.cpp
index f5a2ec761..c68fe753b 100644
--- a/src/citra_qt/debugger/graphics/graphics_cmdlists.cpp
+++ b/src/citra_qt/debugger/graphics/graphics_cmdlists.cpp
@@ -18,15 +18,16 @@
18#include "citra_qt/util/util.h" 18#include "citra_qt/util/util.h"
19#include "common/vector_math.h" 19#include "common/vector_math.h"
20#include "video_core/debug_utils/debug_utils.h" 20#include "video_core/debug_utils/debug_utils.h"
21#include "video_core/pica.h"
22#include "video_core/pica_state.h" 21#include "video_core/pica_state.h"
22#include "video_core/regs.h"
23#include "video_core/texture/texture_decode.h"
23 24
24namespace { 25namespace {
25QImage LoadTexture(const u8* src, const Pica::DebugUtils::TextureInfo& info) { 26QImage LoadTexture(const u8* src, const Pica::Texture::TextureInfo& info) {
26 QImage decoded_image(info.width, info.height, QImage::Format_ARGB32); 27 QImage decoded_image(info.width, info.height, QImage::Format_ARGB32);
27 for (int y = 0; y < info.height; ++y) { 28 for (int y = 0; y < info.height; ++y) {
28 for (int x = 0; x < info.width; ++x) { 29 for (int x = 0; x < info.width; ++x) {
29 Math::Vec4<u8> color = Pica::DebugUtils::LookupTexture(src, x, y, info, true); 30 Math::Vec4<u8> color = Pica::Texture::LookupTexture(src, x, y, info, true);
30 decoded_image.setPixel(x, y, qRgba(color.r(), color.g(), color.b(), color.a())); 31 decoded_image.setPixel(x, y, qRgba(color.r(), color.g(), color.b(), color.a()));
31 } 32 }
32 } 33 }
@@ -36,9 +37,10 @@ QImage LoadTexture(const u8* src, const Pica::DebugUtils::TextureInfo& info) {
36 37
37class TextureInfoWidget : public QWidget { 38class TextureInfoWidget : public QWidget {
38public: 39public:
39 TextureInfoWidget(const u8* src, const Pica::DebugUtils::TextureInfo& info, 40 TextureInfoWidget(const u8* src, const Pica::Texture::TextureInfo& info,
40 QWidget* parent = nullptr) 41 QWidget* parent = nullptr)
41 : QWidget(parent) { 42 : QWidget(parent) {
43
42 QLabel* image_widget = new QLabel; 44 QLabel* image_widget = new QLabel;
43 QPixmap image_pixmap = QPixmap::fromImage(LoadTexture(src, info)); 45 QPixmap image_pixmap = QPixmap::fromImage(LoadTexture(src, info));
44 image_pixmap = image_pixmap.scaled(200, 100, Qt::KeepAspectRatio, Qt::SmoothTransformation); 46 image_pixmap = image_pixmap.scaled(200, 100, Qt::KeepAspectRatio, Qt::SmoothTransformation);
@@ -70,7 +72,7 @@ QVariant GPUCommandListModel::data(const QModelIndex& index, int role) const {
70 if (role == Qt::DisplayRole) { 72 if (role == Qt::DisplayRole) {
71 switch (index.column()) { 73 switch (index.column()) {
72 case 0: 74 case 0:
73 return QString::fromLatin1(Pica::Regs::GetCommandName(write.cmd_id).c_str()); 75 return QString::fromLatin1(Pica::Regs::GetRegisterName(write.cmd_id));
74 case 1: 76 case 1:
75 return QString("%1").arg(write.cmd_id, 3, 16, QLatin1Char('0')); 77 return QString("%1").arg(write.cmd_id, 3, 16, QLatin1Char('0'));
76 case 2: 78 case 2:
@@ -121,15 +123,16 @@ void GPUCommandListModel::OnPicaTraceFinished(const Pica::DebugUtils::PicaTrace&
121void GPUCommandListWidget::OnCommandDoubleClicked(const QModelIndex& index) { 123void GPUCommandListWidget::OnCommandDoubleClicked(const QModelIndex& index) {
122 const unsigned int command_id = 124 const unsigned int command_id =
123 list_widget->model()->data(index, GPUCommandListModel::CommandIdRole).toUInt(); 125 list_widget->model()->data(index, GPUCommandListModel::CommandIdRole).toUInt();
124 if (COMMAND_IN_RANGE(command_id, texture0) || COMMAND_IN_RANGE(command_id, texture1) || 126 if (COMMAND_IN_RANGE(command_id, texturing.texture0) ||
125 COMMAND_IN_RANGE(command_id, texture2)) { 127 COMMAND_IN_RANGE(command_id, texturing.texture1) ||
128 COMMAND_IN_RANGE(command_id, texturing.texture2)) {
126 129
127 unsigned texture_index; 130 unsigned texture_index;
128 if (COMMAND_IN_RANGE(command_id, texture0)) { 131 if (COMMAND_IN_RANGE(command_id, texturing.texture0)) {
129 texture_index = 0; 132 texture_index = 0;
130 } else if (COMMAND_IN_RANGE(command_id, texture1)) { 133 } else if (COMMAND_IN_RANGE(command_id, texturing.texture1)) {
131 texture_index = 1; 134 texture_index = 1;
132 } else if (COMMAND_IN_RANGE(command_id, texture2)) { 135 } else if (COMMAND_IN_RANGE(command_id, texturing.texture2)) {
133 texture_index = 2; 136 texture_index = 2;
134 } else { 137 } else {
135 UNREACHABLE_MSG("Unknown texture command"); 138 UNREACHABLE_MSG("Unknown texture command");
@@ -144,23 +147,24 @@ void GPUCommandListWidget::SetCommandInfo(const QModelIndex& index) {
144 147
145 const unsigned int command_id = 148 const unsigned int command_id =
146 list_widget->model()->data(index, GPUCommandListModel::CommandIdRole).toUInt(); 149 list_widget->model()->data(index, GPUCommandListModel::CommandIdRole).toUInt();
147 if (COMMAND_IN_RANGE(command_id, texture0) || COMMAND_IN_RANGE(command_id, texture1) || 150 if (COMMAND_IN_RANGE(command_id, texturing.texture0) ||
148 COMMAND_IN_RANGE(command_id, texture2)) { 151 COMMAND_IN_RANGE(command_id, texturing.texture1) ||
152 COMMAND_IN_RANGE(command_id, texturing.texture2)) {
149 153
150 unsigned texture_index; 154 unsigned texture_index;
151 if (COMMAND_IN_RANGE(command_id, texture0)) { 155 if (COMMAND_IN_RANGE(command_id, texturing.texture0)) {
152 texture_index = 0; 156 texture_index = 0;
153 } else if (COMMAND_IN_RANGE(command_id, texture1)) { 157 } else if (COMMAND_IN_RANGE(command_id, texturing.texture1)) {
154 texture_index = 1; 158 texture_index = 1;
155 } else { 159 } else {
156 texture_index = 2; 160 texture_index = 2;
157 } 161 }
158 162
159 const auto texture = Pica::g_state.regs.GetTextures()[texture_index]; 163 const auto texture = Pica::g_state.regs.texturing.GetTextures()[texture_index];
160 const auto config = texture.config; 164 const auto config = texture.config;
161 const auto format = texture.format; 165 const auto format = texture.format;
162 166
163 const auto info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config, format); 167 const auto info = Pica::Texture::TextureInfo::FromPicaRegister(config, format);
164 const u8* src = Memory::GetPhysicalPointer(config.GetPhysicalAddress()); 168 const u8* src = Memory::GetPhysicalPointer(config.GetPhysicalAddress());
165 new_info_widget = new TextureInfoWidget(src, info); 169 new_info_widget = new TextureInfoWidget(src, info);
166 } 170 }
diff --git a/src/citra_qt/debugger/graphics/graphics_surface.cpp b/src/citra_qt/debugger/graphics/graphics_surface.cpp
index 4efd95d3c..47d9924e1 100644
--- a/src/citra_qt/debugger/graphics/graphics_surface.cpp
+++ b/src/citra_qt/debugger/graphics/graphics_surface.cpp
@@ -16,8 +16,10 @@
16#include "common/color.h" 16#include "common/color.h"
17#include "core/hw/gpu.h" 17#include "core/hw/gpu.h"
18#include "core/memory.h" 18#include "core/memory.h"
19#include "video_core/pica.h"
20#include "video_core/pica_state.h" 19#include "video_core/pica_state.h"
20#include "video_core/regs_framebuffer.h"
21#include "video_core/regs_texturing.h"
22#include "video_core/texture/texture_decode.h"
21#include "video_core/utils.h" 23#include "video_core/utils.h"
22 24
23SurfacePicture::SurfacePicture(QWidget* parent, GraphicsSurfaceWidget* surface_widget_) 25SurfacePicture::SurfacePicture(QWidget* parent, GraphicsSurfaceWidget* surface_widget_)
@@ -413,30 +415,30 @@ void GraphicsSurfaceWidget::OnUpdate() {
413 // TODO: Store a reference to the registers in the debug context instead of accessing them 415 // TODO: Store a reference to the registers in the debug context instead of accessing them
414 // directly... 416 // directly...
415 417
416 const auto& framebuffer = Pica::g_state.regs.framebuffer; 418 const auto& framebuffer = Pica::g_state.regs.framebuffer.framebuffer;
417 419
418 surface_address = framebuffer.GetColorBufferPhysicalAddress(); 420 surface_address = framebuffer.GetColorBufferPhysicalAddress();
419 surface_width = framebuffer.GetWidth(); 421 surface_width = framebuffer.GetWidth();
420 surface_height = framebuffer.GetHeight(); 422 surface_height = framebuffer.GetHeight();
421 423
422 switch (framebuffer.color_format) { 424 switch (framebuffer.color_format) {
423 case Pica::Regs::ColorFormat::RGBA8: 425 case Pica::FramebufferRegs::ColorFormat::RGBA8:
424 surface_format = Format::RGBA8; 426 surface_format = Format::RGBA8;
425 break; 427 break;
426 428
427 case Pica::Regs::ColorFormat::RGB8: 429 case Pica::FramebufferRegs::ColorFormat::RGB8:
428 surface_format = Format::RGB8; 430 surface_format = Format::RGB8;
429 break; 431 break;
430 432
431 case Pica::Regs::ColorFormat::RGB5A1: 433 case Pica::FramebufferRegs::ColorFormat::RGB5A1:
432 surface_format = Format::RGB5A1; 434 surface_format = Format::RGB5A1;
433 break; 435 break;
434 436
435 case Pica::Regs::ColorFormat::RGB565: 437 case Pica::FramebufferRegs::ColorFormat::RGB565:
436 surface_format = Format::RGB565; 438 surface_format = Format::RGB565;
437 break; 439 break;
438 440
439 case Pica::Regs::ColorFormat::RGBA4: 441 case Pica::FramebufferRegs::ColorFormat::RGBA4:
440 surface_format = Format::RGBA4; 442 surface_format = Format::RGBA4;
441 break; 443 break;
442 444
@@ -449,22 +451,22 @@ void GraphicsSurfaceWidget::OnUpdate() {
449 } 451 }
450 452
451 case Source::DepthBuffer: { 453 case Source::DepthBuffer: {
452 const auto& framebuffer = Pica::g_state.regs.framebuffer; 454 const auto& framebuffer = Pica::g_state.regs.framebuffer.framebuffer;
453 455
454 surface_address = framebuffer.GetDepthBufferPhysicalAddress(); 456 surface_address = framebuffer.GetDepthBufferPhysicalAddress();
455 surface_width = framebuffer.GetWidth(); 457 surface_width = framebuffer.GetWidth();
456 surface_height = framebuffer.GetHeight(); 458 surface_height = framebuffer.GetHeight();
457 459
458 switch (framebuffer.depth_format) { 460 switch (framebuffer.depth_format) {
459 case Pica::Regs::DepthFormat::D16: 461 case Pica::FramebufferRegs::DepthFormat::D16:
460 surface_format = Format::D16; 462 surface_format = Format::D16;
461 break; 463 break;
462 464
463 case Pica::Regs::DepthFormat::D24: 465 case Pica::FramebufferRegs::DepthFormat::D24:
464 surface_format = Format::D24; 466 surface_format = Format::D24;
465 break; 467 break;
466 468
467 case Pica::Regs::DepthFormat::D24S8: 469 case Pica::FramebufferRegs::DepthFormat::D24S8:
468 surface_format = Format::D24X8; 470 surface_format = Format::D24X8;
469 break; 471 break;
470 472
@@ -477,14 +479,14 @@ void GraphicsSurfaceWidget::OnUpdate() {
477 } 479 }
478 480
479 case Source::StencilBuffer: { 481 case Source::StencilBuffer: {
480 const auto& framebuffer = Pica::g_state.regs.framebuffer; 482 const auto& framebuffer = Pica::g_state.regs.framebuffer.framebuffer;
481 483
482 surface_address = framebuffer.GetDepthBufferPhysicalAddress(); 484 surface_address = framebuffer.GetDepthBufferPhysicalAddress();
483 surface_width = framebuffer.GetWidth(); 485 surface_width = framebuffer.GetWidth();
484 surface_height = framebuffer.GetHeight(); 486 surface_height = framebuffer.GetHeight();
485 487
486 switch (framebuffer.depth_format) { 488 switch (framebuffer.depth_format) {
487 case Pica::Regs::DepthFormat::D24S8: 489 case Pica::FramebufferRegs::DepthFormat::D24S8:
488 surface_format = Format::X24S8; 490 surface_format = Format::X24S8;
489 break; 491 break;
490 492
@@ -511,8 +513,8 @@ void GraphicsSurfaceWidget::OnUpdate() {
511 break; 513 break;
512 } 514 }
513 515
514 const auto texture = Pica::g_state.regs.GetTextures()[texture_index]; 516 const auto texture = Pica::g_state.regs.texturing.GetTextures()[texture_index];
515 auto info = Pica::DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format); 517 auto info = Pica::Texture::TextureInfo::FromPicaRegister(texture.config, texture.format);
516 518
517 surface_address = info.physical_address; 519 surface_address = info.physical_address;
518 surface_width = info.width; 520 surface_width = info.width;
@@ -567,28 +569,27 @@ void GraphicsSurfaceWidget::OnUpdate() {
567 569
568 surface_picture_label->show(); 570 surface_picture_label->show();
569 571
570 unsigned nibbles_per_pixel = GraphicsSurfaceWidget::NibblesPerPixel(surface_format);
571 unsigned stride = nibbles_per_pixel * surface_width / 2;
572
573 // We handle depth formats here because DebugUtils only supports TextureFormats
574 if (surface_format <= Format::MaxTextureFormat) { 572 if (surface_format <= Format::MaxTextureFormat) {
575
576 // Generate a virtual texture 573 // Generate a virtual texture
577 Pica::DebugUtils::TextureInfo info; 574 Pica::Texture::TextureInfo info;
578 info.physical_address = surface_address; 575 info.physical_address = surface_address;
579 info.width = surface_width; 576 info.width = surface_width;
580 info.height = surface_height; 577 info.height = surface_height;
581 info.format = static_cast<Pica::Regs::TextureFormat>(surface_format); 578 info.format = static_cast<Pica::TexturingRegs::TextureFormat>(surface_format);
582 info.stride = stride; 579 info.SetDefaultStride();
583 580
584 for (unsigned int y = 0; y < surface_height; ++y) { 581 for (unsigned int y = 0; y < surface_height; ++y) {
585 for (unsigned int x = 0; x < surface_width; ++x) { 582 for (unsigned int x = 0; x < surface_width; ++x) {
586 Math::Vec4<u8> color = Pica::DebugUtils::LookupTexture(buffer, x, y, info, true); 583 Math::Vec4<u8> color = Pica::Texture::LookupTexture(buffer, x, y, info, true);
587 decoded_image.setPixel(x, y, qRgba(color.r(), color.g(), color.b(), color.a())); 584 decoded_image.setPixel(x, y, qRgba(color.r(), color.g(), color.b(), color.a()));
588 } 585 }
589 } 586 }
590
591 } else { 587 } else {
588 // We handle depth formats here because DebugUtils only supports TextureFormats
589
590 // TODO(yuriks): Convert to newer tile-based addressing
591 unsigned nibbles_per_pixel = GraphicsSurfaceWidget::NibblesPerPixel(surface_format);
592 unsigned stride = nibbles_per_pixel * surface_width / 2;
592 593
593 ASSERT_MSG(nibbles_per_pixel >= 2, 594 ASSERT_MSG(nibbles_per_pixel >= 2,
594 "Depth decoder only supports formats with at least one byte per pixel"); 595 "Depth decoder only supports formats with at least one byte per pixel");
@@ -689,7 +690,8 @@ void GraphicsSurfaceWidget::SaveSurface() {
689 690
690unsigned int GraphicsSurfaceWidget::NibblesPerPixel(GraphicsSurfaceWidget::Format format) { 691unsigned int GraphicsSurfaceWidget::NibblesPerPixel(GraphicsSurfaceWidget::Format format) {
691 if (format <= Format::MaxTextureFormat) { 692 if (format <= Format::MaxTextureFormat) {
692 return Pica::Regs::NibblesPerPixel(static_cast<Pica::Regs::TextureFormat>(format)); 693 return Pica::TexturingRegs::NibblesPerPixel(
694 static_cast<Pica::TexturingRegs::TextureFormat>(format));
693 } 695 }
694 696
695 switch (format) { 697 switch (format) {
diff --git a/src/citra_qt/debugger/graphics/graphics_tracing.cpp b/src/citra_qt/debugger/graphics/graphics_tracing.cpp
index 716ed50b8..40d5bed51 100644
--- a/src/citra_qt/debugger/graphics/graphics_tracing.cpp
+++ b/src/citra_qt/debugger/graphics/graphics_tracing.cpp
@@ -18,7 +18,6 @@
18#include "core/hw/lcd.h" 18#include "core/hw/lcd.h"
19#include "core/tracer/recorder.h" 19#include "core/tracer/recorder.h"
20#include "nihstro/float24.h" 20#include "nihstro/float24.h"
21#include "video_core/pica.h"
22#include "video_core/pica_state.h" 21#include "video_core/pica_state.h"
23 22
24GraphicsTracingWidget::GraphicsTracingWidget(std::shared_ptr<Pica::DebugContext> debug_context, 23GraphicsTracingWidget::GraphicsTracingWidget(std::shared_ptr<Pica::DebugContext> debug_context,
@@ -71,8 +70,8 @@ void GraphicsTracingWidget::StartRecording() {
71 std::array<u32, 4 * 16> default_attributes; 70 std::array<u32, 4 * 16> default_attributes;
72 for (unsigned i = 0; i < 16; ++i) { 71 for (unsigned i = 0; i < 16; ++i) {
73 for (unsigned comp = 0; comp < 3; ++comp) { 72 for (unsigned comp = 0; comp < 3; ++comp) {
74 default_attributes[4 * i + comp] = 73 default_attributes[4 * i + comp] = nihstro::to_float24(
75 nihstro::to_float24(Pica::g_state.vs_default_attributes[i][comp].ToFloat32()); 74 Pica::g_state.input_default_attributes.attr[i][comp].ToFloat32());
76 } 75 }
77 } 76 }
78 77
diff --git a/src/citra_qt/debugger/graphics/graphics_tracing.h b/src/citra_qt/debugger/graphics/graphics_tracing.h
index 3f73bcd2e..eb1292c29 100644
--- a/src/citra_qt/debugger/graphics/graphics_tracing.h
+++ b/src/citra_qt/debugger/graphics/graphics_tracing.h
@@ -15,6 +15,9 @@ public:
15 explicit GraphicsTracingWidget(std::shared_ptr<Pica::DebugContext> debug_context, 15 explicit GraphicsTracingWidget(std::shared_ptr<Pica::DebugContext> debug_context,
16 QWidget* parent = nullptr); 16 QWidget* parent = nullptr);
17 17
18 void OnEmulationStarting(EmuThread* emu_thread);
19 void OnEmulationStopping();
20
18private slots: 21private slots:
19 void StartRecording(); 22 void StartRecording();
20 void StopRecording(); 23 void StopRecording();
@@ -23,9 +26,6 @@ private slots:
23 void OnBreakPointHit(Pica::DebugContext::Event event, void* data) override; 26 void OnBreakPointHit(Pica::DebugContext::Event event, void* data) override;
24 void OnResumed() override; 27 void OnResumed() override;
25 28
26 void OnEmulationStarting(EmuThread* emu_thread);
27 void OnEmulationStopping();
28
29signals: 29signals:
30 void SetStartTracingButtonEnabled(bool enable); 30 void SetStartTracingButtonEnabled(bool enable);
31 void SetStopTracingButtonEnabled(bool enable); 31 void SetStopTracingButtonEnabled(bool enable);
diff --git a/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp b/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp
index ff2e7e363..e3f3194db 100644
--- a/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp
+++ b/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp
@@ -16,9 +16,10 @@
16#include <QTreeView> 16#include <QTreeView>
17#include "citra_qt/debugger/graphics/graphics_vertex_shader.h" 17#include "citra_qt/debugger/graphics/graphics_vertex_shader.h"
18#include "citra_qt/util/util.h" 18#include "citra_qt/util/util.h"
19#include "video_core/pica.h"
20#include "video_core/pica_state.h" 19#include "video_core/pica_state.h"
20#include "video_core/shader/debug_data.h"
21#include "video_core/shader/shader.h" 21#include "video_core/shader/shader.h"
22#include "video_core/shader/shader_interpreter.h"
22 23
23using nihstro::OpCode; 24using nihstro::OpCode;
24using nihstro::Instruction; 25using nihstro::Instruction;
@@ -357,7 +358,7 @@ void GraphicsVertexShaderWidget::DumpShader() {
357 auto& config = Pica::g_state.regs.vs; 358 auto& config = Pica::g_state.regs.vs;
358 359
359 Pica::DebugUtils::DumpShader(filename.toStdString(), config, setup, 360 Pica::DebugUtils::DumpShader(filename.toStdString(), config, setup,
360 Pica::g_state.regs.vs_output_attributes); 361 Pica::g_state.regs.rasterizer.vs_output_attributes);
361} 362}
362 363
363GraphicsVertexShaderWidget::GraphicsVertexShaderWidget( 364GraphicsVertexShaderWidget::GraphicsVertexShaderWidget(
@@ -509,7 +510,7 @@ void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_d
509 auto& shader_config = Pica::g_state.regs.vs; 510 auto& shader_config = Pica::g_state.regs.vs;
510 for (auto instr : shader_setup.program_code) 511 for (auto instr : shader_setup.program_code)
511 info.code.push_back({instr}); 512 info.code.push_back({instr});
512 int num_attributes = Pica::g_state.regs.vertex_attributes.GetNumTotalAttributes(); 513 int num_attributes = shader_config.max_input_attribute_index + 1;
513 514
514 for (auto pattern : shader_setup.swizzle_data) 515 for (auto pattern : shader_setup.swizzle_data)
515 info.swizzle_info.push_back({pattern}); 516 info.swizzle_info.push_back({pattern});
@@ -518,12 +519,13 @@ void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_d
518 info.labels.insert({entry_point, "main"}); 519 info.labels.insert({entry_point, "main"});
519 520
520 // Generate debug information 521 // Generate debug information
521 debug_data = Pica::g_state.vs.ProduceDebugInfo(input_vertex, num_attributes, shader_config, 522 Pica::Shader::InterpreterEngine shader_engine;
522 shader_setup); 523 shader_engine.SetupBatch(shader_setup, entry_point);
524 debug_data = shader_engine.ProduceDebugInfo(shader_setup, input_vertex, shader_config);
523 525
524 // Reload widget state 526 // Reload widget state
525 for (int attr = 0; attr < num_attributes; ++attr) { 527 for (int attr = 0; attr < num_attributes; ++attr) {
526 unsigned source_attr = shader_config.input_register_map.GetRegisterForAttribute(attr); 528 unsigned source_attr = shader_config.GetRegisterForAttribute(attr);
527 input_data_mapping[attr]->setText(QString("-> v%1").arg(source_attr)); 529 input_data_mapping[attr]->setText(QString("-> v%1").arg(source_attr));
528 input_data_container[attr]->setVisible(true); 530 input_data_container[attr]->setVisible(true);
529 } 531 }
diff --git a/src/citra_qt/debugger/graphics/graphics_vertex_shader.h b/src/citra_qt/debugger/graphics/graphics_vertex_shader.h
index bedea0bed..c249a2ff8 100644
--- a/src/citra_qt/debugger/graphics/graphics_vertex_shader.h
+++ b/src/citra_qt/debugger/graphics/graphics_vertex_shader.h
@@ -8,6 +8,7 @@
8#include <QTreeView> 8#include <QTreeView>
9#include "citra_qt/debugger/graphics/graphics_breakpoint_observer.h" 9#include "citra_qt/debugger/graphics/graphics_breakpoint_observer.h"
10#include "nihstro/parser_shbin.h" 10#include "nihstro/parser_shbin.h"
11#include "video_core/shader/debug_data.h"
11#include "video_core/shader/shader.h" 12#include "video_core/shader/shader.h"
12 13
13class QLabel; 14class QLabel;
@@ -81,7 +82,7 @@ private:
81 82
82 nihstro::ShaderInfo info; 83 nihstro::ShaderInfo info;
83 Pica::Shader::DebugData<true> debug_data; 84 Pica::Shader::DebugData<true> debug_data;
84 Pica::Shader::InputVertex input_vertex; 85 Pica::Shader::AttributeBuffer input_vertex;
85 86
86 friend class GraphicsVertexShaderModel; 87 friend class GraphicsVertexShaderModel;
87}; 88};
diff --git a/src/citra_qt/debugger/profiler.cpp b/src/citra_qt/debugger/profiler.cpp
index cee10403d..f060bbe08 100644
--- a/src/citra_qt/debugger/profiler.cpp
+++ b/src/citra_qt/debugger/profiler.cpp
@@ -2,6 +2,8 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <QAction>
6#include <QLayout>
5#include <QMouseEvent> 7#include <QMouseEvent>
6#include <QPainter> 8#include <QPainter>
7#include <QString> 9#include <QString>
@@ -9,121 +11,12 @@
9#include "citra_qt/util/util.h" 11#include "citra_qt/util/util.h"
10#include "common/common_types.h" 12#include "common/common_types.h"
11#include "common/microprofile.h" 13#include "common/microprofile.h"
12#include "common/profiler_reporting.h"
13 14
14// Include the implementation of the UI in this file. This isn't in microprofile.cpp because the 15// Include the implementation of the UI in this file. This isn't in microprofile.cpp because the
15// non-Qt frontends don't need it (and don't implement the UI drawing hooks either). 16// non-Qt frontends don't need it (and don't implement the UI drawing hooks either).
16#if MICROPROFILE_ENABLED 17#if MICROPROFILE_ENABLED
17#define MICROPROFILEUI_IMPL 1 18#define MICROPROFILEUI_IMPL 1
18#include "common/microprofileui.h" 19#include "common/microprofileui.h"
19#endif
20
21using namespace Common::Profiling;
22
23static QVariant GetDataForColumn(int col, const AggregatedDuration& duration) {
24 static auto duration_to_float = [](Duration dur) -> float {
25 using FloatMs = std::chrono::duration<float, std::chrono::milliseconds::period>;
26 return std::chrono::duration_cast<FloatMs>(dur).count();
27 };
28
29 switch (col) {
30 case 1:
31 return duration_to_float(duration.avg);
32 case 2:
33 return duration_to_float(duration.min);
34 case 3:
35 return duration_to_float(duration.max);
36 default:
37 return QVariant();
38 }
39}
40
41ProfilerModel::ProfilerModel(QObject* parent) : QAbstractItemModel(parent) {
42 updateProfilingInfo();
43}
44
45QVariant ProfilerModel::headerData(int section, Qt::Orientation orientation, int role) const {
46 if (orientation == Qt::Horizontal && role == Qt::DisplayRole) {
47 switch (section) {
48 case 0:
49 return tr("Category");
50 case 1:
51 return tr("Avg");
52 case 2:
53 return tr("Min");
54 case 3:
55 return tr("Max");
56 }
57 }
58
59 return QVariant();
60}
61
62QModelIndex ProfilerModel::index(int row, int column, const QModelIndex& parent) const {
63 return createIndex(row, column);
64}
65
66QModelIndex ProfilerModel::parent(const QModelIndex& child) const {
67 return QModelIndex();
68}
69
70int ProfilerModel::columnCount(const QModelIndex& parent) const {
71 return 4;
72}
73
74int ProfilerModel::rowCount(const QModelIndex& parent) const {
75 if (parent.isValid()) {
76 return 0;
77 } else {
78 return 2;
79 }
80}
81
82QVariant ProfilerModel::data(const QModelIndex& index, int role) const {
83 if (role == Qt::DisplayRole) {
84 if (index.row() == 0) {
85 if (index.column() == 0) {
86 return tr("Frame");
87 } else {
88 return GetDataForColumn(index.column(), results.frame_time);
89 }
90 } else if (index.row() == 1) {
91 if (index.column() == 0) {
92 return tr("Frame (with swapping)");
93 } else {
94 return GetDataForColumn(index.column(), results.interframe_time);
95 }
96 }
97 }
98
99 return QVariant();
100}
101
102void ProfilerModel::updateProfilingInfo() {
103 results = GetTimingResultsAggregator()->GetAggregatedResults();
104 emit dataChanged(createIndex(0, 1), createIndex(rowCount() - 1, 3));
105}
106
107ProfilerWidget::ProfilerWidget(QWidget* parent) : QDockWidget(parent) {
108 ui.setupUi(this);
109
110 model = new ProfilerModel(this);
111 ui.treeView->setModel(model);
112
113 connect(this, SIGNAL(visibilityChanged(bool)), SLOT(setProfilingInfoUpdateEnabled(bool)));
114 connect(&update_timer, SIGNAL(timeout()), model, SLOT(updateProfilingInfo()));
115}
116
117void ProfilerWidget::setProfilingInfoUpdateEnabled(bool enable) {
118 if (enable) {
119 update_timer.start(100);
120 model->updateProfilingInfo();
121 } else {
122 update_timer.stop();
123 }
124}
125
126#if MICROPROFILE_ENABLED
127 20
128class MicroProfileWidget : public QWidget { 21class MicroProfileWidget : public QWidget {
129public: 22public:
diff --git a/src/citra_qt/debugger/profiler.h b/src/citra_qt/debugger/profiler.h
index c8912fd5a..eae1e9e3c 100644
--- a/src/citra_qt/debugger/profiler.h
+++ b/src/citra_qt/debugger/profiler.h
@@ -8,46 +8,6 @@
8#include <QDockWidget> 8#include <QDockWidget>
9#include <QTimer> 9#include <QTimer>
10#include "common/microprofile.h" 10#include "common/microprofile.h"
11#include "common/profiler_reporting.h"
12#include "ui_profiler.h"
13
14class ProfilerModel : public QAbstractItemModel {
15 Q_OBJECT
16
17public:
18 explicit ProfilerModel(QObject* parent);
19
20 QVariant headerData(int section, Qt::Orientation orientation,
21 int role = Qt::DisplayRole) const override;
22 QModelIndex index(int row, int column,
23 const QModelIndex& parent = QModelIndex()) const override;
24 QModelIndex parent(const QModelIndex& child) const override;
25 int columnCount(const QModelIndex& parent = QModelIndex()) const override;
26 int rowCount(const QModelIndex& parent = QModelIndex()) const override;
27 QVariant data(const QModelIndex& index, int role = Qt::DisplayRole) const override;
28
29public slots:
30 void updateProfilingInfo();
31
32private:
33 Common::Profiling::AggregatedFrameResult results;
34};
35
36class ProfilerWidget : public QDockWidget {
37 Q_OBJECT
38
39public:
40 explicit ProfilerWidget(QWidget* parent = nullptr);
41
42private slots:
43 void setProfilingInfoUpdateEnabled(bool enable);
44
45private:
46 Ui::Profiler ui;
47 ProfilerModel* model;
48
49 QTimer update_timer;
50};
51 11
52class MicroProfileDialog : public QWidget { 12class MicroProfileDialog : public QWidget {
53 Q_OBJECT 13 Q_OBJECT
diff --git a/src/citra_qt/debugger/profiler.ui b/src/citra_qt/debugger/profiler.ui
deleted file mode 100644
index d3c9a9a1f..000000000
--- a/src/citra_qt/debugger/profiler.ui
+++ /dev/null
@@ -1,33 +0,0 @@
1<?xml version="1.0" encoding="UTF-8"?>
2<ui version="4.0">
3 <class>Profiler</class>
4 <widget class="QDockWidget" name="Profiler">
5 <property name="geometry">
6 <rect>
7 <x>0</x>
8 <y>0</y>
9 <width>400</width>
10 <height>300</height>
11 </rect>
12 </property>
13 <property name="windowTitle">
14 <string>Profiler</string>
15 </property>
16 <widget class="QWidget" name="dockWidgetContents">
17 <layout class="QVBoxLayout" name="verticalLayout">
18 <item>
19 <widget class="QTreeView" name="treeView">
20 <property name="alternatingRowColors">
21 <bool>true</bool>
22 </property>
23 <property name="uniformRowHeights">
24 <bool>true</bool>
25 </property>
26 </widget>
27 </item>
28 </layout>
29 </widget>
30 </widget>
31 <resources/>
32 <connections/>
33</ui>
diff --git a/src/citra_qt/game_list.cpp b/src/citra_qt/game_list.cpp
index 09469f3c5..a9ec9e830 100644
--- a/src/citra_qt/game_list.cpp
+++ b/src/citra_qt/game_list.cpp
@@ -2,6 +2,7 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <QFileInfo>
5#include <QHeaderView> 6#include <QHeaderView>
6#include <QMenu> 7#include <QMenu>
7#include <QThreadPool> 8#include <QThreadPool>
@@ -38,11 +39,13 @@ GameList::GameList(QWidget* parent) : QWidget{parent} {
38 39
39 connect(tree_view, &QTreeView::activated, this, &GameList::ValidateEntry); 40 connect(tree_view, &QTreeView::activated, this, &GameList::ValidateEntry);
40 connect(tree_view, &QTreeView::customContextMenuRequested, this, &GameList::PopupContextMenu); 41 connect(tree_view, &QTreeView::customContextMenuRequested, this, &GameList::PopupContextMenu);
42 connect(&watcher, &QFileSystemWatcher::directoryChanged, this, &GameList::RefreshGameDirectory);
41 43
42 // We must register all custom types with the Qt Automoc system so that we are able to use it 44 // We must register all custom types with the Qt Automoc system so that we are able to use it
43 // with signals/slots. In this case, QList falls under the umbrells of custom types. 45 // with signals/slots. In this case, QList falls under the umbrells of custom types.
44 qRegisterMetaType<QList<QStandardItem*>>("QList<QStandardItem*>"); 46 qRegisterMetaType<QList<QStandardItem*>>("QList<QStandardItem*>");
45 47
48 layout->setContentsMargins(0, 0, 0, 0);
46 layout->addWidget(tree_view); 49 layout->addWidget(tree_view);
47 setLayout(layout); 50 setLayout(layout);
48} 51}
@@ -102,6 +105,12 @@ void GameList::PopulateAsync(const QString& dir_path, bool deep_scan) {
102 item_model->removeRows(0, item_model->rowCount()); 105 item_model->removeRows(0, item_model->rowCount());
103 106
104 emit ShouldCancelWorker(); 107 emit ShouldCancelWorker();
108
109 auto watch_dirs = watcher.directories();
110 if (!watch_dirs.isEmpty()) {
111 watcher.removePaths(watch_dirs);
112 }
113 UpdateWatcherList(dir_path.toStdString(), deep_scan ? 256 : 0);
105 GameListWorker* worker = new GameListWorker(dir_path, deep_scan); 114 GameListWorker* worker = new GameListWorker(dir_path, deep_scan);
106 115
107 connect(worker, &GameListWorker::EntryReady, this, &GameList::AddEntry, Qt::QueuedConnection); 116 connect(worker, &GameListWorker::EntryReady, this, &GameList::AddEntry, Qt::QueuedConnection);
@@ -131,6 +140,53 @@ void GameList::LoadInterfaceLayout() {
131 item_model->sort(header->sortIndicatorSection(), header->sortIndicatorOrder()); 140 item_model->sort(header->sortIndicatorSection(), header->sortIndicatorOrder());
132} 141}
133 142
143const QStringList GameList::supported_file_extensions = {"3ds", "3dsx", "elf", "axf",
144 "cci", "cxi", "app"};
145
146static bool HasSupportedFileExtension(const std::string& file_name) {
147 QFileInfo file = QFileInfo(file_name.c_str());
148 return GameList::supported_file_extensions.contains(file.suffix(), Qt::CaseInsensitive);
149}
150
151void GameList::RefreshGameDirectory() {
152 if (!UISettings::values.gamedir.isEmpty() && current_worker != nullptr) {
153 LOG_INFO(Frontend, "Change detected in the games directory. Reloading game list.");
154 PopulateAsync(UISettings::values.gamedir, UISettings::values.gamedir_deepscan);
155 }
156}
157
158/**
159 * Adds the game list folder to the QFileSystemWatcher to check for updates.
160 *
161 * The file watcher will fire off an update to the game list when a change is detected in the game
162 * list folder.
163 *
164 * Notice: This method is run on the UI thread because QFileSystemWatcher is not thread safe and
165 * this function is fast enough to not stall the UI thread. If performance is an issue, it should
166 * be moved to another thread and properly locked to prevent concurrency issues.
167 *
168 * @param dir folder to check for changes in
169 * @param recursion 0 if recursion is disabled. Any positive number passed to this will add each
170 * directory recursively to the watcher and will update the file list if any of the folders
171 * change. The number determines how deep the recursion should traverse.
172 */
173void GameList::UpdateWatcherList(const std::string& dir, unsigned int recursion) {
174 const auto callback = [this, recursion](unsigned* num_entries_out, const std::string& directory,
175 const std::string& virtual_name) -> bool {
176 std::string physical_name = directory + DIR_SEP + virtual_name;
177
178 if (FileUtil::IsDirectory(physical_name)) {
179 UpdateWatcherList(physical_name, recursion - 1);
180 }
181 return true;
182 };
183
184 watcher.addPath(QString::fromStdString(dir));
185 if (recursion > 0) {
186 FileUtil::ForeachDirectoryEntry(nullptr, dir, callback);
187 }
188}
189
134void GameListWorker::AddFstEntriesToGameList(const std::string& dir_path, unsigned int recursion) { 190void GameListWorker::AddFstEntriesToGameList(const std::string& dir_path, unsigned int recursion) {
135 const auto callback = [this, recursion](unsigned* num_entries_out, const std::string& directory, 191 const auto callback = [this, recursion](unsigned* num_entries_out, const std::string& directory,
136 const std::string& virtual_name) -> bool { 192 const std::string& virtual_name) -> bool {
@@ -139,7 +195,7 @@ void GameListWorker::AddFstEntriesToGameList(const std::string& dir_path, unsign
139 if (stop_processing) 195 if (stop_processing)
140 return false; // Breaks the callback loop. 196 return false; // Breaks the callback loop.
141 197
142 if (!FileUtil::IsDirectory(physical_name)) { 198 if (!FileUtil::IsDirectory(physical_name) && HasSupportedFileExtension(physical_name)) {
143 std::unique_ptr<Loader::AppLoader> loader = Loader::GetLoader(physical_name); 199 std::unique_ptr<Loader::AppLoader> loader = Loader::GetLoader(physical_name);
144 if (!loader) 200 if (!loader)
145 return true; 201 return true;
@@ -173,6 +229,6 @@ void GameListWorker::run() {
173} 229}
174 230
175void GameListWorker::Cancel() { 231void GameListWorker::Cancel() {
176 disconnect(this, nullptr, nullptr, nullptr); 232 this->disconnect();
177 stop_processing = true; 233 stop_processing = true;
178} 234}
diff --git a/src/citra_qt/game_list.h b/src/citra_qt/game_list.h
index 1abf10051..b141fa3a5 100644
--- a/src/citra_qt/game_list.h
+++ b/src/citra_qt/game_list.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <QFileSystemWatcher>
7#include <QModelIndex> 8#include <QModelIndex>
8#include <QSettings> 9#include <QSettings>
9#include <QStandardItem> 10#include <QStandardItem>
@@ -33,6 +34,8 @@ public:
33 void SaveInterfaceLayout(); 34 void SaveInterfaceLayout();
34 void LoadInterfaceLayout(); 35 void LoadInterfaceLayout();
35 36
37 static const QStringList supported_file_extensions;
38
36signals: 39signals:
37 void GameChosen(QString game_path); 40 void GameChosen(QString game_path);
38 void ShouldCancelWorker(); 41 void ShouldCancelWorker();
@@ -44,8 +47,11 @@ private:
44 void DonePopulating(); 47 void DonePopulating();
45 48
46 void PopupContextMenu(const QPoint& menu_location); 49 void PopupContextMenu(const QPoint& menu_location);
50 void UpdateWatcherList(const std::string& path, unsigned int recursion);
51 void RefreshGameDirectory();
47 52
48 QTreeView* tree_view = nullptr; 53 QTreeView* tree_view = nullptr;
49 QStandardItemModel* item_model = nullptr; 54 QStandardItemModel* item_model = nullptr;
50 GameListWorker* current_worker = nullptr; 55 GameListWorker* current_worker = nullptr;
56 QFileSystemWatcher watcher;
51}; 57};
diff --git a/src/citra_qt/game_list_p.h b/src/citra_qt/game_list_p.h
index a15f06c5f..3c11b6dd1 100644
--- a/src/citra_qt/game_list_p.h
+++ b/src/citra_qt/game_list_p.h
@@ -16,8 +16,8 @@
16#include "video_core/utils.h" 16#include "video_core/utils.h"
17 17
18/** 18/**
19 * Gets game icon from SMDH 19 * Gets the game icon from SMDH data.
20 * @param sdmh SMDH data 20 * @param smdh SMDH data
21 * @param large If true, returns large icon (48x48), otherwise returns small icon (24x24) 21 * @param large If true, returns large icon (48x48), otherwise returns small icon (24x24)
22 * @return QPixmap game icon 22 * @return QPixmap game icon
23 */ 23 */
@@ -42,8 +42,8 @@ static QPixmap GetDefaultIcon(bool large) {
42} 42}
43 43
44/** 44/**
45 * Gets the short game title fromn SMDH 45 * Gets the short game title from SMDH data.
46 * @param sdmh SMDH data 46 * @param smdh SMDH data
47 * @param language title language 47 * @param language title language
48 * @return QString short title 48 * @return QString short title
49 */ 49 */
diff --git a/src/citra_qt/hotkeys.h b/src/citra_qt/hotkeys.h
index 46f48c2d8..a4ccc193b 100644
--- a/src/citra_qt/hotkeys.h
+++ b/src/citra_qt/hotkeys.h
@@ -29,6 +29,8 @@ void RegisterHotkey(const QString& group, const QString& action,
29/** 29/**
30 * Returns a QShortcut object whose activated() signal can be connected to other QObjects' slots. 30 * Returns a QShortcut object whose activated() signal can be connected to other QObjects' slots.
31 * 31 *
32 * @param group General group this hotkey belongs to (e.g. "Main Window", "Debugger").
33 * @param action Name of the action (e.g. "Start Emulation", "Load Image").
32 * @param widget Parent widget of the returned QShortcut. 34 * @param widget Parent widget of the returned QShortcut.
33 * @warning If multiple QWidgets' call this function for the same action, the returned QShortcut 35 * @warning If multiple QWidgets' call this function for the same action, the returned QShortcut
34 * will be the same. Thus, you shouldn't rely on the caller really being the QShortcut's parent. 36 * will be the same. Thus, you shouldn't rely on the caller really being the QShortcut's parent.
diff --git a/src/citra_qt/main.cpp b/src/citra_qt/main.cpp
index f765c0147..fd51659b9 100644
--- a/src/citra_qt/main.cpp
+++ b/src/citra_qt/main.cpp
@@ -54,28 +54,30 @@ Q_IMPORT_PLUGIN(QWindowsIntegrationPlugin);
54 54
55GMainWindow::GMainWindow() : config(new Config()), emu_thread(nullptr) { 55GMainWindow::GMainWindow() : config(new Config()), emu_thread(nullptr) {
56 Pica::g_debug_context = Pica::DebugContext::Construct(); 56 Pica::g_debug_context = Pica::DebugContext::Construct();
57 57 setAcceptDrops(true);
58 ui.setupUi(this); 58 ui.setupUi(this);
59 statusBar()->hide(); 59 statusBar()->hide();
60 60
61 InitializeWidgets(); 61 InitializeWidgets();
62 InitializeDebugMenuActions(); 62 InitializeDebugWidgets();
63 InitializeRecentFileMenuActions(); 63 InitializeRecentFileMenuActions();
64 InitializeHotkeys(); 64 InitializeHotkeys();
65 65
66 SetDefaultUIGeometry(); 66 SetDefaultUIGeometry();
67 RestoreUIState(); 67 RestoreUIState();
68 68
69 ConnectMenuEvents();
69 ConnectWidgetEvents(); 70 ConnectWidgetEvents();
70 71
71 setWindowTitle(QString("Citra | %1-%2").arg(Common::g_scm_branch, Common::g_scm_desc)); 72 setWindowTitle(QString("Citra %1| %2-%3")
73 .arg(Common::g_build_name, Common::g_scm_branch, Common::g_scm_desc));
72 show(); 74 show();
73 75
74 game_list->PopulateAsync(UISettings::values.gamedir, UISettings::values.gamedir_deepscan); 76 game_list->PopulateAsync(UISettings::values.gamedir, UISettings::values.gamedir_deepscan);
75 77
76 QStringList args = QApplication::arguments(); 78 QStringList args = QApplication::arguments();
77 if (args.length() >= 2) { 79 if (args.length() >= 2) {
78 BootGame(args[1].toStdString()); 80 BootGame(args[1]);
79 } 81 }
80} 82}
81 83
@@ -94,73 +96,99 @@ void GMainWindow::InitializeWidgets() {
94 game_list = new GameList(); 96 game_list = new GameList();
95 ui.horizontalLayout->addWidget(game_list); 97 ui.horizontalLayout->addWidget(game_list);
96 98
97 profilerWidget = new ProfilerWidget(this); 99 // Create status bar
98 addDockWidget(Qt::BottomDockWidgetArea, profilerWidget); 100 emu_speed_label = new QLabel();
99 profilerWidget->hide(); 101 emu_speed_label->setToolTip(tr("Current emulation speed. Values higher or lower than 100% "
102 "indicate emulation is running faster or slower than a 3DS."));
103 game_fps_label = new QLabel();
104 game_fps_label->setToolTip(tr("How many frames per second the game is currently displaying. "
105 "This will vary from game to game and scene to scene."));
106 emu_frametime_label = new QLabel();
107 emu_frametime_label->setToolTip(
108 tr("Time taken to emulate a 3DS frame, not counting framelimiting or v-sync. For "
109 "full-speed emulation this should be at most 16.67 ms."));
110
111 for (auto& label : {emu_speed_label, game_fps_label, emu_frametime_label}) {
112 label->setVisible(false);
113 label->setFrameStyle(QFrame::NoFrame);
114 label->setContentsMargins(4, 0, 4, 0);
115 statusBar()->addPermanentWidget(label);
116 }
117 statusBar()->setVisible(true);
118}
119
120void GMainWindow::InitializeDebugWidgets() {
121 connect(ui.action_Create_Pica_Surface_Viewer, &QAction::triggered, this,
122 &GMainWindow::OnCreateGraphicsSurfaceViewer);
123
124 QMenu* debug_menu = ui.menu_View_Debugging;
100 125
101#if MICROPROFILE_ENABLED 126#if MICROPROFILE_ENABLED
102 microProfileDialog = new MicroProfileDialog(this); 127 microProfileDialog = new MicroProfileDialog(this);
103 microProfileDialog->hide(); 128 microProfileDialog->hide();
129 debug_menu->addAction(microProfileDialog->toggleViewAction());
104#endif 130#endif
105 131
106 disasmWidget = new DisassemblerWidget(this, emu_thread.get()); 132 disasmWidget = new DisassemblerWidget(this, emu_thread.get());
107 addDockWidget(Qt::BottomDockWidgetArea, disasmWidget); 133 addDockWidget(Qt::BottomDockWidgetArea, disasmWidget);
108 disasmWidget->hide(); 134 disasmWidget->hide();
135 debug_menu->addAction(disasmWidget->toggleViewAction());
136 connect(this, &GMainWindow::EmulationStarting, disasmWidget,
137 &DisassemblerWidget::OnEmulationStarting);
138 connect(this, &GMainWindow::EmulationStopping, disasmWidget,
139 &DisassemblerWidget::OnEmulationStopping);
109 140
110 registersWidget = new RegistersWidget(this); 141 registersWidget = new RegistersWidget(this);
111 addDockWidget(Qt::RightDockWidgetArea, registersWidget); 142 addDockWidget(Qt::RightDockWidgetArea, registersWidget);
112 registersWidget->hide(); 143 registersWidget->hide();
144 debug_menu->addAction(registersWidget->toggleViewAction());
145 connect(this, &GMainWindow::EmulationStarting, registersWidget,
146 &RegistersWidget::OnEmulationStarting);
147 connect(this, &GMainWindow::EmulationStopping, registersWidget,
148 &RegistersWidget::OnEmulationStopping);
113 149
114 callstackWidget = new CallstackWidget(this); 150 callstackWidget = new CallstackWidget(this);
115 addDockWidget(Qt::RightDockWidgetArea, callstackWidget); 151 addDockWidget(Qt::RightDockWidgetArea, callstackWidget);
116 callstackWidget->hide(); 152 callstackWidget->hide();
153 debug_menu->addAction(callstackWidget->toggleViewAction());
117 154
118 graphicsWidget = new GPUCommandStreamWidget(this); 155 graphicsWidget = new GPUCommandStreamWidget(this);
119 addDockWidget(Qt::RightDockWidgetArea, graphicsWidget); 156 addDockWidget(Qt::RightDockWidgetArea, graphicsWidget);
120 graphicsWidget->hide(); 157 graphicsWidget->hide();
158 debug_menu->addAction(graphicsWidget->toggleViewAction());
121 159
122 graphicsCommandsWidget = new GPUCommandListWidget(this); 160 graphicsCommandsWidget = new GPUCommandListWidget(this);
123 addDockWidget(Qt::RightDockWidgetArea, graphicsCommandsWidget); 161 addDockWidget(Qt::RightDockWidgetArea, graphicsCommandsWidget);
124 graphicsCommandsWidget->hide(); 162 graphicsCommandsWidget->hide();
163 debug_menu->addAction(graphicsCommandsWidget->toggleViewAction());
125 164
126 graphicsBreakpointsWidget = new GraphicsBreakPointsWidget(Pica::g_debug_context, this); 165 graphicsBreakpointsWidget = new GraphicsBreakPointsWidget(Pica::g_debug_context, this);
127 addDockWidget(Qt::RightDockWidgetArea, graphicsBreakpointsWidget); 166 addDockWidget(Qt::RightDockWidgetArea, graphicsBreakpointsWidget);
128 graphicsBreakpointsWidget->hide(); 167 graphicsBreakpointsWidget->hide();
168 debug_menu->addAction(graphicsBreakpointsWidget->toggleViewAction());
129 169
130 graphicsVertexShaderWidget = new GraphicsVertexShaderWidget(Pica::g_debug_context, this); 170 graphicsVertexShaderWidget = new GraphicsVertexShaderWidget(Pica::g_debug_context, this);
131 addDockWidget(Qt::RightDockWidgetArea, graphicsVertexShaderWidget); 171 addDockWidget(Qt::RightDockWidgetArea, graphicsVertexShaderWidget);
132 graphicsVertexShaderWidget->hide(); 172 graphicsVertexShaderWidget->hide();
173 debug_menu->addAction(graphicsVertexShaderWidget->toggleViewAction());
133 174
134 graphicsTracingWidget = new GraphicsTracingWidget(Pica::g_debug_context, this); 175 graphicsTracingWidget = new GraphicsTracingWidget(Pica::g_debug_context, this);
135 addDockWidget(Qt::RightDockWidgetArea, graphicsTracingWidget); 176 addDockWidget(Qt::RightDockWidgetArea, graphicsTracingWidget);
136 graphicsTracingWidget->hide(); 177 graphicsTracingWidget->hide();
178 debug_menu->addAction(graphicsTracingWidget->toggleViewAction());
179 connect(this, &GMainWindow::EmulationStarting, graphicsTracingWidget,
180 &GraphicsTracingWidget::OnEmulationStarting);
181 connect(this, &GMainWindow::EmulationStopping, graphicsTracingWidget,
182 &GraphicsTracingWidget::OnEmulationStopping);
137 183
138 waitTreeWidget = new WaitTreeWidget(this); 184 waitTreeWidget = new WaitTreeWidget(this);
139 addDockWidget(Qt::LeftDockWidgetArea, waitTreeWidget); 185 addDockWidget(Qt::LeftDockWidgetArea, waitTreeWidget);
140 waitTreeWidget->hide(); 186 waitTreeWidget->hide();
141}
142
143void GMainWindow::InitializeDebugMenuActions() {
144 auto graphicsSurfaceViewerAction = new QAction(tr("Create Pica Surface Viewer"), this);
145 connect(graphicsSurfaceViewerAction, SIGNAL(triggered()), this,
146 SLOT(OnCreateGraphicsSurfaceViewer()));
147
148 QMenu* debug_menu = ui.menu_View->addMenu(tr("Debugging"));
149 debug_menu->addAction(graphicsSurfaceViewerAction);
150 debug_menu->addSeparator();
151 debug_menu->addAction(profilerWidget->toggleViewAction());
152#if MICROPROFILE_ENABLED
153 debug_menu->addAction(microProfileDialog->toggleViewAction());
154#endif
155 debug_menu->addAction(disasmWidget->toggleViewAction());
156 debug_menu->addAction(registersWidget->toggleViewAction());
157 debug_menu->addAction(callstackWidget->toggleViewAction());
158 debug_menu->addAction(graphicsWidget->toggleViewAction());
159 debug_menu->addAction(graphicsCommandsWidget->toggleViewAction());
160 debug_menu->addAction(graphicsBreakpointsWidget->toggleViewAction());
161 debug_menu->addAction(graphicsVertexShaderWidget->toggleViewAction());
162 debug_menu->addAction(graphicsTracingWidget->toggleViewAction());
163 debug_menu->addAction(waitTreeWidget->toggleViewAction()); 187 debug_menu->addAction(waitTreeWidget->toggleViewAction());
188 connect(this, &GMainWindow::EmulationStarting, waitTreeWidget,
189 &WaitTreeWidget::OnEmulationStarting);
190 connect(this, &GMainWindow::EmulationStopping, waitTreeWidget,
191 &WaitTreeWidget::OnEmulationStopping);
164} 192}
165 193
166void GMainWindow::InitializeRecentFileMenuActions() { 194void GMainWindow::InitializeRecentFileMenuActions() {
@@ -215,41 +243,46 @@ void GMainWindow::RestoreUIState() {
215 ui.action_Single_Window_Mode->setChecked(UISettings::values.single_window_mode); 243 ui.action_Single_Window_Mode->setChecked(UISettings::values.single_window_mode);
216 ToggleWindowMode(); 244 ToggleWindowMode();
217 245
218 ui.actionDisplay_widget_title_bars->setChecked(UISettings::values.display_titlebar); 246 ui.action_Display_Dock_Widget_Headers->setChecked(UISettings::values.display_titlebar);
219 OnDisplayTitleBars(ui.actionDisplay_widget_title_bars->isChecked()); 247 OnDisplayTitleBars(ui.action_Display_Dock_Widget_Headers->isChecked());
248
249 ui.action_Show_Status_Bar->setChecked(UISettings::values.show_status_bar);
250 statusBar()->setVisible(ui.action_Show_Status_Bar->isChecked());
220} 251}
221 252
222void GMainWindow::ConnectWidgetEvents() { 253void GMainWindow::ConnectWidgetEvents() {
223 connect(game_list, SIGNAL(GameChosen(QString)), this, SLOT(OnGameListLoadFile(QString)), 254 connect(game_list, SIGNAL(GameChosen(QString)), this, SLOT(OnGameListLoadFile(QString)));
224 Qt::DirectConnection);
225 connect(game_list, SIGNAL(OpenSaveFolderRequested(u64)), this, 255 connect(game_list, SIGNAL(OpenSaveFolderRequested(u64)), this,
226 SLOT(OnGameListOpenSaveFolder(u64)), Qt::DirectConnection); 256 SLOT(OnGameListOpenSaveFolder(u64)));
227 connect(ui.action_Configure, SIGNAL(triggered()), this, SLOT(OnConfigure())); 257
228 connect(ui.action_Load_File, SIGNAL(triggered()), this, SLOT(OnMenuLoadFile()),
229 Qt::DirectConnection);
230 connect(ui.action_Load_Symbol_Map, SIGNAL(triggered()), this, SLOT(OnMenuLoadSymbolMap()));
231 connect(ui.action_Select_Game_List_Root, SIGNAL(triggered()), this,
232 SLOT(OnMenuSelectGameListRoot()));
233 connect(ui.action_Start, SIGNAL(triggered()), this, SLOT(OnStartGame()));
234 connect(ui.action_Pause, SIGNAL(triggered()), this, SLOT(OnPauseGame()));
235 connect(ui.action_Stop, SIGNAL(triggered()), this, SLOT(OnStopGame()));
236 connect(ui.action_Single_Window_Mode, SIGNAL(triggered(bool)), this, SLOT(ToggleWindowMode()));
237
238 connect(this, SIGNAL(EmulationStarting(EmuThread*)), disasmWidget,
239 SLOT(OnEmulationStarting(EmuThread*)));
240 connect(this, SIGNAL(EmulationStopping()), disasmWidget, SLOT(OnEmulationStopping()));
241 connect(this, SIGNAL(EmulationStarting(EmuThread*)), registersWidget,
242 SLOT(OnEmulationStarting(EmuThread*)));
243 connect(this, SIGNAL(EmulationStopping()), registersWidget, SLOT(OnEmulationStopping()));
244 connect(this, SIGNAL(EmulationStarting(EmuThread*)), render_window, 258 connect(this, SIGNAL(EmulationStarting(EmuThread*)), render_window,
245 SLOT(OnEmulationStarting(EmuThread*))); 259 SLOT(OnEmulationStarting(EmuThread*)));
246 connect(this, SIGNAL(EmulationStopping()), render_window, SLOT(OnEmulationStopping())); 260 connect(this, SIGNAL(EmulationStopping()), render_window, SLOT(OnEmulationStopping()));
247 connect(this, SIGNAL(EmulationStarting(EmuThread*)), graphicsTracingWidget, 261
248 SLOT(OnEmulationStarting(EmuThread*))); 262 connect(&status_bar_update_timer, &QTimer::timeout, this, &GMainWindow::UpdateStatusBar);
249 connect(this, SIGNAL(EmulationStopping()), graphicsTracingWidget, SLOT(OnEmulationStopping())); 263}
250 connect(this, SIGNAL(EmulationStarting(EmuThread*)), waitTreeWidget, 264
251 SLOT(OnEmulationStarting(EmuThread*))); 265void GMainWindow::ConnectMenuEvents() {
252 connect(this, SIGNAL(EmulationStopping()), waitTreeWidget, SLOT(OnEmulationStopping())); 266 // File
267 connect(ui.action_Load_File, &QAction::triggered, this, &GMainWindow::OnMenuLoadFile);
268 connect(ui.action_Load_Symbol_Map, &QAction::triggered, this,
269 &GMainWindow::OnMenuLoadSymbolMap);
270 connect(ui.action_Select_Game_List_Root, &QAction::triggered, this,
271 &GMainWindow::OnMenuSelectGameListRoot);
272 connect(ui.action_Exit, &QAction::triggered, this, &QMainWindow::close);
273
274 // Emulation
275 connect(ui.action_Start, &QAction::triggered, this, &GMainWindow::OnStartGame);
276 connect(ui.action_Pause, &QAction::triggered, this, &GMainWindow::OnPauseGame);
277 connect(ui.action_Stop, &QAction::triggered, this, &GMainWindow::OnStopGame);
278 connect(ui.action_Configure, &QAction::triggered, this, &GMainWindow::OnConfigure);
279
280 // View
281 connect(ui.action_Single_Window_Mode, &QAction::triggered, this,
282 &GMainWindow::ToggleWindowMode);
283 connect(ui.action_Display_Dock_Widget_Headers, &QAction::triggered, this,
284 &GMainWindow::OnDisplayTitleBars);
285 connect(ui.action_Show_Status_Bar, &QAction::triggered, statusBar(), &QStatusBar::setVisible);
253} 286}
254 287
255void GMainWindow::OnDisplayTitleBars(bool show) { 288void GMainWindow::OnDisplayTitleBars(bool show) {
@@ -272,7 +305,7 @@ void GMainWindow::OnDisplayTitleBars(bool show) {
272 } 305 }
273} 306}
274 307
275bool GMainWindow::LoadROM(const std::string& filename) { 308bool GMainWindow::LoadROM(const QString& filename) {
276 // Shutdown previous session if the emu thread is still active... 309 // Shutdown previous session if the emu thread is still active...
277 if (emu_thread != nullptr) 310 if (emu_thread != nullptr)
278 ShutdownGame(); 311 ShutdownGame();
@@ -290,12 +323,13 @@ bool GMainWindow::LoadROM(const std::string& filename) {
290 323
291 Core::System& system{Core::System::GetInstance()}; 324 Core::System& system{Core::System::GetInstance()};
292 325
293 const Core::System::ResultStatus result{system.Load(render_window, filename)}; 326 const Core::System::ResultStatus result{system.Load(render_window, filename.toStdString())};
294 327
295 if (result != Core::System::ResultStatus::Success) { 328 if (result != Core::System::ResultStatus::Success) {
296 switch (result) { 329 switch (result) {
297 case Core::System::ResultStatus::ErrorGetLoader: 330 case Core::System::ResultStatus::ErrorGetLoader:
298 LOG_CRITICAL(Frontend, "Failed to obtain loader for %s!", filename.c_str()); 331 LOG_CRITICAL(Frontend, "Failed to obtain loader for %s!",
332 filename.toStdString().c_str());
299 QMessageBox::critical(this, tr("Error while loading ROM!"), 333 QMessageBox::critical(this, tr("Error while loading ROM!"),
300 tr("The ROM format is not supported.")); 334 tr("The ROM format is not supported."));
301 break; 335 break;
@@ -335,7 +369,7 @@ bool GMainWindow::LoadROM(const std::string& filename) {
335 return true; 369 return true;
336} 370}
337 371
338void GMainWindow::BootGame(const std::string& filename) { 372void GMainWindow::BootGame(const QString& filename) {
339 LOG_INFO(Frontend, "Citra starting..."); 373 LOG_INFO(Frontend, "Citra starting...");
340 StoreRecentFile(filename); // Put the filename on top of the list 374 StoreRecentFile(filename); // Put the filename on top of the list
341 375
@@ -374,6 +408,8 @@ void GMainWindow::BootGame(const std::string& filename) {
374 if (ui.action_Single_Window_Mode->isChecked()) { 408 if (ui.action_Single_Window_Mode->isChecked()) {
375 game_list->hide(); 409 game_list->hide();
376 } 410 }
411 status_bar_update_timer.start(2000);
412
377 render_window->show(); 413 render_window->show();
378 render_window->setFocus(); 414 render_window->setFocus();
379 415
@@ -408,11 +444,17 @@ void GMainWindow::ShutdownGame() {
408 render_window->hide(); 444 render_window->hide();
409 game_list->show(); 445 game_list->show();
410 446
447 // Disable status bar updates
448 status_bar_update_timer.stop();
449 emu_speed_label->setVisible(false);
450 game_fps_label->setVisible(false);
451 emu_frametime_label->setVisible(false);
452
411 emulation_running = false; 453 emulation_running = false;
412} 454}
413 455
414void GMainWindow::StoreRecentFile(const std::string& filename) { 456void GMainWindow::StoreRecentFile(const QString& filename) {
415 UISettings::values.recent_files.prepend(QString::fromStdString(filename)); 457 UISettings::values.recent_files.prepend(filename);
416 UISettings::values.recent_files.removeDuplicates(); 458 UISettings::values.recent_files.removeDuplicates();
417 while (UISettings::values.recent_files.size() > max_recent_files_item) { 459 while (UISettings::values.recent_files.size() > max_recent_files_item) {
418 UISettings::values.recent_files.removeLast(); 460 UISettings::values.recent_files.removeLast();
@@ -447,7 +489,7 @@ void GMainWindow::UpdateRecentFiles() {
447} 489}
448 490
449void GMainWindow::OnGameListLoadFile(QString game_path) { 491void GMainWindow::OnGameListLoadFile(QString game_path) {
450 BootGame(game_path.toStdString()); 492 BootGame(game_path);
451} 493}
452 494
453void GMainWindow::OnGameListOpenSaveFolder(u64 program_id) { 495void GMainWindow::OnGameListOpenSaveFolder(u64 program_id) {
@@ -466,19 +508,25 @@ void GMainWindow::OnGameListOpenSaveFolder(u64 program_id) {
466} 508}
467 509
468void GMainWindow::OnMenuLoadFile() { 510void GMainWindow::OnMenuLoadFile() {
469 QString filename = 511 QString extensions;
470 QFileDialog::getOpenFileName(this, tr("Load File"), UISettings::values.roms_path, 512 for (const auto& piece : game_list->supported_file_extensions)
471 tr("3DS executable (*.3ds *.3dsx *.elf *.axf *.cci *.cxi)")); 513 extensions += "*." + piece + " ";
514
515 QString file_filter = tr("3DS Executable") + " (" + extensions + ")";
516 file_filter += ";;" + tr("All Files (*.*)");
517
518 QString filename = QFileDialog::getOpenFileName(this, tr("Load File"),
519 UISettings::values.roms_path, file_filter);
472 if (!filename.isEmpty()) { 520 if (!filename.isEmpty()) {
473 UISettings::values.roms_path = QFileInfo(filename).path(); 521 UISettings::values.roms_path = QFileInfo(filename).path();
474 522
475 BootGame(filename.toStdString()); 523 BootGame(filename);
476 } 524 }
477} 525}
478 526
479void GMainWindow::OnMenuLoadSymbolMap() { 527void GMainWindow::OnMenuLoadSymbolMap() {
480 QString filename = QFileDialog::getOpenFileName( 528 QString filename = QFileDialog::getOpenFileName(
481 this, tr("Load Symbol Map"), UISettings::values.symbols_path, tr("Symbol map (*)")); 529 this, tr("Load Symbol Map"), UISettings::values.symbols_path, tr("Symbol Map (*.*)"));
482 if (!filename.isEmpty()) { 530 if (!filename.isEmpty()) {
483 UISettings::values.symbols_path = QFileInfo(filename).path(); 531 UISettings::values.symbols_path = QFileInfo(filename).path();
484 532
@@ -501,7 +549,7 @@ void GMainWindow::OnMenuRecentFile() {
501 QString filename = action->data().toString(); 549 QString filename = action->data().toString();
502 QFileInfo file_info(filename); 550 QFileInfo file_info(filename);
503 if (file_info.exists()) { 551 if (file_info.exists()) {
504 BootGame(filename.toStdString()); 552 BootGame(filename);
505 } else { 553 } else {
506 // Display an error message and remove the file from the list. 554 // Display an error message and remove the file from the list.
507 QMessageBox::information(this, tr("File not found"), 555 QMessageBox::information(this, tr("File not found"),
@@ -581,6 +629,23 @@ void GMainWindow::OnCreateGraphicsSurfaceViewer() {
581 graphicsSurfaceViewerWidget->show(); 629 graphicsSurfaceViewerWidget->show();
582} 630}
583 631
632void GMainWindow::UpdateStatusBar() {
633 if (emu_thread == nullptr) {
634 status_bar_update_timer.stop();
635 return;
636 }
637
638 auto results = Core::System::GetInstance().GetAndResetPerfStats();
639
640 emu_speed_label->setText(tr("Speed: %1%").arg(results.emulation_speed * 100.0, 0, 'f', 0));
641 game_fps_label->setText(tr("Game: %1 FPS").arg(results.game_fps, 0, 'f', 0));
642 emu_frametime_label->setText(tr("Frame: %1 ms").arg(results.frametime * 1000.0, 0, 'f', 2));
643
644 emu_speed_label->setVisible(true);
645 game_fps_label->setVisible(true);
646 emu_frametime_label->setVisible(true);
647}
648
584bool GMainWindow::ConfirmClose() { 649bool GMainWindow::ConfirmClose() {
585 if (emu_thread == nullptr || !UISettings::values.confirm_before_closing) 650 if (emu_thread == nullptr || !UISettings::values.confirm_before_closing)
586 return true; 651 return true;
@@ -605,7 +670,8 @@ void GMainWindow::closeEvent(QCloseEvent* event) {
605 UISettings::values.microprofile_visible = microProfileDialog->isVisible(); 670 UISettings::values.microprofile_visible = microProfileDialog->isVisible();
606#endif 671#endif
607 UISettings::values.single_window_mode = ui.action_Single_Window_Mode->isChecked(); 672 UISettings::values.single_window_mode = ui.action_Single_Window_Mode->isChecked();
608 UISettings::values.display_titlebar = ui.actionDisplay_widget_title_bars->isChecked(); 673 UISettings::values.display_titlebar = ui.action_Display_Dock_Widget_Headers->isChecked();
674 UISettings::values.show_status_bar = ui.action_Show_Status_Bar->isChecked();
609 UISettings::values.first_start = false; 675 UISettings::values.first_start = false;
610 676
611 game_list->SaveInterfaceLayout(); 677 game_list->SaveInterfaceLayout();
@@ -620,6 +686,40 @@ void GMainWindow::closeEvent(QCloseEvent* event) {
620 QWidget::closeEvent(event); 686 QWidget::closeEvent(event);
621} 687}
622 688
689static bool IsSingleFileDropEvent(QDropEvent* event) {
690 const QMimeData* mimeData = event->mimeData();
691 return mimeData->hasUrls() && mimeData->urls().length() == 1;
692}
693
694void GMainWindow::dropEvent(QDropEvent* event) {
695 if (IsSingleFileDropEvent(event) && ConfirmChangeGame()) {
696 const QMimeData* mimeData = event->mimeData();
697 QString filename = mimeData->urls().at(0).toLocalFile();
698 BootGame(filename);
699 }
700}
701
702void GMainWindow::dragEnterEvent(QDragEnterEvent* event) {
703 if (IsSingleFileDropEvent(event)) {
704 event->acceptProposedAction();
705 }
706}
707
708void GMainWindow::dragMoveEvent(QDragMoveEvent* event) {
709 event->acceptProposedAction();
710}
711
712bool GMainWindow::ConfirmChangeGame() {
713 if (emu_thread == nullptr)
714 return true;
715
716 auto answer = QMessageBox::question(
717 this, tr("Citra"),
718 tr("Are you sure you want to stop the emulation? Any unsaved progress will be lost."),
719 QMessageBox::Yes | QMessageBox::No, QMessageBox::No);
720 return answer != QMessageBox::No;
721}
722
623#ifdef main 723#ifdef main
624#undef main 724#undef main
625#endif 725#endif
diff --git a/src/citra_qt/main.h b/src/citra_qt/main.h
index a2fd45c47..ec841eaa5 100644
--- a/src/citra_qt/main.h
+++ b/src/citra_qt/main.h
@@ -64,7 +64,7 @@ signals:
64 64
65private: 65private:
66 void InitializeWidgets(); 66 void InitializeWidgets();
67 void InitializeDebugMenuActions(); 67 void InitializeDebugWidgets();
68 void InitializeRecentFileMenuActions(); 68 void InitializeRecentFileMenuActions();
69 void InitializeHotkeys(); 69 void InitializeHotkeys();
70 70
@@ -72,15 +72,10 @@ private:
72 void RestoreUIState(); 72 void RestoreUIState();
73 73
74 void ConnectWidgetEvents(); 74 void ConnectWidgetEvents();
75 void ConnectMenuEvents();
75 76
76 /** 77 bool LoadROM(const QString& filename);
77 * Initializes the emulation system. 78 void BootGame(const QString& filename);
78 * @param system_mode The system mode with which to intialize the kernel.
79 * @returns Whether the system was properly initialized.
80 */
81 bool InitializeSystem(u32 system_mode);
82 bool LoadROM(const std::string& filename);
83 void BootGame(const std::string& filename);
84 void ShutdownGame(); 79 void ShutdownGame();
85 80
86 /** 81 /**
@@ -94,7 +89,7 @@ private:
94 * 89 *
95 * @param filename the filename to store 90 * @param filename the filename to store
96 */ 91 */
97 void StoreRecentFile(const std::string& filename); 92 void StoreRecentFile(const QString& filename);
98 93
99 /** 94 /**
100 * Updates the recent files menu. 95 * Updates the recent files menu.
@@ -110,6 +105,7 @@ private:
110 * @return true if the user confirmed 105 * @return true if the user confirmed
111 */ 106 */
112 bool ConfirmClose(); 107 bool ConfirmClose();
108 bool ConfirmChangeGame();
113 void closeEvent(QCloseEvent* event) override; 109 void closeEvent(QCloseEvent* event) override;
114 110
115private slots: 111private slots:
@@ -131,17 +127,26 @@ private slots:
131 void OnCreateGraphicsSurfaceViewer(); 127 void OnCreateGraphicsSurfaceViewer();
132 128
133private: 129private:
130 void UpdateStatusBar();
131
134 Ui::MainWindow ui; 132 Ui::MainWindow ui;
135 133
136 GRenderWindow* render_window; 134 GRenderWindow* render_window;
137 GameList* game_list; 135 GameList* game_list;
138 136
137 // Status bar elements
138 QLabel* emu_speed_label = nullptr;
139 QLabel* game_fps_label = nullptr;
140 QLabel* emu_frametime_label = nullptr;
141 QTimer status_bar_update_timer;
142
139 std::unique_ptr<Config> config; 143 std::unique_ptr<Config> config;
140 144
141 // Whether emulation is currently running in Citra. 145 // Whether emulation is currently running in Citra.
142 bool emulation_running = false; 146 bool emulation_running = false;
143 std::unique_ptr<EmuThread> emu_thread; 147 std::unique_ptr<EmuThread> emu_thread;
144 148
149 // Debugger panes
145 ProfilerWidget* profilerWidget; 150 ProfilerWidget* profilerWidget;
146 MicroProfileDialog* microProfileDialog; 151 MicroProfileDialog* microProfileDialog;
147 DisassemblerWidget* disasmWidget; 152 DisassemblerWidget* disasmWidget;
@@ -155,6 +160,11 @@ private:
155 WaitTreeWidget* waitTreeWidget; 160 WaitTreeWidget* waitTreeWidget;
156 161
157 QAction* actions_recent_files[max_recent_files_item]; 162 QAction* actions_recent_files[max_recent_files_item];
163
164protected:
165 void dropEvent(QDropEvent* event) override;
166 void dragEnterEvent(QDragEnterEvent* event) override;
167 void dragMoveEvent(QDragMoveEvent* event) override;
158}; 168};
159 169
160#endif // _CITRA_QT_MAIN_HXX_ 170#endif // _CITRA_QT_MAIN_HXX_
diff --git a/src/citra_qt/main.ui b/src/citra_qt/main.ui
index adfa3689e..47dbb6ef7 100644
--- a/src/citra_qt/main.ui
+++ b/src/citra_qt/main.ui
@@ -79,8 +79,17 @@
79 <property name="title"> 79 <property name="title">
80 <string>&amp;View</string> 80 <string>&amp;View</string>
81 </property> 81 </property>
82 <widget class="QMenu" name="menu_View_Debugging">
83 <property name="title">
84 <string>Debugging</string>
85 </property>
86 <addaction name="action_Create_Pica_Surface_Viewer"/>
87 <addaction name="separator"/>
88 </widget>
82 <addaction name="action_Single_Window_Mode"/> 89 <addaction name="action_Single_Window_Mode"/>
83 <addaction name="actionDisplay_widget_title_bars"/> 90 <addaction name="action_Display_Dock_Widget_Headers"/>
91 <addaction name="action_Show_Status_Bar"/>
92 <addaction name="menu_View_Debugging"/>
84 </widget> 93 </widget>
85 <widget class="QMenu" name="menu_Help"> 94 <widget class="QMenu" name="menu_Help">
86 <property name="title"> 95 <property name="title">
@@ -93,7 +102,6 @@
93 <addaction name="menu_View"/> 102 <addaction name="menu_View"/>
94 <addaction name="menu_Help"/> 103 <addaction name="menu_Help"/>
95 </widget> 104 </widget>
96 <widget class="QStatusBar" name="statusbar"/>
97 <action name="action_Load_File"> 105 <action name="action_Load_File">
98 <property name="text"> 106 <property name="text">
99 <string>Load File...</string> 107 <string>Load File...</string>
@@ -151,7 +159,7 @@
151 <string>Configure...</string> 159 <string>Configure...</string>
152 </property> 160 </property>
153 </action> 161 </action>
154 <action name="actionDisplay_widget_title_bars"> 162 <action name="action_Display_Dock_Widget_Headers">
155 <property name="checkable"> 163 <property name="checkable">
156 <bool>true</bool> 164 <bool>true</bool>
157 </property> 165 </property>
@@ -159,6 +167,14 @@
159 <string>Display Dock Widget Headers</string> 167 <string>Display Dock Widget Headers</string>
160 </property> 168 </property>
161 </action> 169 </action>
170 <action name="action_Show_Status_Bar">
171 <property name="checkable">
172 <bool>true</bool>
173 </property>
174 <property name="text">
175 <string>Show Status Bar</string>
176 </property>
177 </action>
162 <action name="action_Select_Game_List_Root"> 178 <action name="action_Select_Game_List_Root">
163 <property name="text"> 179 <property name="text">
164 <string>Select Game Directory...</string> 180 <string>Select Game Directory...</string>
@@ -167,44 +183,11 @@
167 <string>Selects a folder to display in the game list</string> 183 <string>Selects a folder to display in the game list</string>
168 </property> 184 </property>
169 </action> 185 </action>
186 <action name="action_Create_Pica_Surface_Viewer">
187 <property name="text">
188 <string>Create Pica Surface Viewer</string>
189 </property>
190 </action>
170 </widget> 191 </widget>
171 <resources/> 192 <resources/>
172 <connections>
173 <connection>
174 <sender>action_Exit</sender>
175 <signal>triggered()</signal>
176 <receiver>MainWindow</receiver>
177 <slot>close()</slot>
178 <hints>
179 <hint type="sourcelabel">
180 <x>-1</x>
181 <y>-1</y>
182 </hint>
183 <hint type="destinationlabel">
184 <x>367</x>
185 <y>314</y>
186 </hint>
187 </hints>
188 </connection>
189 <connection>
190 <sender>actionDisplay_widget_title_bars</sender>
191 <signal>triggered(bool)</signal>
192 <receiver>MainWindow</receiver>
193 <slot>OnDisplayTitleBars(bool)</slot>
194 <hints>
195 <hint type="sourcelabel">
196 <x>-1</x>
197 <y>-1</y>
198 </hint>
199 <hint type="destinationlabel">
200 <x>540</x>
201 <y>364</y>
202 </hint>
203 </hints>
204 </connection>
205 </connections>
206 <slots>
207 <slot>OnConfigure()</slot>
208 <slot>OnDisplayTitleBars(bool)</slot>
209 </slots>
210</ui> 193</ui>
diff --git a/src/citra_qt/ui_settings.h b/src/citra_qt/ui_settings.h
index ed7fdff7e..6408ece2b 100644
--- a/src/citra_qt/ui_settings.h
+++ b/src/citra_qt/ui_settings.h
@@ -27,6 +27,7 @@ struct Values {
27 27
28 bool single_window_mode; 28 bool single_window_mode;
29 bool display_titlebar; 29 bool display_titlebar;
30 bool show_status_bar;
30 31
31 bool confirm_before_closing; 32 bool confirm_before_closing;
32 bool first_start; 33 bool first_start;
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index a7a4a688c..8a6170257 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -1,4 +1,27 @@
1# Generate cpp with Git revision from template 1# Generate cpp with Git revision from template
2# Also if this is a CI build, add the build name (ie: Nightly, Bleeding Edge) to the scm_rev file as well
3set(REPO_NAME "")
4if ($ENV{CI})
5 if ($ENV{TRAVIS})
6 set(BUILD_REPOSITORY $ENV{TRAVIS_REPO_SLUG})
7 elseif($ENV{APPVEYOR})
8 set(BUILD_REPOSITORY $ENV{APPVEYOR_REPO_NAME})
9 endif()
10 # regex capture the string nightly or bleeding-edge into CMAKE_MATCH_1
11 string(REGEX MATCH "citra-emu/citra-?(.*)" OUTVAR ${BUILD_REPOSITORY})
12 if (${CMAKE_MATCH_COUNT} GREATER 0)
13 # capitalize the first letter of each word in the repo name.
14 string(REPLACE "-" ";" REPO_NAME_LIST ${CMAKE_MATCH_1})
15 foreach(WORD ${REPO_NAME_LIST})
16 string(SUBSTRING ${WORD} 0 1 FIRST_LETTER)
17 string(SUBSTRING ${WORD} 1 -1 REMAINDER)
18 string(TOUPPER ${FIRST_LETTER} FIRST_LETTER)
19 # this leaves a trailing space on the last word, but we actually want that
20 # because of how its styled in the title bar.
21 set(REPO_NAME "${REPO_NAME}${FIRST_LETTER}${REMAINDER} ")
22 endforeach()
23 endif()
24endif()
2configure_file("${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp" @ONLY) 25configure_file("${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp" @ONLY)
3 26
4set(SRCS 27set(SRCS
@@ -12,7 +35,6 @@ set(SRCS
12 memory_util.cpp 35 memory_util.cpp
13 microprofile.cpp 36 microprofile.cpp
14 misc.cpp 37 misc.cpp
15 profiler.cpp
16 scm_rev.cpp 38 scm_rev.cpp
17 string_util.cpp 39 string_util.cpp
18 symbols.cpp 40 symbols.cpp
@@ -45,7 +67,6 @@ set(HEADERS
45 microprofile.h 67 microprofile.h
46 microprofileui.h 68 microprofileui.h
47 platform.h 69 platform.h
48 profiler_reporting.h
49 quaternion.h 70 quaternion.h
50 scm_rev.h 71 scm_rev.h
51 scope_exit.h 72 scope_exit.h
@@ -61,14 +82,11 @@ set(HEADERS
61 82
62if(ARCHITECTURE_x86_64) 83if(ARCHITECTURE_x86_64)
63 set(SRCS ${SRCS} 84 set(SRCS ${SRCS}
64 x64/abi.cpp
65 x64/cpu_detect.cpp 85 x64/cpu_detect.cpp
66 x64/emitter.cpp) 86 )
67 87
68 set(HEADERS ${HEADERS} 88 set(HEADERS ${HEADERS}
69 x64/abi.h
70 x64/cpu_detect.h 89 x64/cpu_detect.h
71 x64/emitter.h
72 x64/xbyak_abi.h 90 x64/xbyak_abi.h
73 x64/xbyak_util.h 91 x64/xbyak_util.h
74 ) 92 )
diff --git a/src/common/bit_set.h b/src/common/bit_set.h
index 3059d0cb0..9c2e6b28c 100644
--- a/src/common/bit_set.h
+++ b/src/common/bit_set.h
@@ -121,22 +121,19 @@ public:
121 class Iterator { 121 class Iterator {
122 public: 122 public:
123 Iterator(const Iterator& other) : m_val(other.m_val), m_bit(other.m_bit) {} 123 Iterator(const Iterator& other) : m_val(other.m_val), m_bit(other.m_bit) {}
124 Iterator(IntTy val, int bit) : m_val(val), m_bit(bit) {} 124 Iterator(IntTy val) : m_val(val), m_bit(0) {}
125 Iterator& operator=(Iterator other) { 125 Iterator& operator=(Iterator other) {
126 new (this) Iterator(other); 126 new (this) Iterator(other);
127 return *this; 127 return *this;
128 } 128 }
129 int operator*() { 129 int operator*() {
130 return m_bit; 130 return m_bit + ComputeLsb();
131 } 131 }
132 Iterator& operator++() { 132 Iterator& operator++() {
133 if (m_val == 0) { 133 int lsb = ComputeLsb();
134 m_bit = -1; 134 m_val >>= lsb + 1;
135 } else { 135 m_bit += lsb + 1;
136 int bit = LeastSignificantSetBit(m_val); 136 m_has_lsb = false;
137 m_val &= ~(1 << bit);
138 m_bit = bit;
139 }
140 return *this; 137 return *this;
141 } 138 }
142 Iterator operator++(int _) { 139 Iterator operator++(int _) {
@@ -145,15 +142,24 @@ public:
145 return other; 142 return other;
146 } 143 }
147 bool operator==(Iterator other) const { 144 bool operator==(Iterator other) const {
148 return m_bit == other.m_bit; 145 return m_val == other.m_val;
149 } 146 }
150 bool operator!=(Iterator other) const { 147 bool operator!=(Iterator other) const {
151 return m_bit != other.m_bit; 148 return m_val != other.m_val;
152 } 149 }
153 150
154 private: 151 private:
152 int ComputeLsb() {
153 if (!m_has_lsb) {
154 m_lsb = LeastSignificantSetBit(m_val);
155 m_has_lsb = true;
156 }
157 return m_lsb;
158 }
155 IntTy m_val; 159 IntTy m_val;
156 int m_bit; 160 int m_bit;
161 int m_lsb = -1;
162 bool m_has_lsb = false;
157 }; 163 };
158 164
159 BitSet() : m_val(0) {} 165 BitSet() : m_val(0) {}
@@ -221,11 +227,10 @@ public:
221 } 227 }
222 228
223 Iterator begin() const { 229 Iterator begin() const {
224 Iterator it(m_val, 0); 230 return Iterator(m_val);
225 return ++it;
226 } 231 }
227 Iterator end() const { 232 Iterator end() const {
228 return Iterator(m_val, -1); 233 return Iterator(0);
229 } 234 }
230 235
231 IntTy m_val; 236 IntTy m_val;
diff --git a/src/common/common_paths.h b/src/common/common_paths.h
index b56105306..d5b510cdb 100644
--- a/src/common/common_paths.h
+++ b/src/common/common_paths.h
@@ -45,3 +45,4 @@
45 45
46// Sys files 46// Sys files
47#define SHARED_FONT "shared_font.bin" 47#define SHARED_FONT "shared_font.bin"
48#define AES_KEYS "aes_keys.txt"
diff --git a/src/common/file_util.cpp b/src/common/file_util.cpp
index 1a1f5d9b5..df234c225 100644
--- a/src/common/file_util.cpp
+++ b/src/common/file_util.cpp
@@ -303,7 +303,7 @@ bool Copy(const std::string& srcFilename, const std::string& destFilename) {
303 // copy loop 303 // copy loop
304 while (!feof(input)) { 304 while (!feof(input)) {
305 // read input 305 // read input
306 int rnum = fread(buffer, sizeof(char), BSIZE, input); 306 size_t rnum = fread(buffer, sizeof(char), BSIZE, input);
307 if (rnum != BSIZE) { 307 if (rnum != BSIZE) {
308 if (ferror(input) != 0) { 308 if (ferror(input) != 0) {
309 LOG_ERROR(Common_Filesystem, "failed reading from source, %s --> %s: %s", 309 LOG_ERROR(Common_Filesystem, "failed reading from source, %s --> %s: %s",
@@ -313,7 +313,7 @@ bool Copy(const std::string& srcFilename, const std::string& destFilename) {
313 } 313 }
314 314
315 // write output 315 // write output
316 int wnum = fwrite(buffer, sizeof(char), rnum, output); 316 size_t wnum = fwrite(buffer, sizeof(char), rnum, output);
317 if (wnum != rnum) { 317 if (wnum != rnum) {
318 LOG_ERROR(Common_Filesystem, "failed writing to output, %s --> %s: %s", 318 LOG_ERROR(Common_Filesystem, "failed writing to output, %s --> %s: %s",
319 srcFilename.c_str(), destFilename.c_str(), GetLastErrorMsg()); 319 srcFilename.c_str(), destFilename.c_str(), GetLastErrorMsg());
diff --git a/src/common/hash.cpp b/src/common/hash.cpp
index 2309320bb..f3d390dc5 100644
--- a/src/common/hash.cpp
+++ b/src/common/hash.cpp
@@ -16,7 +16,7 @@ namespace Common {
16 16
17// Block read - if your platform needs to do endian-swapping or can only handle aligned reads, do 17// Block read - if your platform needs to do endian-swapping or can only handle aligned reads, do
18// the conversion here 18// the conversion here
19static FORCE_INLINE u64 getblock64(const u64* p, int i) { 19static FORCE_INLINE u64 getblock64(const u64* p, size_t i) {
20 return p[i]; 20 return p[i];
21} 21}
22 22
@@ -34,9 +34,9 @@ static FORCE_INLINE u64 fmix64(u64 k) {
34// This is the 128-bit variant of the MurmurHash3 hash function that is targeted for 64-bit 34// This is the 128-bit variant of the MurmurHash3 hash function that is targeted for 64-bit
35// platforms (MurmurHash3_x64_128). It was taken from: 35// platforms (MurmurHash3_x64_128). It was taken from:
36// https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp 36// https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
37void MurmurHash3_128(const void* key, int len, u32 seed, void* out) { 37void MurmurHash3_128(const void* key, size_t len, u32 seed, void* out) {
38 const u8* data = (const u8*)key; 38 const u8* data = (const u8*)key;
39 const int nblocks = len / 16; 39 const size_t nblocks = len / 16;
40 40
41 u64 h1 = seed; 41 u64 h1 = seed;
42 u64 h2 = seed; 42 u64 h2 = seed;
@@ -48,7 +48,7 @@ void MurmurHash3_128(const void* key, int len, u32 seed, void* out) {
48 48
49 const u64* blocks = (const u64*)(data); 49 const u64* blocks = (const u64*)(data);
50 50
51 for (int i = 0; i < nblocks; i++) { 51 for (size_t i = 0; i < nblocks; i++) {
52 u64 k1 = getblock64(blocks, i * 2 + 0); 52 u64 k1 = getblock64(blocks, i * 2 + 0);
53 u64 k2 = getblock64(blocks, i * 2 + 1); 53 u64 k2 = getblock64(blocks, i * 2 + 1);
54 54
diff --git a/src/common/hash.h b/src/common/hash.h
index a3850be68..ee2560dad 100644
--- a/src/common/hash.h
+++ b/src/common/hash.h
@@ -4,11 +4,12 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <cstddef>
7#include "common/common_types.h" 8#include "common/common_types.h"
8 9
9namespace Common { 10namespace Common {
10 11
11void MurmurHash3_128(const void* key, int len, u32 seed, void* out); 12void MurmurHash3_128(const void* key, size_t len, u32 seed, void* out);
12 13
13/** 14/**
14 * Computes a 64-bit hash over the specified block of data 15 * Computes a 64-bit hash over the specified block of data
@@ -16,7 +17,7 @@ void MurmurHash3_128(const void* key, int len, u32 seed, void* out);
16 * @param len Length of data (in bytes) to compute hash over 17 * @param len Length of data (in bytes) to compute hash over
17 * @returns 64-bit hash value that was computed over the data block 18 * @returns 64-bit hash value that was computed over the data block
18 */ 19 */
19static inline u64 ComputeHash64(const void* data, int len) { 20static inline u64 ComputeHash64(const void* data, size_t len) {
20 u64 res[2]; 21 u64 res[2];
21 MurmurHash3_128(data, len, 0, res); 22 MurmurHash3_128(data, len, 0, res);
22 return res[0]; 23 return res[0];
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index 2ef3e6b05..737e1d57f 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -55,6 +55,7 @@ namespace Log {
55 SUB(Service, DSP) \ 55 SUB(Service, DSP) \
56 SUB(Service, DLP) \ 56 SUB(Service, DLP) \
57 SUB(Service, HID) \ 57 SUB(Service, HID) \
58 SUB(Service, HTTP) \
58 SUB(Service, SOC) \ 59 SUB(Service, SOC) \
59 SUB(Service, IR) \ 60 SUB(Service, IR) \
60 SUB(Service, Y2R) \ 61 SUB(Service, Y2R) \
@@ -62,6 +63,7 @@ namespace Log {
62 SUB(HW, Memory) \ 63 SUB(HW, Memory) \
63 SUB(HW, LCD) \ 64 SUB(HW, LCD) \
64 SUB(HW, GPU) \ 65 SUB(HW, GPU) \
66 SUB(HW, AES) \
65 CLS(Frontend) \ 67 CLS(Frontend) \
66 CLS(Render) \ 68 CLS(Render) \
67 SUB(Render, Software) \ 69 SUB(Render, Software) \
diff --git a/src/common/logging/log.h b/src/common/logging/log.h
index 4330ef879..4b0f8ff03 100644
--- a/src/common/logging/log.h
+++ b/src/common/logging/log.h
@@ -72,6 +72,7 @@ enum class Class : ClassType {
72 Service_DSP, ///< The DSP (DSP control) service 72 Service_DSP, ///< The DSP (DSP control) service
73 Service_DLP, ///< The DLP (Download Play) service 73 Service_DLP, ///< The DLP (Download Play) service
74 Service_HID, ///< The HID (Human interface device) service 74 Service_HID, ///< The HID (Human interface device) service
75 Service_HTTP, ///< The HTTP service
75 Service_SOC, ///< The SOC (Socket) service 76 Service_SOC, ///< The SOC (Socket) service
76 Service_IR, ///< The IR service 77 Service_IR, ///< The IR service
77 Service_Y2R, ///< The Y2R (YUV to RGB conversion) service 78 Service_Y2R, ///< The Y2R (YUV to RGB conversion) service
@@ -79,6 +80,7 @@ enum class Class : ClassType {
79 HW_Memory, ///< Memory-map and address translation 80 HW_Memory, ///< Memory-map and address translation
80 HW_LCD, ///< LCD register emulation 81 HW_LCD, ///< LCD register emulation
81 HW_GPU, ///< GPU control emulation 82 HW_GPU, ///< GPU control emulation
83 HW_AES, ///< AES engine emulation
82 Frontend, ///< Emulator UI 84 Frontend, ///< Emulator UI
83 Render, ///< Emulator video output and hardware acceleration 85 Render, ///< Emulator video output and hardware acceleration
84 Render_Software, ///< Software renderer backend 86 Render_Software, ///< Software renderer backend
diff --git a/src/common/profiler.cpp b/src/common/profiler.cpp
deleted file mode 100644
index b40e7205d..000000000
--- a/src/common/profiler.cpp
+++ /dev/null
@@ -1,101 +0,0 @@
1// Copyright 2015 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <cstddef>
7#include <vector>
8#include "common/assert.h"
9#include "common/profiler_reporting.h"
10#include "common/synchronized_wrapper.h"
11
12namespace Common {
13namespace Profiling {
14
15ProfilingManager::ProfilingManager()
16 : last_frame_end(Clock::now()), this_frame_start(Clock::now()) {}
17
18void ProfilingManager::BeginFrame() {
19 this_frame_start = Clock::now();
20}
21
22void ProfilingManager::FinishFrame() {
23 Clock::time_point now = Clock::now();
24
25 results.interframe_time = now - last_frame_end;
26 results.frame_time = now - this_frame_start;
27
28 last_frame_end = now;
29}
30
31TimingResultsAggregator::TimingResultsAggregator(size_t window_size)
32 : max_window_size(window_size), window_size(0) {
33 interframe_times.resize(window_size, Duration::zero());
34 frame_times.resize(window_size, Duration::zero());
35}
36
37void TimingResultsAggregator::Clear() {
38 window_size = cursor = 0;
39}
40
41void TimingResultsAggregator::AddFrame(const ProfilingFrameResult& frame_result) {
42 interframe_times[cursor] = frame_result.interframe_time;
43 frame_times[cursor] = frame_result.frame_time;
44
45 ++cursor;
46 if (cursor == max_window_size)
47 cursor = 0;
48 if (window_size < max_window_size)
49 ++window_size;
50}
51
52static AggregatedDuration AggregateField(const std::vector<Duration>& v, size_t len) {
53 AggregatedDuration result;
54 result.avg = Duration::zero();
55 result.min = result.max = (len == 0 ? Duration::zero() : v[0]);
56
57 for (size_t i = 0; i < len; ++i) {
58 Duration value = v[i];
59 result.avg += value;
60 result.min = std::min(result.min, value);
61 result.max = std::max(result.max, value);
62 }
63 if (len != 0)
64 result.avg /= len;
65
66 return result;
67}
68
69static float tof(Common::Profiling::Duration dur) {
70 using FloatMs = std::chrono::duration<float, std::chrono::milliseconds::period>;
71 return std::chrono::duration_cast<FloatMs>(dur).count();
72}
73
74AggregatedFrameResult TimingResultsAggregator::GetAggregatedResults() const {
75 AggregatedFrameResult result;
76
77 result.interframe_time = AggregateField(interframe_times, window_size);
78 result.frame_time = AggregateField(frame_times, window_size);
79
80 if (result.interframe_time.avg != Duration::zero()) {
81 result.fps = 1000.0f / tof(result.interframe_time.avg);
82 } else {
83 result.fps = 0.0f;
84 }
85
86 return result;
87}
88
89ProfilingManager& GetProfilingManager() {
90 // Takes advantage of "magic" static initialization for race-free initialization.
91 static ProfilingManager manager;
92 return manager;
93}
94
95SynchronizedRef<TimingResultsAggregator> GetTimingResultsAggregator() {
96 static SynchronizedWrapper<TimingResultsAggregator> aggregator(30);
97 return SynchronizedRef<TimingResultsAggregator>(aggregator);
98}
99
100} // namespace Profiling
101} // namespace Common
diff --git a/src/common/profiler_reporting.h b/src/common/profiler_reporting.h
deleted file mode 100644
index e9ce6d41c..000000000
--- a/src/common/profiler_reporting.h
+++ /dev/null
@@ -1,83 +0,0 @@
1// Copyright 2015 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <chrono>
8#include <cstddef>
9#include <vector>
10#include "common/synchronized_wrapper.h"
11
12namespace Common {
13namespace Profiling {
14
15using Clock = std::chrono::high_resolution_clock;
16using Duration = Clock::duration;
17
/// Timing measurements recorded for a single frame.
18struct ProfilingFrameResult {
19 /// Time since the last delivered frame
20 Duration interframe_time;
21
22 /// Time spent processing a frame, excluding VSync
23 Duration frame_time;
24};
25
/// Measures per-frame timing. Callers bracket each frame with BeginFrame() / FinishFrame() and
/// read the latest measurements through GetPreviousFrameResults().
26class ProfilingManager final {
27public:
28 ProfilingManager();
29
30 /// This should be called after swapping screen buffers.
31 void BeginFrame();
32 /// This should be called before swapping screen buffers.
33 void FinishFrame();
34
35 /// Get the timing results from the previous frame. This is updated when you call FinishFrame().
36 const ProfilingFrameResult& GetPreviousFrameResults() const {
37 return results;
38 }
39
40private:
// NOTE(review): presumably the time points captured by FinishFrame() and BeginFrame()
// respectively; their definitions are not visible here - confirm.
41 Clock::time_point last_frame_end;
42 Clock::time_point this_frame_start;
43
// Results handed out by GetPreviousFrameResults().
44 ProfilingFrameResult results;
45};
46
47struct AggregatedDuration {
48 Duration avg, min, max;
49};
50
/// Aggregated (avg/min/max) frame timings over a window of frames.
51struct AggregatedFrameResult {
52 /// Time since the last delivered frame
53 AggregatedDuration interframe_time;
54
55 /// Time spent processing a frame, excluding VSync
56 AggregatedDuration frame_time;
57
/// Frames per second: 1000 divided by the average interframe time in milliseconds, or 0 when
/// that average is zero.
58 float fps;
59};
60
/// Stores recent per-frame timings in a fixed-capacity ring buffer and aggregates them.
61class TimingResultsAggregator final {
62public:
// NOTE(review): the constructor body is not visible here; presumably window_size becomes
// max_window_size - confirm.
63 TimingResultsAggregator(size_t window_size);
64
65 void Clear();
66
/// Records one frame's timings at the cursor position, advancing (and wrapping) the cursor.
67 void AddFrame(const ProfilingFrameResult& frame_result);
68
/// Returns avg/min/max over the first window_size samples plus the derived frame rate.
69 AggregatedFrameResult GetAggregatedResults() const;
70
// Capacity of the ring buffer; the cursor wraps to 0 when it reaches this value.
71 size_t max_window_size;
// Number of valid samples; grows by one per added frame until it reaches max_window_size.
72 size_t window_size;
// Index at which the next frame's samples are written.
73 size_t cursor;
74
75 std::vector<Duration> interframe_times;
76 std::vector<Duration> frame_times;
77};
78
79ProfilingManager& GetProfilingManager();
80SynchronizedRef<TimingResultsAggregator> GetTimingResultsAggregator();
81
82} // namespace Profiling
83} // namespace Common
diff --git a/src/common/scm_rev.cpp.in b/src/common/scm_rev.cpp.in
index 79b404bb8..0080db5d5 100644
--- a/src/common/scm_rev.cpp.in
+++ b/src/common/scm_rev.cpp.in
@@ -7,12 +7,14 @@
7#define GIT_REV "@GIT_REV@" 7#define GIT_REV "@GIT_REV@"
8#define GIT_BRANCH "@GIT_BRANCH@" 8#define GIT_BRANCH "@GIT_BRANCH@"
9#define GIT_DESC "@GIT_DESC@" 9#define GIT_DESC "@GIT_DESC@"
10#define BUILD_NAME "@REPO_NAME@"
10 11
11namespace Common { 12namespace Common {
12 13
13const char g_scm_rev[] = GIT_REV; 14const char g_scm_rev[] = GIT_REV;
14const char g_scm_branch[] = GIT_BRANCH; 15const char g_scm_branch[] = GIT_BRANCH;
15const char g_scm_desc[] = GIT_DESC; 16const char g_scm_desc[] = GIT_DESC;
17const char g_build_name[] = BUILD_NAME;
16 18
17} // namespace 19} // namespace
18 20
diff --git a/src/common/scm_rev.h b/src/common/scm_rev.h
index 0ef190afa..e22389803 100644
--- a/src/common/scm_rev.h
+++ b/src/common/scm_rev.h
@@ -9,5 +9,6 @@ namespace Common {
9extern const char g_scm_rev[]; 9extern const char g_scm_rev[];
10extern const char g_scm_branch[]; 10extern const char g_scm_branch[];
11extern const char g_scm_desc[]; 11extern const char g_scm_desc[];
12extern const char g_build_name[];
12 13
13} // namespace 14} // namespace
diff --git a/src/common/synchronized_wrapper.h b/src/common/synchronized_wrapper.h
index 04b4f2e51..4a1984c46 100644
--- a/src/common/synchronized_wrapper.h
+++ b/src/common/synchronized_wrapper.h
@@ -9,25 +9,8 @@
9 9
10namespace Common { 10namespace Common {
11 11
12/**
13 * Wraps an object, only allowing access to it via a locking reference wrapper. Good to ensure no
14 * one forgets to lock a mutex before acessing an object. To access the wrapped object construct a
15 * SyncronizedRef on this wrapper. Inspired by Rust's Mutex type
16 * (http://doc.rust-lang.org/std/sync/struct.Mutex.html).
17 */
18template <typename T> 12template <typename T>
19class SynchronizedWrapper { 13class SynchronizedWrapper;
20public:
21 template <typename... Args>
22 SynchronizedWrapper(Args&&... args) : data(std::forward<Args>(args)...) {}
23
24private:
25 template <typename U>
26 friend class SynchronizedRef;
27
28 std::mutex mutex;
29 T data;
30};
31 14
32/** 15/**
33 * Synchronized reference, that keeps a SynchronizedWrapper's mutex locked during its lifetime. This 16 * Synchronized reference, that keeps a SynchronizedWrapper's mutex locked during its lifetime. This
@@ -75,4 +58,28 @@ private:
75 SynchronizedWrapper<T>* wrapper; 58 SynchronizedWrapper<T>* wrapper;
76}; 59};
77 60
61/**
62 * Wraps an object, only allowing access to it via a locking reference wrapper. Good to ensure no
63 * one forgets to lock a mutex before accessing an object. To access the wrapped object construct a
64 * SynchronizedRef on this wrapper. Inspired by Rust's Mutex type
65 * (http://doc.rust-lang.org/std/sync/struct.Mutex.html).
66 */
67template <typename T>
68class SynchronizedWrapper {
69public:
// Forwards all constructor arguments to the wrapped object's constructor.
70 template <typename... Args>
71 SynchronizedWrapper(Args&&... args) : data(std::forward<Args>(args)...) {}
72
// Convenience accessor: returns a SynchronizedRef constructed from this wrapper.
73 SynchronizedRef<T> Lock() {
74 return {*this};
75 }
76
77private:
// SynchronizedRef needs access to the private mutex and data below.
78 template <typename U>
79 friend class SynchronizedRef;
80
81 std::mutex mutex;
82 T data;
83};
84
78} // namespace Common 85} // namespace Common
diff --git a/src/common/x64/abi.cpp b/src/common/x64/abi.cpp
deleted file mode 100644
index 504b9c940..000000000
--- a/src/common/x64/abi.cpp
+++ /dev/null
@@ -1,350 +0,0 @@
1// Copyright (C) 2003 Dolphin Project.
2
3// This program is free software: you can redistribute it and/or modify
4// it under the terms of the GNU General Public License as published by
5// the Free Software Foundation, version 2.0 or later versions.
6
7// This program is distributed in the hope that it will be useful,
8// but WITHOUT ANY WARRANTY; without even the implied warranty of
9// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10// GNU General Public License 2.0 for more details.
11
12// A copy of the GPL 2.0 should have been included with the program.
13// If not, see http://www.gnu.org/licenses/
14
15// Official SVN repository and contact information can be found at
16// http://code.google.com/p/dolphin-emu/
17
18#include "abi.h"
19#include "emitter.h"
20
21using namespace Gen;
22
23// Shared code between Win64 and Unix64
24
25void XEmitter::ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size,
26 size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp) {
27 size_t shadow = 0;
28#if defined(_WIN32)
29 shadow = 0x20;
30#endif
31
32 int count = (mask & ABI_ALL_GPRS).Count();
33 rsp_alignment -= count * 8;
34 size_t subtraction = 0;
35 int fpr_count = (mask & ABI_ALL_FPRS).Count();
36 if (fpr_count) {
37 // If we have any XMMs to save, we must align the stack here.
38 subtraction = rsp_alignment & 0xf;
39 }
40 subtraction += 16 * fpr_count;
41 size_t xmm_base_subtraction = subtraction;
42 subtraction += needed_frame_size;
43 subtraction += shadow;
44 // Final alignment.
45 rsp_alignment -= subtraction;
46 subtraction += rsp_alignment & 0xf;
47
48 *shadowp = shadow;
49 *subtractionp = subtraction;
50 *xmm_offsetp = subtraction - xmm_base_subtraction;
51}
52
53size_t XEmitter::ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment,
54 size_t needed_frame_size) {
55 size_t shadow, subtraction, xmm_offset;
56 ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction,
57 &xmm_offset);
58
59 for (int r : mask& ABI_ALL_GPRS)
60 PUSH((X64Reg)r);
61
62 if (subtraction)
63 SUB(64, R(RSP), subtraction >= 0x80 ? Imm32((u32)subtraction) : Imm8((u8)subtraction));
64
65 for (int x : mask& ABI_ALL_FPRS) {
66 MOVAPD(MDisp(RSP, (int)xmm_offset), (X64Reg)(x - 16));
67 xmm_offset += 16;
68 }
69
70 return shadow;
71}
72
73void XEmitter::ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment,
74 size_t needed_frame_size) {
75 size_t shadow, subtraction, xmm_offset;
76 ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction,
77 &xmm_offset);
78
79 for (int x : mask& ABI_ALL_FPRS) {
80 MOVAPD((X64Reg)(x - 16), MDisp(RSP, (int)xmm_offset));
81 xmm_offset += 16;
82 }
83
84 if (subtraction)
85 ADD(64, R(RSP), subtraction >= 0x80 ? Imm32((u32)subtraction) : Imm8((u8)subtraction));
86
87 for (int r = 15; r >= 0; r--) {
88 if (mask[r])
89 POP((X64Reg)r);
90 }
91}
92
93// Common functions
94void XEmitter::ABI_CallFunction(const void* func) {
95 u64 distance = u64(func) - (u64(code) + 5);
96 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
97 // Far call
98 MOV(64, R(RAX), ImmPtr(func));
99 CALLptr(R(RAX));
100 } else {
101 CALL(func);
102 }
103}
104
105void XEmitter::ABI_CallFunctionC16(const void* func, u16 param1) {
106 MOV(32, R(ABI_PARAM1), Imm32((u32)param1));
107 u64 distance = u64(func) - (u64(code) + 5);
108 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
109 // Far call
110 MOV(64, R(RAX), ImmPtr(func));
111 CALLptr(R(RAX));
112 } else {
113 CALL(func);
114 }
115}
116
117void XEmitter::ABI_CallFunctionCC16(const void* func, u32 param1, u16 param2) {
118 MOV(32, R(ABI_PARAM1), Imm32(param1));
119 MOV(32, R(ABI_PARAM2), Imm32((u32)param2));
120 u64 distance = u64(func) - (u64(code) + 5);
121 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
122 // Far call
123 MOV(64, R(RAX), ImmPtr(func));
124 CALLptr(R(RAX));
125 } else {
126 CALL(func);
127 }
128}
129
130void XEmitter::ABI_CallFunctionC(const void* func, u32 param1) {
131 MOV(32, R(ABI_PARAM1), Imm32(param1));
132 u64 distance = u64(func) - (u64(code) + 5);
133 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
134 // Far call
135 MOV(64, R(RAX), ImmPtr(func));
136 CALLptr(R(RAX));
137 } else {
138 CALL(func);
139 }
140}
141
142void XEmitter::ABI_CallFunctionCC(const void* func, u32 param1, u32 param2) {
143 MOV(32, R(ABI_PARAM1), Imm32(param1));
144 MOV(32, R(ABI_PARAM2), Imm32(param2));
145 u64 distance = u64(func) - (u64(code) + 5);
146 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
147 // Far call
148 MOV(64, R(RAX), ImmPtr(func));
149 CALLptr(R(RAX));
150 } else {
151 CALL(func);
152 }
153}
154
155void XEmitter::ABI_CallFunctionCCC(const void* func, u32 param1, u32 param2, u32 param3) {
156 MOV(32, R(ABI_PARAM1), Imm32(param1));
157 MOV(32, R(ABI_PARAM2), Imm32(param2));
158 MOV(32, R(ABI_PARAM3), Imm32(param3));
159 u64 distance = u64(func) - (u64(code) + 5);
160 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
161 // Far call
162 MOV(64, R(RAX), ImmPtr(func));
163 CALLptr(R(RAX));
164 } else {
165 CALL(func);
166 }
167}
168
169void XEmitter::ABI_CallFunctionCCP(const void* func, u32 param1, u32 param2, void* param3) {
170 MOV(32, R(ABI_PARAM1), Imm32(param1));
171 MOV(32, R(ABI_PARAM2), Imm32(param2));
172 MOV(64, R(ABI_PARAM3), ImmPtr(param3));
173 u64 distance = u64(func) - (u64(code) + 5);
174 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
175 // Far call
176 MOV(64, R(RAX), ImmPtr(func));
177 CALLptr(R(RAX));
178 } else {
179 CALL(func);
180 }
181}
182
183void XEmitter::ABI_CallFunctionCCCP(const void* func, u32 param1, u32 param2, u32 param3,
184 void* param4) {
185 MOV(32, R(ABI_PARAM1), Imm32(param1));
186 MOV(32, R(ABI_PARAM2), Imm32(param2));
187 MOV(32, R(ABI_PARAM3), Imm32(param3));
188 MOV(64, R(ABI_PARAM4), ImmPtr(param4));
189 u64 distance = u64(func) - (u64(code) + 5);
190 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
191 // Far call
192 MOV(64, R(RAX), ImmPtr(func));
193 CALLptr(R(RAX));
194 } else {
195 CALL(func);
196 }
197}
198
199void XEmitter::ABI_CallFunctionP(const void* func, void* param1) {
200 MOV(64, R(ABI_PARAM1), ImmPtr(param1));
201 u64 distance = u64(func) - (u64(code) + 5);
202 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
203 // Far call
204 MOV(64, R(RAX), ImmPtr(func));
205 CALLptr(R(RAX));
206 } else {
207 CALL(func);
208 }
209}
210
211void XEmitter::ABI_CallFunctionPA(const void* func, void* param1, const Gen::OpArg& arg2) {
212 MOV(64, R(ABI_PARAM1), ImmPtr(param1));
213 if (!arg2.IsSimpleReg(ABI_PARAM2))
214 MOV(32, R(ABI_PARAM2), arg2);
215 u64 distance = u64(func) - (u64(code) + 5);
216 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
217 // Far call
218 MOV(64, R(RAX), ImmPtr(func));
219 CALLptr(R(RAX));
220 } else {
221 CALL(func);
222 }
223}
224
225void XEmitter::ABI_CallFunctionPAA(const void* func, void* param1, const Gen::OpArg& arg2,
226 const Gen::OpArg& arg3) {
227 MOV(64, R(ABI_PARAM1), ImmPtr(param1));
228 if (!arg2.IsSimpleReg(ABI_PARAM2))
229 MOV(32, R(ABI_PARAM2), arg2);
230 if (!arg3.IsSimpleReg(ABI_PARAM3))
231 MOV(32, R(ABI_PARAM3), arg3);
232 u64 distance = u64(func) - (u64(code) + 5);
233 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
234 // Far call
235 MOV(64, R(RAX), ImmPtr(func));
236 CALLptr(R(RAX));
237 } else {
238 CALL(func);
239 }
240}
241
242void XEmitter::ABI_CallFunctionPPC(const void* func, void* param1, void* param2, u32 param3) {
243 MOV(64, R(ABI_PARAM1), ImmPtr(param1));
244 MOV(64, R(ABI_PARAM2), ImmPtr(param2));
245 MOV(32, R(ABI_PARAM3), Imm32(param3));
246 u64 distance = u64(func) - (u64(code) + 5);
247 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
248 // Far call
249 MOV(64, R(RAX), ImmPtr(func));
250 CALLptr(R(RAX));
251 } else {
252 CALL(func);
253 }
254}
255
256// Pass a register as a parameter.
257void XEmitter::ABI_CallFunctionR(const void* func, X64Reg reg1) {
258 if (reg1 != ABI_PARAM1)
259 MOV(32, R(ABI_PARAM1), R(reg1));
260 u64 distance = u64(func) - (u64(code) + 5);
261 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
262 // Far call
263 MOV(64, R(RAX), ImmPtr(func));
264 CALLptr(R(RAX));
265 } else {
266 CALL(func);
267 }
268}
269
270// Pass two registers as parameters.
271void XEmitter::ABI_CallFunctionRR(const void* func, X64Reg reg1, X64Reg reg2) {
272 if (reg2 != ABI_PARAM1) {
273 if (reg1 != ABI_PARAM1)
274 MOV(64, R(ABI_PARAM1), R(reg1));
275 if (reg2 != ABI_PARAM2)
276 MOV(64, R(ABI_PARAM2), R(reg2));
277 } else {
278 if (reg2 != ABI_PARAM2)
279 MOV(64, R(ABI_PARAM2), R(reg2));
280 if (reg1 != ABI_PARAM1)
281 MOV(64, R(ABI_PARAM1), R(reg1));
282 }
283 u64 distance = u64(func) - (u64(code) + 5);
284 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
285 // Far call
286 MOV(64, R(RAX), ImmPtr(func));
287 CALLptr(R(RAX));
288 } else {
289 CALL(func);
290 }
291}
292
293void XEmitter::ABI_CallFunctionAC(const void* func, const Gen::OpArg& arg1, u32 param2) {
294 if (!arg1.IsSimpleReg(ABI_PARAM1))
295 MOV(32, R(ABI_PARAM1), arg1);
296 MOV(32, R(ABI_PARAM2), Imm32(param2));
297 u64 distance = u64(func) - (u64(code) + 5);
298 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
299 // Far call
300 MOV(64, R(RAX), ImmPtr(func));
301 CALLptr(R(RAX));
302 } else {
303 CALL(func);
304 }
305}
306
307void XEmitter::ABI_CallFunctionACC(const void* func, const Gen::OpArg& arg1, u32 param2,
308 u32 param3) {
309 if (!arg1.IsSimpleReg(ABI_PARAM1))
310 MOV(32, R(ABI_PARAM1), arg1);
311 MOV(32, R(ABI_PARAM2), Imm32(param2));
312 MOV(64, R(ABI_PARAM3), Imm64(param3));
313 u64 distance = u64(func) - (u64(code) + 5);
314 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
315 // Far call
316 MOV(64, R(RAX), ImmPtr(func));
317 CALLptr(R(RAX));
318 } else {
319 CALL(func);
320 }
321}
322
323void XEmitter::ABI_CallFunctionA(const void* func, const Gen::OpArg& arg1) {
324 if (!arg1.IsSimpleReg(ABI_PARAM1))
325 MOV(32, R(ABI_PARAM1), arg1);
326 u64 distance = u64(func) - (u64(code) + 5);
327 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
328 // Far call
329 MOV(64, R(RAX), ImmPtr(func));
330 CALLptr(R(RAX));
331 } else {
332 CALL(func);
333 }
334}
335
336void XEmitter::ABI_CallFunctionAA(const void* func, const Gen::OpArg& arg1,
337 const Gen::OpArg& arg2) {
338 if (!arg1.IsSimpleReg(ABI_PARAM1))
339 MOV(32, R(ABI_PARAM1), arg1);
340 if (!arg2.IsSimpleReg(ABI_PARAM2))
341 MOV(32, R(ABI_PARAM2), arg2);
342 u64 distance = u64(func) - (u64(code) + 5);
343 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
344 // Far call
345 MOV(64, R(RAX), ImmPtr(func));
346 CALLptr(R(RAX));
347 } else {
348 CALL(func);
349 }
350} \ No newline at end of file
diff --git a/src/common/x64/abi.h b/src/common/x64/abi.h
deleted file mode 100644
index eaaf81d89..000000000
--- a/src/common/x64/abi.h
+++ /dev/null
@@ -1,58 +0,0 @@
1// Copyright 2008 Dolphin Emulator Project
2// Licensed under GPLv2+
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/bit_set.h"
8#include "emitter.h"
9
10// x64 ABI:s, and helpers to help follow them when JIT-ing code.
11// All conventions return values in EAX (+ possibly EDX).
12
13// Windows 64-bit
14// * 4-reg "fastcall" variant, very new-skool stack handling
15// * Callee moves stack pointer, to make room for shadow regs for the biggest function _it itself
16// calls_
17// * Parameters passed in RCX, RDX, ... further parameters are MOVed into the allocated stack space.
18// Scratch: RAX RCX RDX R8 R9 R10 R11
19// Callee-save: RBX RSI RDI RBP R12 R13 R14 R15
20// Parameters: RCX RDX R8 R9, further MOV-ed
21
22// Linux 64-bit
23// * 6-reg "fastcall" variant, old skool stack handling (parameters are pushed)
24// Scratch: RAX RCX RDX RSI RDI R8 R9 R10 R11
25// Callee-save: RBX RBP R12 R13 R14 R15
26// Parameters: RDI RSI RDX RCX R8 R9
27
28#define ABI_ALL_FPRS BitSet32(0xffff0000)
29#define ABI_ALL_GPRS BitSet32(0x0000ffff)
30
31#ifdef _WIN32 // 64-bit Windows - the really exotic calling convention
32
33#define ABI_PARAM1 RCX
34#define ABI_PARAM2 RDX
35#define ABI_PARAM3 R8
36#define ABI_PARAM4 R9
37
38// xmm0-xmm15 use the upper 16 bits in the functions that push/pop registers.
39#define ABI_ALL_CALLER_SAVED \
40 (BitSet32{RAX, RCX, RDX, R8, R9, R10, R11, XMM0 + 16, XMM1 + 16, XMM2 + 16, XMM3 + 16, \
41 XMM4 + 16, XMM5 + 16})
42#else // 64-bit Unix / OS X
43
44#define ABI_PARAM1 RDI
45#define ABI_PARAM2 RSI
46#define ABI_PARAM3 RDX
47#define ABI_PARAM4 RCX
48#define ABI_PARAM5 R8
49#define ABI_PARAM6 R9
50
51// TODO: Avoid pushing all 16 XMM registers when possible. Most functions we call probably
52// don't actually clobber them.
53#define ABI_ALL_CALLER_SAVED (BitSet32{RAX, RCX, RDX, RDI, RSI, R8, R9, R10, R11} | ABI_ALL_FPRS)
54#endif // WIN32
55
56#define ABI_ALL_CALLEE_SAVED (~ABI_ALL_CALLER_SAVED)
57
58#define ABI_RETURN RAX
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp
index 370ae2c80..2cb3ab9cc 100644
--- a/src/common/x64/cpu_detect.cpp
+++ b/src/common/x64/cpu_detect.cpp
@@ -8,9 +8,9 @@
8#include "common/common_types.h" 8#include "common/common_types.h"
9#include "cpu_detect.h" 9#include "cpu_detect.h"
10 10
11namespace Common { 11#ifdef _MSC_VER
12 12#include <intrin.h>
13#ifndef _MSC_VER 13#else
14 14
15#if defined(__DragonFly__) || defined(__FreeBSD__) 15#if defined(__DragonFly__) || defined(__FreeBSD__)
16// clang-format off 16// clang-format off
@@ -37,13 +37,15 @@ static inline void __cpuid(int info[4], int function_id) {
37} 37}
38 38
39#define _XCR_XFEATURE_ENABLED_MASK 0 39#define _XCR_XFEATURE_ENABLED_MASK 0
40static u64 _xgetbv(u32 index) { 40static inline u64 _xgetbv(u32 index) {
41 u32 eax, edx; 41 u32 eax, edx;
42 __asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index)); 42 __asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));
43 return ((u64)edx << 32) | eax; 43 return ((u64)edx << 32) | eax;
44} 44}
45 45
46#endif // ifndef _MSC_VER 46#endif // _MSC_VER
47
48namespace Common {
47 49
48// Detects the various CPU features 50// Detects the various CPU features
49static CPUCaps Detect() { 51static CPUCaps Detect() {
diff --git a/src/common/x64/emitter.cpp b/src/common/x64/emitter.cpp
deleted file mode 100644
index f5930abec..000000000
--- a/src/common/x64/emitter.cpp
+++ /dev/null
@@ -1,2583 +0,0 @@
1// Copyright (C) 2003 Dolphin Project.
2
3// This program is free software: you can redistribute it and/or modify
4// it under the terms of the GNU General Public License as published by
5// the Free Software Foundation, version 2.0 or later versions.
6
7// This program is distributed in the hope that it will be useful,
8// but WITHOUT ANY WARRANTY; without even the implied warranty of
9// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10// GNU General Public License 2.0 for more details.
11
12// A copy of the GPL 2.0 should have been included with the program.
13// If not, see http://www.gnu.org/licenses/
14
15// Official SVN repository and contact information can be found at
16// http://code.google.com/p/dolphin-emu/
17
18#include <cinttypes>
19#include <cstring>
20#include "abi.h"
21#include "common/assert.h"
22#include "common/logging/log.h"
23#include "common/memory_util.h"
24#include "cpu_detect.h"
25#include "emitter.h"
26
27namespace Gen {
28
29struct NormalOpDef {
30 u8 toRm8, toRm32, fromRm8, fromRm32, imm8, imm32, simm8, eaximm8, eaximm32, ext;
31};
32
// One row per instruction. Columns follow the field order of NormalOpDef:
// toRm8, toRm32, fromRm8, fromRm32, imm8, imm32, simm8, eaximm8, eaximm32, ext.
33// 0xCC is code for invalid combination of immediates
34static const NormalOpDef normalops[11] = {
35 {0x00, 0x01, 0x02, 0x03, 0x80, 0x81, 0x83, 0x04, 0x05, 0}, // ADD
36 {0x10, 0x11, 0x12, 0x13, 0x80, 0x81, 0x83, 0x14, 0x15, 2}, // ADC
37
38 {0x28, 0x29, 0x2A, 0x2B, 0x80, 0x81, 0x83, 0x2C, 0x2D, 5}, // SUB
39 {0x18, 0x19, 0x1A, 0x1B, 0x80, 0x81, 0x83, 0x1C, 0x1D, 3}, // SBB
40
41 {0x20, 0x21, 0x22, 0x23, 0x80, 0x81, 0x83, 0x24, 0x25, 4}, // AND
42 {0x08, 0x09, 0x0A, 0x0B, 0x80, 0x81, 0x83, 0x0C, 0x0D, 1}, // OR
43
44 {0x30, 0x31, 0x32, 0x33, 0x80, 0x81, 0x83, 0x34, 0x35, 6}, // XOR
45 {0x88, 0x89, 0x8A, 0x8B, 0xC6, 0xC7, 0xCC, 0xCC, 0xCC, 0}, // MOV
46
47 {0x84, 0x85, 0x84, 0x85, 0xF6, 0xF7, 0xCC, 0xA8, 0xA9, 0}, // TEST (to == from)
48 {0x38, 0x39, 0x3A, 0x3B, 0x80, 0x81, 0x83, 0x3C, 0x3D, 7}, // CMP
49
50 {0x86, 0x87, 0x86, 0x87, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 7}, // XCHG
51};
52
// Byte values for SSE operations (NOTE(review): presumably the final opcode byte of each
// instruction; the users of this enum are not visible here - confirm).
// Some enumerators intentionally share a value: sseMOVLPfromRM / sseMOVHLPS are both 0x12 and
// sseMOVHPfromRM / sseMOVLHPS are both 0x16.
53enum NormalSSEOps {
54 sseCMP = 0xC2,
55 sseADD = 0x58, // ADD
56 sseSUB = 0x5C, // SUB
57 sseAND = 0x54, // AND
58 sseANDN = 0x55, // ANDN
59 sseOR = 0x56,
60 sseXOR = 0x57,
61 sseMUL = 0x59, // MUL
62 sseDIV = 0x5E, // DIV
63 sseMIN = 0x5D, // MIN
64 sseMAX = 0x5F, // MAX
65 sseCOMIS = 0x2F, // COMIS
66 sseUCOMIS = 0x2E, // UCOMIS
67 sseSQRT = 0x51, // SQRT
68 sseRSQRT = 0x52, // RSQRT (NO DOUBLE PRECISION!!!)
69 sseRCP = 0x53, // RCP
70 sseMOVAPfromRM = 0x28, // MOVAP from RM
71 sseMOVAPtoRM = 0x29, // MOVAP to RM
72 sseMOVUPfromRM = 0x10, // MOVUP from RM
73 sseMOVUPtoRM = 0x11, // MOVUP to RM
74 sseMOVLPfromRM = 0x12,
75 sseMOVLPtoRM = 0x13,
76 sseMOVHPfromRM = 0x16,
77 sseMOVHPtoRM = 0x17,
78 sseMOVHLPS = 0x12,
79 sseMOVLHPS = 0x16,
80 sseMOVDQfromRM = 0x6F,
81 sseMOVDQtoRM = 0x7F,
82 sseMASKMOVDQU = 0xF7,
83 sseLDDQU = 0xF0,
84 sseSHUF = 0xC6,
85 sseMOVNTDQ = 0xE7,
86 sseMOVNTP = 0x2B,
87 sseHADD = 0x7C,
88};
89
90void XEmitter::SetCodePtr(u8* ptr) {
91 code = ptr;
92}
93
94const u8* XEmitter::GetCodePtr() const {
95 return code;
96}
97
98u8* XEmitter::GetWritableCodePtr() {
99 return code;
100}
101
102void XEmitter::Write8(u8 value) {
103 *code++ = value;
104}
105
106void XEmitter::Write16(u16 value) {
107 std::memcpy(code, &value, sizeof(u16));
108 code += sizeof(u16);
109}
110
111void XEmitter::Write32(u32 value) {
112 std::memcpy(code, &value, sizeof(u32));
113 code += sizeof(u32);
114}
115
116void XEmitter::Write64(u64 value) {
117 std::memcpy(code, &value, sizeof(u64));
118 code += sizeof(u64);
119}
120
121void XEmitter::ReserveCodeSpace(int bytes) {
122 for (int i = 0; i < bytes; i++)
123 *code++ = 0xCC;
124}
125
126const u8* XEmitter::AlignCode4() {
127 int c = int((u64)code & 3);
128 if (c)
129 ReserveCodeSpace(4 - c);
130 return code;
131}
132
133const u8* XEmitter::AlignCode16() {
134 int c = int((u64)code & 15);
135 if (c)
136 ReserveCodeSpace(16 - c);
137 return code;
138}
139
140const u8* XEmitter::AlignCodePage() {
141 int c = int((u64)code & 4095);
142 if (c)
143 ReserveCodeSpace(4096 - c);
144 return code;
145}
146
147// This operation modifies flags; check to see the flags are locked.
148// If the flags are locked, we should immediately and loudly fail before
149// causing a subtle JIT bug.
150void XEmitter::CheckFlags() {
151 ASSERT_MSG(!flags_locked, "Attempt to modify flags while flags locked!");
152}
153
154void XEmitter::WriteModRM(int mod, int reg, int rm) {
155 Write8((u8)((mod << 6) | ((reg & 7) << 3) | (rm & 7)));
156}
157
158void XEmitter::WriteSIB(int scale, int index, int base) {
159 Write8((u8)((scale << 6) | ((index & 7) << 3) | (base & 7)));
160}
161
162void OpArg::WriteRex(XEmitter* emit, int opBits, int bits, int customOp) const {
163 if (customOp == -1)
164 customOp = operandReg;
165#ifdef ARCHITECTURE_x86_64
166 u8 op = 0x40;
167 // REX.W (whether operation is a 64-bit operation)
168 if (opBits == 64)
169 op |= 8;
170 // REX.R (whether ModR/M reg field refers to R8-R15.
171 if (customOp & 8)
172 op |= 4;
173 // REX.X (whether ModR/M SIB index field refers to R8-R15)
174 if (indexReg & 8)
175 op |= 2;
176 // REX.B (whether ModR/M rm or SIB base or opcode reg field refers to R8-R15)
177 if (offsetOrBaseReg & 8)
178 op |= 1;
179 // Write REX if wr have REX bits to write, or if the operation accesses
180 // SIL, DIL, BPL, or SPL.
181 if (op != 0x40 || (scale == SCALE_NONE && bits == 8 && (offsetOrBaseReg & 0x10c) == 4) ||
182 (opBits == 8 && (customOp & 0x10c) == 4)) {
183 emit->Write8(op);
184 // Check the operation doesn't access AH, BH, CH, or DH.
185 DEBUG_ASSERT((offsetOrBaseReg & 0x100) == 0);
186 DEBUG_ASSERT((customOp & 0x100) == 0);
187 }
188#else
189 DEBUG_ASSERT(opBits != 64);
190 DEBUG_ASSERT((customOp & 8) == 0 || customOp == -1);
191 DEBUG_ASSERT((indexReg & 8) == 0);
192 DEBUG_ASSERT((offsetOrBaseReg & 8) == 0);
193 DEBUG_ASSERT(opBits != 8 || (customOp & 0x10c) != 4 || customOp == -1);
194 DEBUG_ASSERT(scale == SCALE_ATREG || bits != 8 || (offsetOrBaseReg & 0x10c) != 4);
195#endif
196}
197
198void OpArg::WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm,
199 int W) const {
200 int R = !(regOp1 & 8);
201 int X = !(indexReg & 8);
202 int B = !(offsetOrBaseReg & 8);
203
204 int vvvv = (regOp2 == X64Reg::INVALID_REG) ? 0xf : (regOp2 ^ 0xf);
205
206 // do we need any VEX fields that only appear in the three-byte form?
207 if (X == 1 && B == 1 && W == 0 && mmmmm == 1) {
208 u8 RvvvvLpp = (R << 7) | (vvvv << 3) | (L << 2) | pp;
209 emit->Write8(0xC5);
210 emit->Write8(RvvvvLpp);
211 } else {
212 u8 RXBmmmmm = (R << 7) | (X << 6) | (B << 5) | mmmmm;
213 u8 WvvvvLpp = (W << 7) | (vvvv << 3) | (L << 2) | pp;
214 emit->Write8(0xC4);
215 emit->Write8(RXBmmmmm);
216 emit->Write8(WvvvvLpp);
217 }
218}
219
// Emits everything that follows the opcode for this operand: the ModRM byte, an optional SIB
// byte and an optional 8- or 32-bit displacement.
//   emit              - emitter receiving the bytes
//   extraBytes        - added to the RIP-relative base address; NOTE(review): presumably the
//                       number of immediate bytes the caller emits after this operand - confirm
//   _operandReg       - register for the ModRM reg field; INVALID_REG selects this->operandReg
//   warn_64bit_offset - when false, the RIP-relative range assertion is suppressed
220void OpArg::WriteRest(XEmitter* emit, int extraBytes, X64Reg _operandReg,
221 bool warn_64bit_offset) const {
222 if (_operandReg == INVALID_REG)
223 _operandReg = (X64Reg)this->operandReg;
224 int mod = 0;
225 int ireg = indexReg;
226 bool SIB = false;
227 int _offsetOrBaseReg = this->offsetOrBaseReg;
228
229 if (scale == SCALE_RIP) // Also, on 32-bit, just an immediate address
230 {
231 // Oh, RIP addressing.
232 _offsetOrBaseReg = 5;
233 emit->WriteModRM(0, _operandReg, _offsetOrBaseReg);
234// TODO : add some checks
235#ifdef ARCHITECTURE_x86_64
// The displacement is relative to the code pointer after the 4 displacement bytes plus
// extraBytes.
236 u64 ripAddr = (u64)emit->GetCodePtr() + 4 + extraBytes;
237 s64 distance = (s64)offset - (s64)ripAddr;
238 ASSERT_MSG((distance < 0x80000000LL && distance >= -0x80000000LL) || !warn_64bit_offset,
239 "WriteRest: op out of range (0x%" PRIx64 " uses 0x%" PRIx64 ")", ripAddr,
240 offset);
241 s32 offs = (s32)distance;
242 emit->Write32((u32)offs);
243#else
244 emit->Write32((u32)offset);
245#endif
246 return;
247 }
248
// Choose the ModRM "mod" field and decide whether a SIB byte is needed.
249 if (scale == 0) {
250 // Oh, no memory, Just a reg.
251 mod = 3; // 11
252 } else if (scale >= 1) {
253 // Ah good, no scaling.
254 if (scale == SCALE_ATREG && !((_offsetOrBaseReg & 7) == 4 || (_offsetOrBaseReg & 7) == 5)) {
255 // Okay, we're good. No SIB necessary.
256 int ioff = (int)offset;
257 if (ioff == 0) {
258 mod = 0;
259 } else if (ioff < -128 || ioff > 127) {
260 mod = 2; // 32-bit displacement
261 } else {
262 mod = 1; // 8-bit displacement
263 }
264 } else if (scale >= SCALE_NOBASE_2 && scale <= SCALE_NOBASE_8) {
// Scaled index without a base register: mod 0 with base field 5; the 32-bit displacement is
// written at the end of this function.
265 SIB = true;
266 mod = 0;
267 _offsetOrBaseReg = 5;
268 } else // if (scale != SCALE_ATREG)
269 {
270 if ((_offsetOrBaseReg & 7) == 4) // this would occupy the SIB encoding :(
271 {
272 // So we have to fake it with SIB encoding :(
273 SIB = true;
274 }
275
276 if (scale >= SCALE_1 && scale < SCALE_ATREG) {
277 SIB = true;
278 }
279
280 if (scale == SCALE_ATREG && ((_offsetOrBaseReg & 7) == 4)) {
281 SIB = true;
282 ireg = _offsetOrBaseReg;
283 }
284
285 // Okay, we're fine. Just disp encoding.
286 // We need displacement. Which size?
287 int ioff = (int)(s64)offset;
288 if (ioff < -128 || ioff > 127) {
289 mod = 2; // 32-bit displacement
290 } else {
291 mod = 1; // 8-bit displacement
292 }
293 }
294 }
295
296 // Okay. Time to do the actual writing
297 // ModRM byte:
298 int oreg = _offsetOrBaseReg;
// An rm field of 4 signals that a SIB byte follows.
299 if (SIB)
300 oreg = 4;
301
302 // TODO(ector): WTF is this if about? I don't remember writing it :-)
303 // if (RIP)
304 // oreg = 5;
305
306 emit->WriteModRM(mod, _operandReg & 7, oreg & 7);
307
308 if (SIB) {
309 // SIB byte
// ss is the 2-bit scale field: 0, 1, 2, 3 for scale factors 1, 2, 4, 8.
310 int ss;
311 switch (scale) {
312 case SCALE_NONE:
313 _offsetOrBaseReg = 4;
314 ss = 0;
315 break; // RSP
316 case SCALE_1:
317 ss = 0;
318 break;
319 case SCALE_2:
320 ss = 1;
321 break;
322 case SCALE_4:
323 ss = 2;
324 break;
325 case SCALE_8:
326 ss = 3;
327 break;
328 case SCALE_NOBASE_2:
329 ss = 1;
330 break;
331 case SCALE_NOBASE_4:
332 ss = 2;
333 break;
334 case SCALE_NOBASE_8:
335 ss = 3;
336 break;
337 case SCALE_ATREG:
338 ss = 0;
339 break;
340 default:
341 ASSERT_MSG(0, "Invalid scale for SIB byte");
342 ss = 0;
343 break;
344 }
345 emit->Write8((u8)((ss << 6) | ((ireg & 7) << 3) | (_offsetOrBaseReg & 7)));
346 }
347
// Finally the displacement, sized according to mod (no-base forms always take 32 bits).
348 if (mod == 1) // 8-bit disp
349 {
350 emit->Write8((u8)(s8)(s32)offset);
351 } else if (mod == 2 || (scale >= SCALE_NOBASE_2 && scale <= SCALE_NOBASE_8)) // 32-bit disp
352 {
353 emit->Write32((u32)offset);
354 }
355}
356
357// W = operand extended width (1 if 64-bit)
358// R = register# upper bit
359// X = scale amnt upper bit
360// B = base register# upper bit
361void XEmitter::Rex(int w, int r, int x, int b) {
362 w = w ? 1 : 0;
363 r = r ? 1 : 0;
364 x = x ? 1 : 0;
365 b = b ? 1 : 0;
366 u8 rx = (u8)(0x40 | (w << 3) | (r << 2) | (x << 1) | (b));
367 if (rx != 0x40)
368 Write8(rx);
369}
370
371void XEmitter::JMP(const u8* addr, bool force5Bytes) {
372 u64 fn = (u64)addr;
373 if (!force5Bytes) {
374 s64 distance = (s64)(fn - ((u64)code + 2));
375 ASSERT_MSG(distance >= -0x80 && distance < 0x80,
376 "Jump target too far away, needs force5Bytes = true");
377 // 8 bits will do
378 Write8(0xEB);
379 Write8((u8)(s8)distance);
380 } else {
381 s64 distance = (s64)(fn - ((u64)code + 5));
382
383 ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL,
384 "Jump target too far away, needs indirect register");
385 Write8(0xE9);
386 Write32((u32)(s32)distance);
387 }
388}
389
390void XEmitter::JMPptr(const OpArg& arg2) {
391 OpArg arg = arg2;
392 if (arg.IsImm())
393 ASSERT_MSG(0, "JMPptr - Imm argument");
394 arg.operandReg = 4;
395 arg.WriteRex(this, 0, 0);
396 Write8(0xFF);
397 arg.WriteRest(this);
398}
399
400// Can be used to trap other processors, before overwriting their code
401// not used in dolphin
402void XEmitter::JMPself() {
403 Write8(0xEB);
404 Write8(0xFE);
405}
406
407void XEmitter::CALLptr(OpArg arg) {
408 if (arg.IsImm())
409 ASSERT_MSG(0, "CALLptr - Imm argument");
410 arg.operandReg = 2;
411 arg.WriteRex(this, 0, 0);
412 Write8(0xFF);
413 arg.WriteRest(this);
414}
415
416void XEmitter::CALL(const void* fnptr) {
417 u64 distance = u64(fnptr) - (u64(code) + 5);
418 ASSERT_MSG(distance < 0x0000000080000000ULL || distance >= 0xFFFFFFFF80000000ULL,
419 "CALL out of range (%p calls %p)", code, fnptr);
420 Write8(0xE8);
421 Write32(u32(distance));
422}
423
424FixupBranch XEmitter::CALL() {
425 FixupBranch branch;
426 branch.type = 1;
427 branch.ptr = code + 5;
428
429 Write8(0xE8);
430 Write32(0);
431
432 return branch;
433}
434
435FixupBranch XEmitter::J(bool force5bytes) {
436 FixupBranch branch;
437 branch.type = force5bytes ? 1 : 0;
438 branch.ptr = code + (force5bytes ? 5 : 2);
439 if (!force5bytes) {
440 // 8 bits will do
441 Write8(0xEB);
442 Write8(0);
443 } else {
444 Write8(0xE9);
445 Write32(0);
446 }
447 return branch;
448}
449
450FixupBranch XEmitter::J_CC(CCFlags conditionCode, bool force5bytes) {
451 FixupBranch branch;
452 branch.type = force5bytes ? 1 : 0;
453 branch.ptr = code + (force5bytes ? 6 : 2);
454 if (!force5bytes) {
455 // 8 bits will do
456 Write8(0x70 + conditionCode);
457 Write8(0);
458 } else {
459 Write8(0x0F);
460 Write8(0x80 + conditionCode);
461 Write32(0);
462 }
463 return branch;
464}
465
466void XEmitter::J_CC(CCFlags conditionCode, const u8* addr, bool force5bytes) {
467 u64 fn = (u64)addr;
468 s64 distance = (s64)(fn - ((u64)code + 2));
469 if (distance < -0x80 || distance >= 0x80 || force5bytes) {
470 distance = (s64)(fn - ((u64)code + 6));
471 ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL,
472 "Jump target too far away, needs indirect register");
473 Write8(0x0F);
474 Write8(0x80 + conditionCode);
475 Write32((u32)(s32)distance);
476 } else {
477 Write8(0x70 + conditionCode);
478 Write8((u8)(s8)distance);
479 }
480}
481
482void XEmitter::SetJumpTarget(const FixupBranch& branch) {
483 if (branch.type == 0) {
484 s64 distance = (s64)(code - branch.ptr);
485 ASSERT_MSG(distance >= -0x80 && distance < 0x80,
486 "Jump target too far away, needs force5Bytes = true");
487 branch.ptr[-1] = (u8)(s8)distance;
488 } else if (branch.type == 1) {
489 s64 distance = (s64)(code - branch.ptr);
490 ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL,
491 "Jump target too far away, needs indirect register");
492 ((s32*)branch.ptr)[-1] = (s32)distance;
493 }
494}
495
496void XEmitter::SetJumpTarget(const FixupBranch& branch, const u8* target) {
497 if (branch.type == 0) {
498 s64 distance = (s64)(target - branch.ptr);
499 ASSERT_MSG(distance >= -0x80 && distance < 0x80,
500 "Jump target too far away, needs force5Bytes = true");
501 branch.ptr[-1] = (u8)(s8)distance;
502 } else if (branch.type == 1) {
503 s64 distance = (s64)(target - branch.ptr);
504 ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL,
505 "Jump target too far away, needs indirect register");
506 ((s32*)branch.ptr)[-1] = (s32)distance;
507 }
508}
509
510// Single byte opcodes
511// There is no PUSHAD/POPAD in 64-bit mode.
512void XEmitter::INT3() {
513 Write8(0xCC);
514}
515void XEmitter::RET() {
516 Write8(0xC3);
517}
518void XEmitter::RET_FAST() {
519 Write8(0xF3);
520 Write8(0xC3);
521} // two-byte return (rep ret) - recommended by AMD optimization manual for the case of jumping to a
522 // ret
523
524// The first sign of decadence: optimized NOPs.
525void XEmitter::NOP(size_t size) {
526 DEBUG_ASSERT((int)size > 0);
527 while (true) {
528 switch (size) {
529 case 0:
530 return;
531 case 1:
532 Write8(0x90);
533 return;
534 case 2:
535 Write8(0x66);
536 Write8(0x90);
537 return;
538 case 3:
539 Write8(0x0F);
540 Write8(0x1F);
541 Write8(0x00);
542 return;
543 case 4:
544 Write8(0x0F);
545 Write8(0x1F);
546 Write8(0x40);
547 Write8(0x00);
548 return;
549 case 5:
550 Write8(0x0F);
551 Write8(0x1F);
552 Write8(0x44);
553 Write8(0x00);
554 Write8(0x00);
555 return;
556 case 6:
557 Write8(0x66);
558 Write8(0x0F);
559 Write8(0x1F);
560 Write8(0x44);
561 Write8(0x00);
562 Write8(0x00);
563 return;
564 case 7:
565 Write8(0x0F);
566 Write8(0x1F);
567 Write8(0x80);
568 Write8(0x00);
569 Write8(0x00);
570 Write8(0x00);
571 Write8(0x00);
572 return;
573 case 8:
574 Write8(0x0F);
575 Write8(0x1F);
576 Write8(0x84);
577 Write8(0x00);
578 Write8(0x00);
579 Write8(0x00);
580 Write8(0x00);
581 Write8(0x00);
582 return;
583 case 9:
584 Write8(0x66);
585 Write8(0x0F);
586 Write8(0x1F);
587 Write8(0x84);
588 Write8(0x00);
589 Write8(0x00);
590 Write8(0x00);
591 Write8(0x00);
592 Write8(0x00);
593 return;
594 case 10:
595 Write8(0x66);
596 Write8(0x66);
597 Write8(0x0F);
598 Write8(0x1F);
599 Write8(0x84);
600 Write8(0x00);
601 Write8(0x00);
602 Write8(0x00);
603 Write8(0x00);
604 Write8(0x00);
605 return;
606 default:
607 // Even though x86 instructions are allowed to be up to 15 bytes long,
608 // AMD advises against using NOPs longer than 11 bytes because they
609 // carry a performance penalty on CPUs older than AMD family 16h.
610 Write8(0x66);
611 Write8(0x66);
612 Write8(0x66);
613 Write8(0x0F);
614 Write8(0x1F);
615 Write8(0x84);
616 Write8(0x00);
617 Write8(0x00);
618 Write8(0x00);
619 Write8(0x00);
620 Write8(0x00);
621 size -= 11;
622 continue;
623 }
624 }
625}
626
627void XEmitter::PAUSE() {
628 Write8(0xF3);
629 NOP();
630} // use in tight spinloops for energy saving on some cpu
631void XEmitter::CLC() {
632 CheckFlags();
633 Write8(0xF8);
634} // clear carry
635void XEmitter::CMC() {
636 CheckFlags();
637 Write8(0xF5);
638} // flip carry
639void XEmitter::STC() {
640 CheckFlags();
641 Write8(0xF9);
642} // set carry
643
644// TODO: xchg ah, al ???
645void XEmitter::XCHG_AHAL() {
646 Write8(0x86);
647 Write8(0xe0);
648 // alt. 86 c4
649}
650
651// These two can not be executed on early Intel 64-bit CPU:s, only on AMD!
652void XEmitter::LAHF() {
653 Write8(0x9F);
654}
655void XEmitter::SAHF() {
656 CheckFlags();
657 Write8(0x9E);
658}
659
660void XEmitter::PUSHF() {
661 Write8(0x9C);
662}
663void XEmitter::POPF() {
664 CheckFlags();
665 Write8(0x9D);
666}
667
668void XEmitter::LFENCE() {
669 Write8(0x0F);
670 Write8(0xAE);
671 Write8(0xE8);
672}
673void XEmitter::MFENCE() {
674 Write8(0x0F);
675 Write8(0xAE);
676 Write8(0xF0);
677}
678void XEmitter::SFENCE() {
679 Write8(0x0F);
680 Write8(0xAE);
681 Write8(0xF8);
682}
683
684void XEmitter::WriteSimple1Byte(int bits, u8 byte, X64Reg reg) {
685 if (bits == 16)
686 Write8(0x66);
687 Rex(bits == 64, 0, 0, (int)reg >> 3);
688 Write8(byte + ((int)reg & 7));
689}
690
691void XEmitter::WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg) {
692 if (bits == 16)
693 Write8(0x66);
694 Rex(bits == 64, 0, 0, (int)reg >> 3);
695 Write8(byte1);
696 Write8(byte2 + ((int)reg & 7));
697}
698
699void XEmitter::CWD(int bits) {
700 if (bits == 16)
701 Write8(0x66);
702 Rex(bits == 64, 0, 0, 0);
703 Write8(0x99);
704}
705
706void XEmitter::CBW(int bits) {
707 if (bits == 8)
708 Write8(0x66);
709 Rex(bits == 32, 0, 0, 0);
710 Write8(0x98);
711}
712
713// Simple opcodes
714
715// push/pop do not need wide to be 64-bit
716void XEmitter::PUSH(X64Reg reg) {
717 WriteSimple1Byte(32, 0x50, reg);
718}
719void XEmitter::POP(X64Reg reg) {
720 WriteSimple1Byte(32, 0x58, reg);
721}
722
723void XEmitter::PUSH(int bits, const OpArg& reg) {
724 if (reg.IsSimpleReg())
725 PUSH(reg.GetSimpleReg());
726 else if (reg.IsImm()) {
727 switch (reg.GetImmBits()) {
728 case 8:
729 Write8(0x6A);
730 Write8((u8)(s8)reg.offset);
731 break;
732 case 16:
733 Write8(0x66);
734 Write8(0x68);
735 Write16((u16)(s16)(s32)reg.offset);
736 break;
737 case 32:
738 Write8(0x68);
739 Write32((u32)reg.offset);
740 break;
741 default:
742 ASSERT_MSG(0, "PUSH - Bad imm bits");
743 break;
744 }
745 } else {
746 if (bits == 16)
747 Write8(0x66);
748 reg.WriteRex(this, bits, bits);
749 Write8(0xFF);
750 reg.WriteRest(this, 0, (X64Reg)6);
751 }
752}
753
754void XEmitter::POP(int /*bits*/, const OpArg& reg) {
755 if (reg.IsSimpleReg())
756 POP(reg.GetSimpleReg());
757 else
758 ASSERT_MSG(0, "POP - Unsupported encoding");
759}
760
761void XEmitter::BSWAP(int bits, X64Reg reg) {
762 if (bits >= 32) {
763 WriteSimple2Byte(bits, 0x0F, 0xC8, reg);
764 } else if (bits == 16) {
765 ROL(16, R(reg), Imm8(8));
766 } else if (bits == 8) {
767 // Do nothing - can't bswap a single byte...
768 } else {
769 ASSERT_MSG(0, "BSWAP - Wrong number of bits");
770 }
771}
772
773// Undefined opcode - reserved
774// If we ever need a way to always cause a non-breakpoint hard exception...
775void XEmitter::UD2() {
776 Write8(0x0F);
777 Write8(0x0B);
778}
779
780void XEmitter::PREFETCH(PrefetchLevel level, OpArg arg) {
781 ASSERT_MSG(!arg.IsImm(), "PREFETCH - Imm argument");
782 arg.operandReg = (u8)level;
783 arg.WriteRex(this, 0, 0);
784 Write8(0x0F);
785 Write8(0x18);
786 arg.WriteRest(this);
787}
788
789void XEmitter::SETcc(CCFlags flag, OpArg dest) {
790 ASSERT_MSG(!dest.IsImm(), "SETcc - Imm argument");
791 dest.operandReg = 0;
792 dest.WriteRex(this, 0, 8);
793 Write8(0x0F);
794 Write8(0x90 + (u8)flag);
795 dest.WriteRest(this);
796}
797
798void XEmitter::CMOVcc(int bits, X64Reg dest, OpArg src, CCFlags flag) {
799 ASSERT_MSG(!src.IsImm(), "CMOVcc - Imm argument");
800 ASSERT_MSG(bits != 8, "CMOVcc - 8 bits unsupported");
801 if (bits == 16)
802 Write8(0x66);
803 src.operandReg = dest;
804 src.WriteRex(this, bits, bits);
805 Write8(0x0F);
806 Write8(0x40 + (u8)flag);
807 src.WriteRest(this);
808}
809
810void XEmitter::WriteMulDivType(int bits, OpArg src, int ext) {
811 ASSERT_MSG(!src.IsImm(), "WriteMulDivType - Imm argument");
812 CheckFlags();
813 src.operandReg = ext;
814 if (bits == 16)
815 Write8(0x66);
816 src.WriteRex(this, bits, bits, 0);
817 if (bits == 8) {
818 Write8(0xF6);
819 } else {
820 Write8(0xF7);
821 }
822 src.WriteRest(this);
823}
824
825void XEmitter::MUL(int bits, const OpArg& src) {
826 WriteMulDivType(bits, src, 4);
827}
828void XEmitter::DIV(int bits, const OpArg& src) {
829 WriteMulDivType(bits, src, 6);
830}
831void XEmitter::IMUL(int bits, const OpArg& src) {
832 WriteMulDivType(bits, src, 5);
833}
834void XEmitter::IDIV(int bits, const OpArg& src) {
835 WriteMulDivType(bits, src, 7);
836}
837void XEmitter::NEG(int bits, const OpArg& src) {
838 WriteMulDivType(bits, src, 3);
839}
840void XEmitter::NOT(int bits, const OpArg& src) {
841 WriteMulDivType(bits, src, 2);
842}
843
844void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep) {
845 ASSERT_MSG(!src.IsImm(), "WriteBitSearchType - Imm argument");
846 CheckFlags();
847 src.operandReg = (u8)dest;
848 if (bits == 16)
849 Write8(0x66);
850 if (rep)
851 Write8(0xF3);
852 src.WriteRex(this, bits, bits);
853 Write8(0x0F);
854 Write8(byte2);
855 src.WriteRest(this);
856}
857
858void XEmitter::MOVNTI(int bits, const OpArg& dest, X64Reg src) {
859 if (bits <= 16)
860 ASSERT_MSG(0, "MOVNTI - bits<=16");
861 WriteBitSearchType(bits, src, dest, 0xC3);
862}
863
864void XEmitter::BSF(int bits, X64Reg dest, const OpArg& src) {
865 WriteBitSearchType(bits, dest, src, 0xBC);
866} // Bottom bit to top bit
867void XEmitter::BSR(int bits, X64Reg dest, const OpArg& src) {
868 WriteBitSearchType(bits, dest, src, 0xBD);
869} // Top bit to bottom bit
870
871void XEmitter::TZCNT(int bits, X64Reg dest, const OpArg& src) {
872 CheckFlags();
873 if (!Common::GetCPUCaps().bmi1)
874 ASSERT_MSG(0, "Trying to use BMI1 on a system that doesn't support it. Bad programmer.");
875 WriteBitSearchType(bits, dest, src, 0xBC, true);
876}
877void XEmitter::LZCNT(int bits, X64Reg dest, const OpArg& src) {
878 CheckFlags();
879 if (!Common::GetCPUCaps().lzcnt)
880 ASSERT_MSG(0, "Trying to use LZCNT on a system that doesn't support it. Bad programmer.");
881 WriteBitSearchType(bits, dest, src, 0xBD, true);
882}
883
884void XEmitter::MOVSX(int dbits, int sbits, X64Reg dest, OpArg src) {
885 ASSERT_MSG(!src.IsImm(), "MOVSX - Imm argument");
886 if (dbits == sbits) {
887 MOV(dbits, R(dest), src);
888 return;
889 }
890 src.operandReg = (u8)dest;
891 if (dbits == 16)
892 Write8(0x66);
893 src.WriteRex(this, dbits, sbits);
894 if (sbits == 8) {
895 Write8(0x0F);
896 Write8(0xBE);
897 } else if (sbits == 16) {
898 Write8(0x0F);
899 Write8(0xBF);
900 } else if (sbits == 32 && dbits == 64) {
901 Write8(0x63);
902 } else {
903 Crash();
904 }
905 src.WriteRest(this);
906}
907
908void XEmitter::MOVZX(int dbits, int sbits, X64Reg dest, OpArg src) {
909 ASSERT_MSG(!src.IsImm(), "MOVZX - Imm argument");
910 if (dbits == sbits) {
911 MOV(dbits, R(dest), src);
912 return;
913 }
914 src.operandReg = (u8)dest;
915 if (dbits == 16)
916 Write8(0x66);
917 // the 32bit result is automatically zero extended to 64bit
918 src.WriteRex(this, dbits == 64 ? 32 : dbits, sbits);
919 if (sbits == 8) {
920 Write8(0x0F);
921 Write8(0xB6);
922 } else if (sbits == 16) {
923 Write8(0x0F);
924 Write8(0xB7);
925 } else if (sbits == 32 && dbits == 64) {
926 Write8(0x8B);
927 } else {
928 ASSERT_MSG(0, "MOVZX - Invalid size");
929 }
930 src.WriteRest(this);
931}
932
933void XEmitter::MOVBE(int bits, const OpArg& dest, const OpArg& src) {
934 ASSERT_MSG(Common::GetCPUCaps().movbe,
935 "Generating MOVBE on a system that does not support it.");
936 if (bits == 8) {
937 MOV(bits, dest, src);
938 return;
939 }
940
941 if (bits == 16)
942 Write8(0x66);
943
944 if (dest.IsSimpleReg()) {
945 ASSERT_MSG(!src.IsSimpleReg() && !src.IsImm(), "MOVBE: Loading from !mem");
946 src.WriteRex(this, bits, bits, dest.GetSimpleReg());
947 Write8(0x0F);
948 Write8(0x38);
949 Write8(0xF0);
950 src.WriteRest(this, 0, dest.GetSimpleReg());
951 } else if (src.IsSimpleReg()) {
952 ASSERT_MSG(!dest.IsSimpleReg() && !dest.IsImm(), "MOVBE: Storing to !mem");
953 dest.WriteRex(this, bits, bits, src.GetSimpleReg());
954 Write8(0x0F);
955 Write8(0x38);
956 Write8(0xF1);
957 dest.WriteRest(this, 0, src.GetSimpleReg());
958 } else {
959 ASSERT_MSG(0, "MOVBE: Not loading or storing to mem");
960 }
961}
962
963void XEmitter::LEA(int bits, X64Reg dest, OpArg src) {
964 ASSERT_MSG(!src.IsImm(), "LEA - Imm argument");
965 src.operandReg = (u8)dest;
966 if (bits == 16)
967 Write8(0x66); // TODO: performance warning
968 src.WriteRex(this, bits, bits);
969 Write8(0x8D);
970 src.WriteRest(this, 0, INVALID_REG, bits == 64);
971}
972
973// shift can be either imm8 or cl
974void XEmitter::WriteShift(int bits, OpArg dest, const OpArg& shift, int ext) {
975 CheckFlags();
976 bool writeImm = false;
977 if (dest.IsImm()) {
978 ASSERT_MSG(0, "WriteShift - can't shift imms");
979 }
980 if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) ||
981 (shift.IsImm() && shift.GetImmBits() != 8)) {
982 ASSERT_MSG(0, "WriteShift - illegal argument");
983 }
984 dest.operandReg = ext;
985 if (bits == 16)
986 Write8(0x66);
987 dest.WriteRex(this, bits, bits, 0);
988 if (shift.GetImmBits() == 8) {
989 // ok an imm
990 u8 imm = (u8)shift.offset;
991 if (imm == 1) {
992 Write8(bits == 8 ? 0xD0 : 0xD1);
993 } else {
994 writeImm = true;
995 Write8(bits == 8 ? 0xC0 : 0xC1);
996 }
997 } else {
998 Write8(bits == 8 ? 0xD2 : 0xD3);
999 }
1000 dest.WriteRest(this, writeImm ? 1 : 0);
1001 if (writeImm)
1002 Write8((u8)shift.offset);
1003}
1004
1005// large rotates and shift are slower on intel than amd
1006// intel likes to rotate by 1, and the op is smaller too
1007void XEmitter::ROL(int bits, const OpArg& dest, const OpArg& shift) {
1008 WriteShift(bits, dest, shift, 0);
1009}
1010void XEmitter::ROR(int bits, const OpArg& dest, const OpArg& shift) {
1011 WriteShift(bits, dest, shift, 1);
1012}
1013void XEmitter::RCL(int bits, const OpArg& dest, const OpArg& shift) {
1014 WriteShift(bits, dest, shift, 2);
1015}
1016void XEmitter::RCR(int bits, const OpArg& dest, const OpArg& shift) {
1017 WriteShift(bits, dest, shift, 3);
1018}
1019void XEmitter::SHL(int bits, const OpArg& dest, const OpArg& shift) {
1020 WriteShift(bits, dest, shift, 4);
1021}
1022void XEmitter::SHR(int bits, const OpArg& dest, const OpArg& shift) {
1023 WriteShift(bits, dest, shift, 5);
1024}
1025void XEmitter::SAR(int bits, const OpArg& dest, const OpArg& shift) {
1026 WriteShift(bits, dest, shift, 7);
1027}
1028
1029// index can be either imm8 or register, don't use memory destination because it's slow
1030void XEmitter::WriteBitTest(int bits, const OpArg& dest, const OpArg& index, int ext) {
1031 CheckFlags();
1032 if (dest.IsImm()) {
1033 ASSERT_MSG(0, "WriteBitTest - can't test imms");
1034 }
1035 if ((index.IsImm() && index.GetImmBits() != 8)) {
1036 ASSERT_MSG(0, "WriteBitTest - illegal argument");
1037 }
1038 if (bits == 16)
1039 Write8(0x66);
1040 if (index.IsImm()) {
1041 dest.WriteRex(this, bits, bits);
1042 Write8(0x0F);
1043 Write8(0xBA);
1044 dest.WriteRest(this, 1, (X64Reg)ext);
1045 Write8((u8)index.offset);
1046 } else {
1047 X64Reg operand = index.GetSimpleReg();
1048 dest.WriteRex(this, bits, bits, operand);
1049 Write8(0x0F);
1050 Write8(0x83 + 8 * ext);
1051 dest.WriteRest(this, 1, operand);
1052 }
1053}
1054
1055void XEmitter::BT(int bits, const OpArg& dest, const OpArg& index) {
1056 WriteBitTest(bits, dest, index, 4);
1057}
1058void XEmitter::BTS(int bits, const OpArg& dest, const OpArg& index) {
1059 WriteBitTest(bits, dest, index, 5);
1060}
1061void XEmitter::BTR(int bits, const OpArg& dest, const OpArg& index) {
1062 WriteBitTest(bits, dest, index, 6);
1063}
1064void XEmitter::BTC(int bits, const OpArg& dest, const OpArg& index) {
1065 WriteBitTest(bits, dest, index, 7);
1066}
1067
1068// shift can be either imm8 or cl
1069void XEmitter::SHRD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift) {
1070 CheckFlags();
1071 if (dest.IsImm()) {
1072 ASSERT_MSG(0, "SHRD - can't use imms as destination");
1073 }
1074 if (!src.IsSimpleReg()) {
1075 ASSERT_MSG(0, "SHRD - must use simple register as source");
1076 }
1077 if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) ||
1078 (shift.IsImm() && shift.GetImmBits() != 8)) {
1079 ASSERT_MSG(0, "SHRD - illegal shift");
1080 }
1081 if (bits == 16)
1082 Write8(0x66);
1083 X64Reg operand = src.GetSimpleReg();
1084 dest.WriteRex(this, bits, bits, operand);
1085 if (shift.GetImmBits() == 8) {
1086 Write8(0x0F);
1087 Write8(0xAC);
1088 dest.WriteRest(this, 1, operand);
1089 Write8((u8)shift.offset);
1090 } else {
1091 Write8(0x0F);
1092 Write8(0xAD);
1093 dest.WriteRest(this, 0, operand);
1094 }
1095}
1096
1097void XEmitter::SHLD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift) {
1098 CheckFlags();
1099 if (dest.IsImm()) {
1100 ASSERT_MSG(0, "SHLD - can't use imms as destination");
1101 }
1102 if (!src.IsSimpleReg()) {
1103 ASSERT_MSG(0, "SHLD - must use simple register as source");
1104 }
1105 if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) ||
1106 (shift.IsImm() && shift.GetImmBits() != 8)) {
1107 ASSERT_MSG(0, "SHLD - illegal shift");
1108 }
1109 if (bits == 16)
1110 Write8(0x66);
1111 X64Reg operand = src.GetSimpleReg();
1112 dest.WriteRex(this, bits, bits, operand);
1113 if (shift.GetImmBits() == 8) {
1114 Write8(0x0F);
1115 Write8(0xA4);
1116 dest.WriteRest(this, 1, operand);
1117 Write8((u8)shift.offset);
1118 } else {
1119 Write8(0x0F);
1120 Write8(0xA5);
1121 dest.WriteRest(this, 0, operand);
1122 }
1123}
1124
1125void OpArg::WriteSingleByteOp(XEmitter* emit, u8 op, X64Reg _operandReg, int bits) {
1126 if (bits == 16)
1127 emit->Write8(0x66);
1128
1129 this->operandReg = (u8)_operandReg;
1130 WriteRex(emit, bits, bits);
1131 emit->Write8(op);
1132 WriteRest(emit);
1133}
1134
1135// operand can either be immediate or register
1136void OpArg::WriteNormalOp(XEmitter* emit, bool toRM, NormalOp op, const OpArg& operand,
1137 int bits) const {
1138 X64Reg _operandReg;
1139 if (IsImm()) {
1140 ASSERT_MSG(0, "WriteNormalOp - Imm argument, wrong order");
1141 }
1142
1143 if (bits == 16)
1144 emit->Write8(0x66);
1145
1146 int immToWrite = 0;
1147
1148 if (operand.IsImm()) {
1149 WriteRex(emit, bits, bits);
1150
1151 if (!toRM) {
1152 ASSERT_MSG(0, "WriteNormalOp - Writing to Imm (!toRM)");
1153 }
1154
1155 if (operand.scale == SCALE_IMM8 && bits == 8) {
1156 // op al, imm8
1157 if (!scale && offsetOrBaseReg == AL && normalops[op].eaximm8 != 0xCC) {
1158 emit->Write8(normalops[op].eaximm8);
1159 emit->Write8((u8)operand.offset);
1160 return;
1161 }
1162 // mov reg, imm8
1163 if (!scale && op == nrmMOV) {
1164 emit->Write8(0xB0 + (offsetOrBaseReg & 7));
1165 emit->Write8((u8)operand.offset);
1166 return;
1167 }
1168 // op r/m8, imm8
1169 emit->Write8(normalops[op].imm8);
1170 immToWrite = 8;
1171 } else if ((operand.scale == SCALE_IMM16 && bits == 16) ||
1172 (operand.scale == SCALE_IMM32 && bits == 32) ||
1173 (operand.scale == SCALE_IMM32 && bits == 64)) {
1174 // Try to save immediate size if we can, but first check to see
1175 // if the instruction supports simm8.
1176 // op r/m, imm8
1177 if (normalops[op].simm8 != 0xCC &&
1178 ((operand.scale == SCALE_IMM16 && (s16)operand.offset == (s8)operand.offset) ||
1179 (operand.scale == SCALE_IMM32 && (s32)operand.offset == (s8)operand.offset))) {
1180 emit->Write8(normalops[op].simm8);
1181 immToWrite = 8;
1182 } else {
1183 // mov reg, imm
1184 if (!scale && op == nrmMOV && bits != 64) {
1185 emit->Write8(0xB8 + (offsetOrBaseReg & 7));
1186 if (bits == 16)
1187 emit->Write16((u16)operand.offset);
1188 else
1189 emit->Write32((u32)operand.offset);
1190 return;
1191 }
1192 // op eax, imm
1193 if (!scale && offsetOrBaseReg == EAX && normalops[op].eaximm32 != 0xCC) {
1194 emit->Write8(normalops[op].eaximm32);
1195 if (bits == 16)
1196 emit->Write16((u16)operand.offset);
1197 else
1198 emit->Write32((u32)operand.offset);
1199 return;
1200 }
1201 // op r/m, imm
1202 emit->Write8(normalops[op].imm32);
1203 immToWrite = bits == 16 ? 16 : 32;
1204 }
1205 } else if ((operand.scale == SCALE_IMM8 && bits == 16) ||
1206 (operand.scale == SCALE_IMM8 && bits == 32) ||
1207 (operand.scale == SCALE_IMM8 && bits == 64)) {
1208 // op r/m, imm8
1209 emit->Write8(normalops[op].simm8);
1210 immToWrite = 8;
1211 } else if (operand.scale == SCALE_IMM64 && bits == 64) {
1212 if (scale) {
1213 ASSERT_MSG(0, "WriteNormalOp - MOV with 64-bit imm requres register destination");
1214 }
1215 // mov reg64, imm64
1216 else if (op == nrmMOV) {
1217 emit->Write8(0xB8 + (offsetOrBaseReg & 7));
1218 emit->Write64((u64)operand.offset);
1219 return;
1220 }
1221 ASSERT_MSG(0, "WriteNormalOp - Only MOV can take 64-bit imm");
1222 } else {
1223 ASSERT_MSG(0, "WriteNormalOp - Unhandled case");
1224 }
1225 _operandReg = (X64Reg)normalops[op].ext; // pass extension in REG of ModRM
1226 } else {
1227 _operandReg = (X64Reg)operand.offsetOrBaseReg;
1228 WriteRex(emit, bits, bits, _operandReg);
1229 // op r/m, reg
1230 if (toRM) {
1231 emit->Write8(bits == 8 ? normalops[op].toRm8 : normalops[op].toRm32);
1232 }
1233 // op reg, r/m
1234 else {
1235 emit->Write8(bits == 8 ? normalops[op].fromRm8 : normalops[op].fromRm32);
1236 }
1237 }
1238 WriteRest(emit, immToWrite >> 3, _operandReg);
1239 switch (immToWrite) {
1240 case 0:
1241 break;
1242 case 8:
1243 emit->Write8((u8)operand.offset);
1244 break;
1245 case 16:
1246 emit->Write16((u16)operand.offset);
1247 break;
1248 case 32:
1249 emit->Write32((u32)operand.offset);
1250 break;
1251 default:
1252 ASSERT_MSG(0, "WriteNormalOp - Unhandled case");
1253 }
1254}
1255
1256void XEmitter::WriteNormalOp(XEmitter* emit, int bits, NormalOp op, const OpArg& a1,
1257 const OpArg& a2) {
1258 if (a1.IsImm()) {
1259 // Booh! Can't write to an imm
1260 ASSERT_MSG(0, "WriteNormalOp - a1 cannot be imm");
1261 return;
1262 }
1263 if (a2.IsImm()) {
1264 a1.WriteNormalOp(emit, true, op, a2, bits);
1265 } else {
1266 if (a1.IsSimpleReg()) {
1267 a2.WriteNormalOp(emit, false, op, a1, bits);
1268 } else {
1269 ASSERT_MSG(a2.IsSimpleReg() || a2.IsImm(),
1270 "WriteNormalOp - a1 and a2 cannot both be memory");
1271 a1.WriteNormalOp(emit, true, op, a2, bits);
1272 }
1273 }
1274}
1275
1276void XEmitter::ADD(int bits, const OpArg& a1, const OpArg& a2) {
1277 CheckFlags();
1278 WriteNormalOp(this, bits, nrmADD, a1, a2);
1279}
1280void XEmitter::ADC(int bits, const OpArg& a1, const OpArg& a2) {
1281 CheckFlags();
1282 WriteNormalOp(this, bits, nrmADC, a1, a2);
1283}
1284void XEmitter::SUB(int bits, const OpArg& a1, const OpArg& a2) {
1285 CheckFlags();
1286 WriteNormalOp(this, bits, nrmSUB, a1, a2);
1287}
1288void XEmitter::SBB(int bits, const OpArg& a1, const OpArg& a2) {
1289 CheckFlags();
1290 WriteNormalOp(this, bits, nrmSBB, a1, a2);
1291}
1292void XEmitter::AND(int bits, const OpArg& a1, const OpArg& a2) {
1293 CheckFlags();
1294 WriteNormalOp(this, bits, nrmAND, a1, a2);
1295}
1296void XEmitter::OR(int bits, const OpArg& a1, const OpArg& a2) {
1297 CheckFlags();
1298 WriteNormalOp(this, bits, nrmOR, a1, a2);
1299}
1300void XEmitter::XOR(int bits, const OpArg& a1, const OpArg& a2) {
1301 CheckFlags();
1302 WriteNormalOp(this, bits, nrmXOR, a1, a2);
1303}
1304void XEmitter::MOV(int bits, const OpArg& a1, const OpArg& a2) {
1305 if (a1.IsSimpleReg() && a2.IsSimpleReg() && a1.GetSimpleReg() == a2.GetSimpleReg())
1306 LOG_ERROR(Common, "Redundant MOV @ %p - bug in JIT?", code);
1307 WriteNormalOp(this, bits, nrmMOV, a1, a2);
1308}
1309void XEmitter::TEST(int bits, const OpArg& a1, const OpArg& a2) {
1310 CheckFlags();
1311 WriteNormalOp(this, bits, nrmTEST, a1, a2);
1312}
1313void XEmitter::CMP(int bits, const OpArg& a1, const OpArg& a2) {
1314 CheckFlags();
1315 WriteNormalOp(this, bits, nrmCMP, a1, a2);
1316}
1317void XEmitter::XCHG(int bits, const OpArg& a1, const OpArg& a2) {
1318 WriteNormalOp(this, bits, nrmXCHG, a1, a2);
1319}
1320
1321void XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a1, const OpArg& a2) {
1322 CheckFlags();
1323 if (bits == 8) {
1324 ASSERT_MSG(0, "IMUL - illegal bit size!");
1325 return;
1326 }
1327
1328 if (a1.IsImm()) {
1329 ASSERT_MSG(0, "IMUL - second arg cannot be imm!");
1330 return;
1331 }
1332
1333 if (!a2.IsImm()) {
1334 ASSERT_MSG(0, "IMUL - third arg must be imm!");
1335 return;
1336 }
1337
1338 if (bits == 16)
1339 Write8(0x66);
1340 a1.WriteRex(this, bits, bits, regOp);
1341
1342 if (a2.GetImmBits() == 8 || (a2.GetImmBits() == 16 && (s8)a2.offset == (s16)a2.offset) ||
1343 (a2.GetImmBits() == 32 && (s8)a2.offset == (s32)a2.offset)) {
1344 Write8(0x6B);
1345 a1.WriteRest(this, 1, regOp);
1346 Write8((u8)a2.offset);
1347 } else {
1348 Write8(0x69);
1349 if (a2.GetImmBits() == 16 && bits == 16) {
1350 a1.WriteRest(this, 2, regOp);
1351 Write16((u16)a2.offset);
1352 } else if (a2.GetImmBits() == 32 && (bits == 32 || bits == 64)) {
1353 a1.WriteRest(this, 4, regOp);
1354 Write32((u32)a2.offset);
1355 } else {
1356 ASSERT_MSG(0, "IMUL - unhandled case!");
1357 }
1358 }
1359}
1360
1361void XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a) {
1362 CheckFlags();
1363 if (bits == 8) {
1364 ASSERT_MSG(0, "IMUL - illegal bit size!");
1365 return;
1366 }
1367
1368 if (a.IsImm()) {
1369 IMUL(bits, regOp, R(regOp), a);
1370 return;
1371 }
1372
1373 if (bits == 16)
1374 Write8(0x66);
1375 a.WriteRex(this, bits, bits, regOp);
1376 Write8(0x0F);
1377 Write8(0xAF);
1378 a.WriteRest(this, 0, regOp);
1379}
1380
1381void XEmitter::WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes) {
1382 if (opPrefix)
1383 Write8(opPrefix);
1384 arg.operandReg = regOp;
1385 arg.WriteRex(this, 0, 0);
1386 Write8(0x0F);
1387 if (op > 0xFF)
1388 Write8((op >> 8) & 0xFF);
1389 Write8(op & 0xFF);
1390 arg.WriteRest(this, extrabytes);
1391}
1392
1393void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes) {
1394 WriteAVXOp(opPrefix, op, regOp, INVALID_REG, arg, extrabytes);
1395}
1396
1397static int GetVEXmmmmm(u16 op) {
1398 // Currently, only 0x38 and 0x3A are used as secondary escape byte.
1399 if ((op >> 8) == 0x3A)
1400 return 3;
1401 if ((op >> 8) == 0x38)
1402 return 2;
1403
1404 return 1;
1405}
1406
1407static int GetVEXpp(u8 opPrefix) {
1408 if (opPrefix == 0x66)
1409 return 1;
1410 if (opPrefix == 0xF3)
1411 return 2;
1412 if (opPrefix == 0xF2)
1413 return 3;
1414
1415 return 0;
1416}
1417
1418void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg,
1419 int extrabytes) {
1420 if (!Common::GetCPUCaps().avx)
1421 ASSERT_MSG(0, "Trying to use AVX on a system that doesn't support it. Bad programmer.");
1422 int mmmmm = GetVEXmmmmm(op);
1423 int pp = GetVEXpp(opPrefix);
1424 // FIXME: we currently don't support 256-bit instructions, and "size" is not the vector size
1425 // here
1426 arg.WriteVex(this, regOp1, regOp2, 0, pp, mmmmm);
1427 Write8(op & 0xFF);
1428 arg.WriteRest(this, extrabytes, regOp1);
1429}
1430
1431// Like the above, but more general; covers GPR-based VEX operations, like BMI1/2
1432void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2,
1433 const OpArg& arg, int extrabytes) {
1434 if (size != 32 && size != 64)
1435 ASSERT_MSG(0, "VEX GPR instructions only support 32-bit and 64-bit modes!");
1436 int mmmmm = GetVEXmmmmm(op);
1437 int pp = GetVEXpp(opPrefix);
1438 arg.WriteVex(this, regOp1, regOp2, 0, pp, mmmmm, size == 64);
1439 Write8(op & 0xFF);
1440 arg.WriteRest(this, extrabytes, regOp1);
1441}
1442
1443void XEmitter::WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2,
1444 const OpArg& arg, int extrabytes) {
1445 CheckFlags();
1446 if (!Common::GetCPUCaps().bmi1)
1447 ASSERT_MSG(0, "Trying to use BMI1 on a system that doesn't support it. Bad programmer.");
1448 WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes);
1449}
1450
1451void XEmitter::WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2,
1452 const OpArg& arg, int extrabytes) {
1453 CheckFlags();
1454 if (!Common::GetCPUCaps().bmi2)
1455 ASSERT_MSG(0, "Trying to use BMI2 on a system that doesn't support it. Bad programmer.");
1456 WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes);
1457}
1458
1459void XEmitter::MOVD_xmm(X64Reg dest, const OpArg& arg) {
1460 WriteSSEOp(0x66, 0x6E, dest, arg, 0);
1461}
1462void XEmitter::MOVD_xmm(const OpArg& arg, X64Reg src) {
1463 WriteSSEOp(0x66, 0x7E, src, arg, 0);
1464}
1465
1466void XEmitter::MOVQ_xmm(X64Reg dest, OpArg arg) {
1467#ifdef ARCHITECTURE_x86_64
1468 // Alternate encoding
1469 // This does not display correctly in MSVC's debugger, it thinks it's a MOVD
1470 arg.operandReg = dest;
1471 Write8(0x66);
1472 arg.WriteRex(this, 64, 0);
1473 Write8(0x0f);
1474 Write8(0x6E);
1475 arg.WriteRest(this, 0);
1476#else
1477 arg.operandReg = dest;
1478 Write8(0xF3);
1479 Write8(0x0f);
1480 Write8(0x7E);
1481 arg.WriteRest(this, 0);
1482#endif
1483}
1484
1485void XEmitter::MOVQ_xmm(OpArg arg, X64Reg src) {
1486 if (src > 7 || arg.IsSimpleReg()) {
1487 // Alternate encoding
1488 // This does not display correctly in MSVC's debugger, it thinks it's a MOVD
1489 arg.operandReg = src;
1490 Write8(0x66);
1491 arg.WriteRex(this, 64, 0);
1492 Write8(0x0f);
1493 Write8(0x7E);
1494 arg.WriteRest(this, 0);
1495 } else {
1496 arg.operandReg = src;
1497 arg.WriteRex(this, 0, 0);
1498 Write8(0x66);
1499 Write8(0x0f);
1500 Write8(0xD6);
1501 arg.WriteRest(this, 0);
1502 }
1503}
1504
1505void XEmitter::WriteMXCSR(OpArg arg, int ext) {
1506 if (arg.IsImm() || arg.IsSimpleReg())
1507 ASSERT_MSG(0, "MXCSR - invalid operand");
1508
1509 arg.operandReg = ext;
1510 arg.WriteRex(this, 0, 0);
1511 Write8(0x0F);
1512 Write8(0xAE);
1513 arg.WriteRest(this);
1514}
1515
1516void XEmitter::STMXCSR(const OpArg& memloc) {
1517 WriteMXCSR(memloc, 3);
1518}
1519void XEmitter::LDMXCSR(const OpArg& memloc) {
1520 WriteMXCSR(memloc, 2);
1521}
1522
1523void XEmitter::MOVNTDQ(const OpArg& arg, X64Reg regOp) {
1524 WriteSSEOp(0x66, sseMOVNTDQ, regOp, arg);
1525}
1526void XEmitter::MOVNTPS(const OpArg& arg, X64Reg regOp) {
1527 WriteSSEOp(0x00, sseMOVNTP, regOp, arg);
1528}
1529void XEmitter::MOVNTPD(const OpArg& arg, X64Reg regOp) {
1530 WriteSSEOp(0x66, sseMOVNTP, regOp, arg);
1531}
1532
// Scalar floating-point arithmetic wrappers. Each one forwards to WriteSSEOp with
// prefix 0xF3 for the *SS (single) form and 0xF2 for the *SD (double) form, sharing the
// same sse* opcode constant between the two.
// CMPSS/CMPSD pass 1 as the fifth WriteSSEOp argument (named extrabytes where
// WriteSSSE3Op forwards it) and then append the comparison predicate with Write8.
// RCPSS and RSQRTSS only exist in the 0xF3 form here.
1533void XEmitter::ADDSS(X64Reg regOp, const OpArg& arg) {
1534 WriteSSEOp(0xF3, sseADD, regOp, arg);
1535}
1536void XEmitter::ADDSD(X64Reg regOp, const OpArg& arg) {
1537 WriteSSEOp(0xF2, sseADD, regOp, arg);
1538}
1539void XEmitter::SUBSS(X64Reg regOp, const OpArg& arg) {
1540 WriteSSEOp(0xF3, sseSUB, regOp, arg);
1541}
1542void XEmitter::SUBSD(X64Reg regOp, const OpArg& arg) {
1543 WriteSSEOp(0xF2, sseSUB, regOp, arg);
1544}
1545void XEmitter::CMPSS(X64Reg regOp, const OpArg& arg, u8 compare) {
1546 WriteSSEOp(0xF3, sseCMP, regOp, arg, 1);
1547 Write8(compare);
1548}
1549void XEmitter::CMPSD(X64Reg regOp, const OpArg& arg, u8 compare) {
1550 WriteSSEOp(0xF2, sseCMP, regOp, arg, 1);
1551 Write8(compare);
1552}
1553void XEmitter::MULSS(X64Reg regOp, const OpArg& arg) {
1554 WriteSSEOp(0xF3, sseMUL, regOp, arg);
1555}
1556void XEmitter::MULSD(X64Reg regOp, const OpArg& arg) {
1557 WriteSSEOp(0xF2, sseMUL, regOp, arg);
1558}
1559void XEmitter::DIVSS(X64Reg regOp, const OpArg& arg) {
1560 WriteSSEOp(0xF3, sseDIV, regOp, arg);
1561}
1562void XEmitter::DIVSD(X64Reg regOp, const OpArg& arg) {
1563 WriteSSEOp(0xF2, sseDIV, regOp, arg);
1564}
1565void XEmitter::MINSS(X64Reg regOp, const OpArg& arg) {
1566 WriteSSEOp(0xF3, sseMIN, regOp, arg);
1567}
1568void XEmitter::MINSD(X64Reg regOp, const OpArg& arg) {
1569 WriteSSEOp(0xF2, sseMIN, regOp, arg);
1570}
1571void XEmitter::MAXSS(X64Reg regOp, const OpArg& arg) {
1572 WriteSSEOp(0xF3, sseMAX, regOp, arg);
1573}
1574void XEmitter::MAXSD(X64Reg regOp, const OpArg& arg) {
1575 WriteSSEOp(0xF2, sseMAX, regOp, arg);
1576}
1577void XEmitter::SQRTSS(X64Reg regOp, const OpArg& arg) {
1578 WriteSSEOp(0xF3, sseSQRT, regOp, arg);
1579}
1580void XEmitter::SQRTSD(X64Reg regOp, const OpArg& arg) {
1581 WriteSSEOp(0xF2, sseSQRT, regOp, arg);
1582}
1583void XEmitter::RCPSS(X64Reg regOp, const OpArg& arg) {
1584 WriteSSEOp(0xF3, sseRCP, regOp, arg);
1585}
1586void XEmitter::RSQRTSS(X64Reg regOp, const OpArg& arg) {
1587 WriteSSEOp(0xF3, sseRSQRT, regOp, arg);
1588}
1589
// Packed floating-point arithmetic and logic wrappers. The *PS (packed single) forms
// pass prefix 0x00 and the *PD (packed double) forms pass 0x66, with the same sse*
// opcode constant for both.
// CMPPS/CMPPD and SHUFPS/SHUFPD pass 1 as the extra-byte count and then append their
// predicate / shuffle selector as an immediate byte via Write8.
1590void XEmitter::ADDPS(X64Reg regOp, const OpArg& arg) {
1591 WriteSSEOp(0x00, sseADD, regOp, arg);
1592}
1593void XEmitter::ADDPD(X64Reg regOp, const OpArg& arg) {
1594 WriteSSEOp(0x66, sseADD, regOp, arg);
1595}
1596void XEmitter::SUBPS(X64Reg regOp, const OpArg& arg) {
1597 WriteSSEOp(0x00, sseSUB, regOp, arg);
1598}
1599void XEmitter::SUBPD(X64Reg regOp, const OpArg& arg) {
1600 WriteSSEOp(0x66, sseSUB, regOp, arg);
1601}
1602void XEmitter::CMPPS(X64Reg regOp, const OpArg& arg, u8 compare) {
1603 WriteSSEOp(0x00, sseCMP, regOp, arg, 1);
1604 Write8(compare);
1605}
1606void XEmitter::CMPPD(X64Reg regOp, const OpArg& arg, u8 compare) {
1607 WriteSSEOp(0x66, sseCMP, regOp, arg, 1);
1608 Write8(compare);
1609}
1610void XEmitter::ANDPS(X64Reg regOp, const OpArg& arg) {
1611 WriteSSEOp(0x00, sseAND, regOp, arg);
1612}
1613void XEmitter::ANDPD(X64Reg regOp, const OpArg& arg) {
1614 WriteSSEOp(0x66, sseAND, regOp, arg);
1615}
1616void XEmitter::ANDNPS(X64Reg regOp, const OpArg& arg) {
1617 WriteSSEOp(0x00, sseANDN, regOp, arg);
1618}
1619void XEmitter::ANDNPD(X64Reg regOp, const OpArg& arg) {
1620 WriteSSEOp(0x66, sseANDN, regOp, arg);
1621}
1622void XEmitter::ORPS(X64Reg regOp, const OpArg& arg) {
1623 WriteSSEOp(0x00, sseOR, regOp, arg);
1624}
1625void XEmitter::ORPD(X64Reg regOp, const OpArg& arg) {
1626 WriteSSEOp(0x66, sseOR, regOp, arg);
1627}
1628void XEmitter::XORPS(X64Reg regOp, const OpArg& arg) {
1629 WriteSSEOp(0x00, sseXOR, regOp, arg);
1630}
1631void XEmitter::XORPD(X64Reg regOp, const OpArg& arg) {
1632 WriteSSEOp(0x66, sseXOR, regOp, arg);
1633}
1634void XEmitter::MULPS(X64Reg regOp, const OpArg& arg) {
1635 WriteSSEOp(0x00, sseMUL, regOp, arg);
1636}
1637void XEmitter::MULPD(X64Reg regOp, const OpArg& arg) {
1638 WriteSSEOp(0x66, sseMUL, regOp, arg);
1639}
1640void XEmitter::DIVPS(X64Reg regOp, const OpArg& arg) {
1641 WriteSSEOp(0x00, sseDIV, regOp, arg);
1642}
1643void XEmitter::DIVPD(X64Reg regOp, const OpArg& arg) {
1644 WriteSSEOp(0x66, sseDIV, regOp, arg);
1645}
1646void XEmitter::MINPS(X64Reg regOp, const OpArg& arg) {
1647 WriteSSEOp(0x00, sseMIN, regOp, arg);
1648}
1649void XEmitter::MINPD(X64Reg regOp, const OpArg& arg) {
1650 WriteSSEOp(0x66, sseMIN, regOp, arg);
1651}
1652void XEmitter::MAXPS(X64Reg regOp, const OpArg& arg) {
1653 WriteSSEOp(0x00, sseMAX, regOp, arg);
1654}
1655void XEmitter::MAXPD(X64Reg regOp, const OpArg& arg) {
1656 WriteSSEOp(0x66, sseMAX, regOp, arg);
1657}
1658void XEmitter::SQRTPS(X64Reg regOp, const OpArg& arg) {
1659 WriteSSEOp(0x00, sseSQRT, regOp, arg);
1660}
1661void XEmitter::SQRTPD(X64Reg regOp, const OpArg& arg) {
1662 WriteSSEOp(0x66, sseSQRT, regOp, arg);
1663}
1664void XEmitter::RCPPS(X64Reg regOp, const OpArg& arg) {
1665 WriteSSEOp(0x00, sseRCP, regOp, arg);
1666}
1667void XEmitter::RSQRTPS(X64Reg regOp, const OpArg& arg) {
1668 WriteSSEOp(0x00, sseRSQRT, regOp, arg);
1669}
1670void XEmitter::SHUFPS(X64Reg regOp, const OpArg& arg, u8 shuffle) {
1671 WriteSSEOp(0x00, sseSHUF, regOp, arg, 1);
1672 Write8(shuffle);
1673}
1674void XEmitter::SHUFPD(X64Reg regOp, const OpArg& arg, u8 shuffle) {
1675 WriteSSEOp(0x66, sseSHUF, regOp, arg, 1);
1676 Write8(shuffle);
1677}
1678
// HADDPS is the only horizontal-add wrapper in this section; it passes prefix 0xF2.
// Unlike MOVDDUP further down, it does not check Common::GetCPUCaps().sse3 itself.
1679void XEmitter::HADDPS(X64Reg regOp, const OpArg& arg) {
1680 WriteSSEOp(0xF2, sseHADD, regOp, arg);
1681}
1682
// Scalar compare wrappers. COMISS/COMISD use sseCOMIS (ordered compare) and
// UCOMISS/UCOMISD use sseUCOMIS (unordered compare). Although these are scalar
// operations, they are emitted with the packed-style prefixes: 0x00 for the single
// form and 0x66 for the double form.
1683void XEmitter::COMISS(X64Reg regOp, const OpArg& arg) {
1684 WriteSSEOp(0x00, sseCOMIS, regOp, arg);
1685} // weird that these scalar compares use the packed-style prefixes
1686void XEmitter::COMISD(X64Reg regOp, const OpArg& arg) {
1687 WriteSSEOp(0x66, sseCOMIS, regOp, arg);
1688} // COMISS/COMISD above: ordered compare
1689void XEmitter::UCOMISS(X64Reg regOp, const OpArg& arg) {
1690 WriteSSEOp(0x00, sseUCOMIS, regOp, arg);
1691} // UCOMISS/UCOMISD: unordered compare
1692void XEmitter::UCOMISD(X64Reg regOp, const OpArg& arg) {
1693 WriteSSEOp(0x66, sseUCOMIS, regOp, arg);
1694}
1695
// Full-register and scalar move wrappers. Every mnemonic has two overloads:
//   (X64Reg regOp, const OpArg& arg) - load form, uses the *fromRM opcode constant
//   (const OpArg& arg, X64Reg regOp) - store form, uses the *toRM opcode constant
// In both overloads the XMM register is what gets passed to WriteSSEOp as regOp.
//
// MOVAPS/MOVAPD: aligned moves, prefix 0x00 / 0x66.
1696void XEmitter::MOVAPS(X64Reg regOp, const OpArg& arg) {
1697 WriteSSEOp(0x00, sseMOVAPfromRM, regOp, arg);
1698}
1699void XEmitter::MOVAPD(X64Reg regOp, const OpArg& arg) {
1700 WriteSSEOp(0x66, sseMOVAPfromRM, regOp, arg);
1701}
1702void XEmitter::MOVAPS(const OpArg& arg, X64Reg regOp) {
1703 WriteSSEOp(0x00, sseMOVAPtoRM, regOp, arg);
1704}
1705void XEmitter::MOVAPD(const OpArg& arg, X64Reg regOp) {
1706 WriteSSEOp(0x66, sseMOVAPtoRM, regOp, arg);
1707}
1708
// MOVUPS/MOVUPD: unaligned moves, prefix 0x00 / 0x66.
1709void XEmitter::MOVUPS(X64Reg regOp, const OpArg& arg) {
1710 WriteSSEOp(0x00, sseMOVUPfromRM, regOp, arg);
1711}
1712void XEmitter::MOVUPD(X64Reg regOp, const OpArg& arg) {
1713 WriteSSEOp(0x66, sseMOVUPfromRM, regOp, arg);
1714}
1715void XEmitter::MOVUPS(const OpArg& arg, X64Reg regOp) {
1716 WriteSSEOp(0x00, sseMOVUPtoRM, regOp, arg);
1717}
1718void XEmitter::MOVUPD(const OpArg& arg, X64Reg regOp) {
1719 WriteSSEOp(0x66, sseMOVUPtoRM, regOp, arg);
1720}
1721
// MOVDQA/MOVDQU: integer moves sharing the sseMOVDQ* opcodes; the aligned form passes
// prefix 0x66 and the unaligned form passes 0xF3.
1722void XEmitter::MOVDQA(X64Reg regOp, const OpArg& arg) {
1723 WriteSSEOp(0x66, sseMOVDQfromRM, regOp, arg);
1724}
1725void XEmitter::MOVDQA(const OpArg& arg, X64Reg regOp) {
1726 WriteSSEOp(0x66, sseMOVDQtoRM, regOp, arg);
1727}
1728void XEmitter::MOVDQU(X64Reg regOp, const OpArg& arg) {
1729 WriteSSEOp(0xF3, sseMOVDQfromRM, regOp, arg);
1730}
1731void XEmitter::MOVDQU(const OpArg& arg, X64Reg regOp) {
1732 WriteSSEOp(0xF3, sseMOVDQtoRM, regOp, arg);
1733}
1734
// MOVSS/MOVSD: scalar moves. They reuse the sseMOVUP* opcode constants and differ from
// MOVUPS/MOVUPD only in the prefix (0xF3 for single, 0xF2 for double).
1735void XEmitter::MOVSS(X64Reg regOp, const OpArg& arg) {
1736 WriteSSEOp(0xF3, sseMOVUPfromRM, regOp, arg);
1737}
1738void XEmitter::MOVSD(X64Reg regOp, const OpArg& arg) {
1739 WriteSSEOp(0xF2, sseMOVUPfromRM, regOp, arg);
1740}
1741void XEmitter::MOVSS(const OpArg& arg, X64Reg regOp) {
1742 WriteSSEOp(0xF3, sseMOVUPtoRM, regOp, arg);
1743}
1744void XEmitter::MOVSD(const OpArg& arg, X64Reg regOp) {
1745 WriteSSEOp(0xF2, sseMOVUPtoRM, regOp, arg);
1746}
1747
// Half-register move wrappers (low half: MOVLP*, high half: MOVHP*). As with the full
// moves, the (X64Reg, OpArg) overload uses the *fromRM opcode and the (OpArg, X64Reg)
// overload uses the *toRM opcode; prefix is 0x00 for *PS and 0x66 for *PD.
// NOTE(review): per the ISA these forms take a memory operand; nothing here rejects a
// register OpArg - confirm whether callers rely on that.
1748void XEmitter::MOVLPS(X64Reg regOp, const OpArg& arg) {
1749 WriteSSEOp(0x00, sseMOVLPfromRM, regOp, arg);
1750}
1751void XEmitter::MOVLPD(X64Reg regOp, const OpArg& arg) {
1752 WriteSSEOp(0x66, sseMOVLPfromRM, regOp, arg);
1753}
1754void XEmitter::MOVLPS(const OpArg& arg, X64Reg regOp) {
1755 WriteSSEOp(0x00, sseMOVLPtoRM, regOp, arg);
1756}
1757void XEmitter::MOVLPD(const OpArg& arg, X64Reg regOp) {
1758 WriteSSEOp(0x66, sseMOVLPtoRM, regOp, arg);
1759}
1760
1761void XEmitter::MOVHPS(X64Reg regOp, const OpArg& arg) {
1762 WriteSSEOp(0x00, sseMOVHPfromRM, regOp, arg);
1763}
1764void XEmitter::MOVHPD(X64Reg regOp, const OpArg& arg) {
1765 WriteSSEOp(0x66, sseMOVHPfromRM, regOp, arg);
1766}
1767void XEmitter::MOVHPS(const OpArg& arg, X64Reg regOp) {
1768 WriteSSEOp(0x00, sseMOVHPtoRM, regOp, arg);
1769}
1770void XEmitter::MOVHPD(const OpArg& arg, X64Reg regOp) {
1771 WriteSSEOp(0x66, sseMOVHPtoRM, regOp, arg);
1772}
1773
// Register-to-register only: the second register is wrapped with R() to form the OpArg.
1774void XEmitter::MOVHLPS(X64Reg regOp1, X64Reg regOp2) {
1775 WriteSSEOp(0x00, sseMOVHLPS, regOp1, R(regOp2));
1776}
1777void XEmitter::MOVLHPS(X64Reg regOp1, X64Reg regOp2) {
1778 WriteSSEOp(0x00, sseMOVLHPS, regOp1, R(regOp2));
1779}
1780
// Conversion wrappers. Unlike the arithmetic wrappers these pass the opcode byte as a
// literal (0x5A, 0x2D, 0x2A, 0xE6, 0x5B, 0x2C) rather than an sse* constant; the
// prefix byte selects which conversion of the shared opcode is emitted.
//
// Packed float <-> double (opcode 0x5A).
1781void XEmitter::CVTPS2PD(X64Reg regOp, const OpArg& arg) {
1782 WriteSSEOp(0x00, 0x5A, regOp, arg);
1783}
1784void XEmitter::CVTPD2PS(X64Reg regOp, const OpArg& arg) {
1785 WriteSSEOp(0x66, 0x5A, regOp, arg);
1786}
1787
// Scalar conversions. For the *2SI forms regOp is the integer destination; for the
// CVTSI2* forms arg is the integer source.
// NOTE(review): there is no operand-size parameter here, so whether a 64-bit integer
// form can be produced depends on how WriteSSEOp emits REX.W - confirm before relying
// on 64-bit conversions.
1788void XEmitter::CVTSD2SS(X64Reg regOp, const OpArg& arg) {
1789 WriteSSEOp(0xF2, 0x5A, regOp, arg);
1790}
1791void XEmitter::CVTSS2SD(X64Reg regOp, const OpArg& arg) {
1792 WriteSSEOp(0xF3, 0x5A, regOp, arg);
1793}
1794void XEmitter::CVTSD2SI(X64Reg regOp, const OpArg& arg) {
1795 WriteSSEOp(0xF2, 0x2D, regOp, arg);
1796}
1797void XEmitter::CVTSS2SI(X64Reg regOp, const OpArg& arg) {
1798 WriteSSEOp(0xF3, 0x2D, regOp, arg);
1799}
1800void XEmitter::CVTSI2SD(X64Reg regOp, const OpArg& arg) {
1801 WriteSSEOp(0xF2, 0x2A, regOp, arg);
1802}
1803void XEmitter::CVTSI2SS(X64Reg regOp, const OpArg& arg) {
1804 WriteSSEOp(0xF3, 0x2A, regOp, arg);
1805}
1806
// Packed integer <-> floating-point conversions (opcodes 0xE6 and 0x5B).
1807void XEmitter::CVTDQ2PD(X64Reg regOp, const OpArg& arg) {
1808 WriteSSEOp(0xF3, 0xE6, regOp, arg);
1809}
1810void XEmitter::CVTDQ2PS(X64Reg regOp, const OpArg& arg) {
1811 WriteSSEOp(0x00, 0x5B, regOp, arg);
1812}
1813void XEmitter::CVTPD2DQ(X64Reg regOp, const OpArg& arg) {
1814 WriteSSEOp(0xF2, 0xE6, regOp, arg);
1815}
1816void XEmitter::CVTPS2DQ(X64Reg regOp, const OpArg& arg) {
1817 WriteSSEOp(0x66, 0x5B, regOp, arg);
1818}
1819
// CVTT*: the truncating (round-toward-zero) counterparts of the conversions above.
1820void XEmitter::CVTTSD2SI(X64Reg regOp, const OpArg& arg) {
1821 WriteSSEOp(0xF2, 0x2C, regOp, arg);
1822}
1823void XEmitter::CVTTSS2SI(X64Reg regOp, const OpArg& arg) {
1824 WriteSSEOp(0xF3, 0x2C, regOp, arg);
1825}
1826void XEmitter::CVTTPS2DQ(X64Reg regOp, const OpArg& arg) {
1827 WriteSSEOp(0xF3, 0x5B, regOp, arg);
1828}
1829void XEmitter::CVTTPD2DQ(X64Reg regOp, const OpArg& arg) {
1830 WriteSSEOp(0x66, 0xE6, regOp, arg);
1831}
1832
// MASKMOVDQU takes two registers; src is wrapped with R() to form the OpArg.
// NOTE(review): per the ISA the store address is implicit (DS:RDI/EDI) and both
// operands are XMM registers (data, byte mask) - the parameter name "dest" is
// therefore a little misleading; confirm against callers.
1833void XEmitter::MASKMOVDQU(X64Reg dest, X64Reg src) {
1834 WriteSSEOp(0x66, sseMASKMOVDQU, dest, R(src));
1835}
1836
// MOVMSKPS/MOVMSKPD (opcode 0x50): prefix 0x00 for the single form, 0x66 for double.
1837void XEmitter::MOVMSKPS(X64Reg dest, const OpArg& arg) {
1838 WriteSSEOp(0x00, 0x50, dest, arg);
1839}
1840void XEmitter::MOVMSKPD(X64Reg dest, const OpArg& arg) {
1841 WriteSSEOp(0x66, 0x50, dest, arg);
1842}
1843
// LDDQU is emitted with prefix 0xF2 and the sseLDDQU opcode constant.
1844void XEmitter::LDDQU(X64Reg dest, const OpArg& arg) {
1845 WriteSSEOp(0xF2, sseLDDQU, dest, arg);
1846} // For integer data only
1847
// Unpack/interleave wrappers: opcode 0x14 is the low-half form (UNPCKL*), 0x15 the
// high-half form (UNPCKH*); prefix 0x00 selects *PS and 0x66 selects *PD.
1848// THESE TWO ARE UNTESTED.
1849void XEmitter::UNPCKLPS(X64Reg dest, const OpArg& arg) {
1850 WriteSSEOp(0x00, 0x14, dest, arg);
1851}
1852void XEmitter::UNPCKHPS(X64Reg dest, const OpArg& arg) {
1853 WriteSSEOp(0x00, 0x15, dest, arg);
1854}
1855
1856void XEmitter::UNPCKLPD(X64Reg dest, const OpArg& arg) {
1857 WriteSSEOp(0x66, 0x14, dest, arg);
1858}
1859void XEmitter::UNPCKHPD(X64Reg dest, const OpArg& arg) {
1860 WriteSSEOp(0x66, 0x15, dest, arg);
1861}
1862
1863void XEmitter::MOVDDUP(X64Reg regOp, const OpArg& arg) {
1864 if (Common::GetCPUCaps().sse3) {
1865 WriteSSEOp(0xF2, 0x12, regOp, arg); // SSE3 movddup
1866 } else {
1867 // Simulate this instruction with SSE2 instructions
1868 if (!arg.IsSimpleReg(regOp))
1869 MOVSD(regOp, arg);
1870 UNPCKLPD(regOp, R(regOp));
1871 }
1872}
1873
1874// There are a few more left
1875
1876// Also some integer instructions are missing
// Integer pack wrappers, all emitted with prefix 0x66 and a literal opcode byte.
1877void XEmitter::PACKSSDW(X64Reg dest, const OpArg& arg) {
1878 WriteSSEOp(0x66, 0x6B, dest, arg);
1879}
1880void XEmitter::PACKSSWB(X64Reg dest, const OpArg& arg) {
1881 WriteSSEOp(0x66, 0x63, dest, arg);
1882}
1883void XEmitter::PACKUSWB(X64Reg dest, const OpArg& arg) {
1884 WriteSSEOp(0x66, 0x67, dest, arg);
1885}
1886
// Integer low-half unpack wrappers (opcodes 0x60-0x62 and 0x6C), prefix 0x66.
// Only the PUNPCKL* forms are provided in this section.
1887void XEmitter::PUNPCKLBW(X64Reg dest, const OpArg& arg) {
1888 WriteSSEOp(0x66, 0x60, dest, arg);
1889}
1890void XEmitter::PUNPCKLWD(X64Reg dest, const OpArg& arg) {
1891 WriteSSEOp(0x66, 0x61, dest, arg);
1892}
1893void XEmitter::PUNPCKLDQ(X64Reg dest, const OpArg& arg) {
1894 WriteSSEOp(0x66, 0x62, dest, arg);
1895}
1896void XEmitter::PUNPCKLQDQ(X64Reg dest, const OpArg& arg) {
1897 WriteSSEOp(0x66, 0x6C, dest, arg);
1898}
1899
// Packed shift wrappers. The shift-by-immediate forms share opcodes 0x71 (words),
// 0x72 (dwords) and 0x73 (qwords / whole-register byte shifts). The value cast to
// X64Reg and passed in the regOp slot is not a register: it is the opcode-extension
// digit selecting the operation (2 = logical right, 4 = arithmetic right, 6 = left,
// 3 / 7 = byte-wise right / left shift of the full register). The register being
// shifted is passed as R(reg), and the shift count is appended with Write8.
// NOTE(review): these calls do not pass an extra-byte count to WriteSSEOp even though
// an immediate follows; presumably harmless because the operand is always a register
// (no displacement to fix up) - confirm in WriteSSEOp.
1900void XEmitter::PSRLW(X64Reg reg, int shift) {
1901 WriteSSEOp(0x66, 0x71, (X64Reg)2, R(reg));
1902 Write8(shift);
1903}
1904
1905void XEmitter::PSRLD(X64Reg reg, int shift) {
1906 WriteSSEOp(0x66, 0x72, (X64Reg)2, R(reg));
1907 Write8(shift);
1908}
1909
1910void XEmitter::PSRLQ(X64Reg reg, int shift) {
1911 WriteSSEOp(0x66, 0x73, (X64Reg)2, R(reg));
1912 Write8(shift);
1913}
1914
// Variable-count overload: the shift count comes from arg instead of an immediate
// (opcode 0xD3), so reg is a real register operand here.
1915void XEmitter::PSRLQ(X64Reg reg, const OpArg& arg) {
1916 WriteSSEOp(0x66, 0xd3, reg, arg);
1917}
1918
1919void XEmitter::PSRLDQ(X64Reg reg, int shift) {
1920 WriteSSEOp(0x66, 0x73, (X64Reg)3, R(reg));
1921 Write8(shift);
1922}
1923
1924void XEmitter::PSLLW(X64Reg reg, int shift) {
1925 WriteSSEOp(0x66, 0x71, (X64Reg)6, R(reg));
1926 Write8(shift);
1927}
1928
1929void XEmitter::PSLLD(X64Reg reg, int shift) {
1930 WriteSSEOp(0x66, 0x72, (X64Reg)6, R(reg));
1931 Write8(shift);
1932}
1933
1934void XEmitter::PSLLQ(X64Reg reg, int shift) {
1935 WriteSSEOp(0x66, 0x73, (X64Reg)6, R(reg));
1936 Write8(shift);
1937}
1938
1939void XEmitter::PSLLDQ(X64Reg reg, int shift) {
1940 WriteSSEOp(0x66, 0x73, (X64Reg)7, R(reg));
1941 Write8(shift);
1942}
1943
1944void XEmitter::PSRAW(X64Reg reg, int shift) {
1945 WriteSSEOp(0x66, 0x71, (X64Reg)4, R(reg));
1946 Write8(shift);
1947}
1948
1949void XEmitter::PSRAD(X64Reg reg, int shift) {
1950 WriteSSEOp(0x66, 0x72, (X64Reg)4, R(reg));
1951 Write8(shift);
1952}
1953
1954void XEmitter::WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes) {
1955 if (!Common::GetCPUCaps().ssse3)
1956 ASSERT_MSG(0, "Trying to use SSSE3 on a system that doesn't support it. Bad programmer.");
1957 WriteSSEOp(opPrefix, op, regOp, arg, extrabytes);
1958}
1959
1960void XEmitter::WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes) {
1961 if (!Common::GetCPUCaps().sse4_1)
1962 ASSERT_MSG(0, "Trying to use SSE4.1 on a system that doesn't support it. Bad programmer.");
1963 WriteSSEOp(opPrefix, op, regOp, arg, extrabytes);
1964}
1965
// PSHUFB goes through WriteSSSE3Op; PTEST, PACKUSDW and DPPS go through WriteSSE41Op,
// so each asserts the matching CPU capability before emitting.
// The 16-bit op values carry two opcode bytes (0x38xx / 0x3Axx); how they are split
// into the escape sequence is handled by WriteSSEOp, which is outside this section.
// DPPS passes 1 as the extra-byte count and appends its mask as an immediate byte.
1966void XEmitter::PSHUFB(X64Reg dest, const OpArg& arg) {
1967 WriteSSSE3Op(0x66, 0x3800, dest, arg);
1968}
1969void XEmitter::PTEST(X64Reg dest, const OpArg& arg) {
1970 WriteSSE41Op(0x66, 0x3817, dest, arg);
1971}
1972void XEmitter::PACKUSDW(X64Reg dest, const OpArg& arg) {
1973 WriteSSE41Op(0x66, 0x382b, dest, arg);
1974}
1975void XEmitter::DPPS(X64Reg dest, const OpArg& arg, u8 mask) {
1976 WriteSSE41Op(0x66, 0x3A40, dest, arg, 1);
1977 Write8(mask);
1978}
1979
// SSE4.1 packed integer min/max wrappers (ops 0x3838-0x383F, prefix 0x66). All are
// routed through WriteSSE41Op and therefore assert SSE4.1 support.
// Suffix key: S/U = signed/unsigned, B/W/D = byte/word/dword elements.
1980void XEmitter::PMINSB(X64Reg dest, const OpArg& arg) {
1981 WriteSSE41Op(0x66, 0x3838, dest, arg);
1982}
1983void XEmitter::PMINSD(X64Reg dest, const OpArg& arg) {
1984 WriteSSE41Op(0x66, 0x3839, dest, arg);
1985}
1986void XEmitter::PMINUW(X64Reg dest, const OpArg& arg) {
1987 WriteSSE41Op(0x66, 0x383a, dest, arg);
1988}
1989void XEmitter::PMINUD(X64Reg dest, const OpArg& arg) {
1990 WriteSSE41Op(0x66, 0x383b, dest, arg);
1991}
1992void XEmitter::PMAXSB(X64Reg dest, const OpArg& arg) {
1993 WriteSSE41Op(0x66, 0x383c, dest, arg);
1994}
1995void XEmitter::PMAXSD(X64Reg dest, const OpArg& arg) {
1996 WriteSSE41Op(0x66, 0x383d, dest, arg);
1997}
1998void XEmitter::PMAXUW(X64Reg dest, const OpArg& arg) {
1999 WriteSSE41Op(0x66, 0x383e, dest, arg);
2000}
2001void XEmitter::PMAXUD(X64Reg dest, const OpArg& arg) {
2002 WriteSSE41Op(0x66, 0x383f, dest, arg);
2003}
2004
// SSE4.1 packed widening moves, all via WriteSSE41Op with prefix 0x66.
// PMOVSX* (ops 0x3820-0x3825) sign-extend and PMOVZX* (ops 0x3830-0x3835) zero-extend;
// the two-letter suffix names the source and destination element sizes (B/W/D/Q).
2005void XEmitter::PMOVSXBW(X64Reg dest, const OpArg& arg) {
2006 WriteSSE41Op(0x66, 0x3820, dest, arg);
2007}
2008void XEmitter::PMOVSXBD(X64Reg dest, const OpArg& arg) {
2009 WriteSSE41Op(0x66, 0x3821, dest, arg);
2010}
2011void XEmitter::PMOVSXBQ(X64Reg dest, const OpArg& arg) {
2012 WriteSSE41Op(0x66, 0x3822, dest, arg);
2013}
2014void XEmitter::PMOVSXWD(X64Reg dest, const OpArg& arg) {
2015 WriteSSE41Op(0x66, 0x3823, dest, arg);
2016}
2017void XEmitter::PMOVSXWQ(X64Reg dest, const OpArg& arg) {
2018 WriteSSE41Op(0x66, 0x3824, dest, arg);
2019}
2020void XEmitter::PMOVSXDQ(X64Reg dest, const OpArg& arg) {
2021 WriteSSE41Op(0x66, 0x3825, dest, arg);
2022}
2023void XEmitter::PMOVZXBW(X64Reg dest, const OpArg& arg) {
2024 WriteSSE41Op(0x66, 0x3830, dest, arg);
2025}
2026void XEmitter::PMOVZXBD(X64Reg dest, const OpArg& arg) {
2027 WriteSSE41Op(0x66, 0x3831, dest, arg);
2028}
2029void XEmitter::PMOVZXBQ(X64Reg dest, const OpArg& arg) {
2030 WriteSSE41Op(0x66, 0x3832, dest, arg);
2031}
2032void XEmitter::PMOVZXWD(X64Reg dest, const OpArg& arg) {
2033 WriteSSE41Op(0x66, 0x3833, dest, arg);
2034}
2035void XEmitter::PMOVZXWQ(X64Reg dest, const OpArg& arg) {
2036 WriteSSE41Op(0x66, 0x3834, dest, arg);
2037}
2038void XEmitter::PMOVZXDQ(X64Reg dest, const OpArg& arg) {
2039 WriteSSE41Op(0x66, 0x3835, dest, arg);
2040}
2041
// SSE4.1 blend wrappers, all via WriteSSE41Op with prefix 0x66.
// The variable blends (PBLENDVB / BLENDVPS / BLENDVPD) take no mask parameter here:
// per the ISA the non-VEX encodings read the selector mask implicitly from XMM0, so
// callers are expected to have loaded it there beforehand.
2042void XEmitter::PBLENDVB(X64Reg dest, const OpArg& arg) {
2043 WriteSSE41Op(0x66, 0x3810, dest, arg);
2044}
2045void XEmitter::BLENDVPS(X64Reg dest, const OpArg& arg) {
2046 WriteSSE41Op(0x66, 0x3814, dest, arg);
2047}
2048void XEmitter::BLENDVPD(X64Reg dest, const OpArg& arg) {
2049 WriteSSE41Op(0x66, 0x3815, dest, arg);
2050}
// Immediate blends: pass 1 as the extra-byte count, then append the blend mask.
2051void XEmitter::BLENDPS(X64Reg dest, const OpArg& arg, u8 blend) {
2052 WriteSSE41Op(0x66, 0x3A0C, dest, arg, 1);
2053 Write8(blend);
2054}
2055void XEmitter::BLENDPD(X64Reg dest, const OpArg& arg, u8 blend) {
2056 WriteSSE41Op(0x66, 0x3A0D, dest, arg, 1);
2057 Write8(blend);
2058}
2059
// SSE4.1 rounding wrappers (ops 0x3A08-0x3A0B). Each passes 1 as the extra-byte count
// and appends the rounding-mode immediate with Write8.
2060void XEmitter::ROUNDSS(X64Reg dest, const OpArg& arg, u8 mode) {
2061 WriteSSE41Op(0x66, 0x3A0A, dest, arg, 1);
2062 Write8(mode);
2063}
2064void XEmitter::ROUNDSD(X64Reg dest, const OpArg& arg, u8 mode) {
2065 WriteSSE41Op(0x66, 0x3A0B, dest, arg, 1);
2066 Write8(mode);
2067}
2068void XEmitter::ROUNDPS(X64Reg dest, const OpArg& arg, u8 mode) {
2069 WriteSSE41Op(0x66, 0x3A08, dest, arg, 1);
2070 Write8(mode);
2071}
2072void XEmitter::ROUNDPD(X64Reg dest, const OpArg& arg, u8 mode) {
2073 WriteSSE41Op(0x66, 0x3A09, dest, arg, 1);
2074 Write8(mode);
2075}
2076
// Packed integer wrappers. Everything in this group is emitted through plain
// WriteSSEOp with prefix 0x66 and a literal one-byte opcode.
//
// Bitwise logic.
2077void XEmitter::PAND(X64Reg dest, const OpArg& arg) {
2078 WriteSSEOp(0x66, 0xDB, dest, arg);
2079}
2080void XEmitter::PANDN(X64Reg dest, const OpArg& arg) {
2081 WriteSSEOp(0x66, 0xDF, dest, arg);
2082}
2083void XEmitter::PXOR(X64Reg dest, const OpArg& arg) {
2084 WriteSSEOp(0x66, 0xEF, dest, arg);
2085}
2086void XEmitter::POR(X64Reg dest, const OpArg& arg) {
2087 WriteSSEOp(0x66, 0xEB, dest, arg);
2088}
2089
// Wrapping addition on byte / word / dword / qword elements.
2090void XEmitter::PADDB(X64Reg dest, const OpArg& arg) {
2091 WriteSSEOp(0x66, 0xFC, dest, arg);
2092}
2093void XEmitter::PADDW(X64Reg dest, const OpArg& arg) {
2094 WriteSSEOp(0x66, 0xFD, dest, arg);
2095}
2096void XEmitter::PADDD(X64Reg dest, const OpArg& arg) {
2097 WriteSSEOp(0x66, 0xFE, dest, arg);
2098}
2099void XEmitter::PADDQ(X64Reg dest, const OpArg& arg) {
2100 WriteSSEOp(0x66, 0xD4, dest, arg);
2101}
2102
// Saturating addition: PADDS* signed, PADDUS* unsigned.
2103void XEmitter::PADDSB(X64Reg dest, const OpArg& arg) {
2104 WriteSSEOp(0x66, 0xEC, dest, arg);
2105}
2106void XEmitter::PADDSW(X64Reg dest, const OpArg& arg) {
2107 WriteSSEOp(0x66, 0xED, dest, arg);
2108}
2109void XEmitter::PADDUSB(X64Reg dest, const OpArg& arg) {
2110 WriteSSEOp(0x66, 0xDC, dest, arg);
2111}
2112void XEmitter::PADDUSW(X64Reg dest, const OpArg& arg) {
2113 WriteSSEOp(0x66, 0xDD, dest, arg);
2114}
2115
// Wrapping subtraction on byte / word / dword / qword elements.
2116void XEmitter::PSUBB(X64Reg dest, const OpArg& arg) {
2117 WriteSSEOp(0x66, 0xF8, dest, arg);
2118}
2119void XEmitter::PSUBW(X64Reg dest, const OpArg& arg) {
2120 WriteSSEOp(0x66, 0xF9, dest, arg);
2121}
2122void XEmitter::PSUBD(X64Reg dest, const OpArg& arg) {
2123 WriteSSEOp(0x66, 0xFA, dest, arg);
2124}
2125void XEmitter::PSUBQ(X64Reg dest, const OpArg& arg) {
2126 WriteSSEOp(0x66, 0xFB, dest, arg);
2127}
2128
// Saturating subtraction: PSUBS* signed, PSUBUS* unsigned.
2129void XEmitter::PSUBSB(X64Reg dest, const OpArg& arg) {
2130 WriteSSEOp(0x66, 0xE8, dest, arg);
2131}
2132void XEmitter::PSUBSW(X64Reg dest, const OpArg& arg) {
2133 WriteSSEOp(0x66, 0xE9, dest, arg);
2134}
2135void XEmitter::PSUBUSB(X64Reg dest, const OpArg& arg) {
2136 WriteSSEOp(0x66, 0xD8, dest, arg);
2137}
2138void XEmitter::PSUBUSW(X64Reg dest, const OpArg& arg) {
2139 WriteSSEOp(0x66, 0xD9, dest, arg);
2140}
2141
// Averages of byte / word elements.
2142void XEmitter::PAVGB(X64Reg dest, const OpArg& arg) {
2143 WriteSSEOp(0x66, 0xE0, dest, arg);
2144}
2145void XEmitter::PAVGW(X64Reg dest, const OpArg& arg) {
2146 WriteSSEOp(0x66, 0xE3, dest, arg);
2147}
2148
// Packed integer compares: PCMPEQ* (opcodes 0x74-0x76) test equality and PCMPGT*
// (opcodes 0x64-0x66) test signed greater-than, on byte / word / dword elements.
2149void XEmitter::PCMPEQB(X64Reg dest, const OpArg& arg) {
2150 WriteSSEOp(0x66, 0x74, dest, arg);
2151}
2152void XEmitter::PCMPEQW(X64Reg dest, const OpArg& arg) {
2153 WriteSSEOp(0x66, 0x75, dest, arg);
2154}
2155void XEmitter::PCMPEQD(X64Reg dest, const OpArg& arg) {
2156 WriteSSEOp(0x66, 0x76, dest, arg);
2157}
2158
2159void XEmitter::PCMPGTB(X64Reg dest, const OpArg& arg) {
2160 WriteSSEOp(0x66, 0x64, dest, arg);
2161}
2162void XEmitter::PCMPGTW(X64Reg dest, const OpArg& arg) {
2163 WriteSSEOp(0x66, 0x65, dest, arg);
2164}
2165void XEmitter::PCMPGTD(X64Reg dest, const OpArg& arg) {
2166 WriteSSEOp(0x66, 0x66, dest, arg);
2167}
2168
// Word extract / insert: pass 1 as the extra-byte count, then append the element
// index (subreg) as an immediate byte.
2169void XEmitter::PEXTRW(X64Reg dest, const OpArg& arg, u8 subreg) {
2170 WriteSSEOp(0x66, 0xC5, dest, arg, 1);
2171 Write8(subreg);
2172}
2173void XEmitter::PINSRW(X64Reg dest, const OpArg& arg, u8 subreg) {
2174 WriteSSEOp(0x66, 0xC4, dest, arg, 1);
2175 Write8(subreg);
2176}
2177
2178void XEmitter::PMADDWD(X64Reg dest, const OpArg& arg) {
2179 WriteSSEOp(0x66, 0xF5, dest, arg);
2180}
2181void XEmitter::PSADBW(X64Reg dest, const OpArg& arg) {
2182 WriteSSEOp(0x66, 0xF6, dest, arg);
2183}
2184
// SSE2 min/max: only the signed-word and unsigned-byte forms are provided here.
2185void XEmitter::PMAXSW(X64Reg dest, const OpArg& arg) {
2186 WriteSSEOp(0x66, 0xEE, dest, arg);
2187}
2188void XEmitter::PMAXUB(X64Reg dest, const OpArg& arg) {
2189 WriteSSEOp(0x66, 0xDE, dest, arg);
2190}
2191void XEmitter::PMINSW(X64Reg dest, const OpArg& arg) {
2192 WriteSSEOp(0x66, 0xEA, dest, arg);
2193}
2194void XEmitter::PMINUB(X64Reg dest, const OpArg& arg) {
2195 WriteSSEOp(0x66, 0xDA, dest, arg);
2196}
2197
2198void XEmitter::PMOVMSKB(X64Reg dest, const OpArg& arg) {
2199 WriteSSEOp(0x66, 0xD7, dest, arg);
2200}
// Shuffles sharing opcode 0x70; the prefix selects the variant (0x66 = PSHUFD,
// 0xF2 = PSHUFLW, 0xF3 = PSHUFHW). Each passes 1 as the extra-byte count and appends
// the shuffle selector as an immediate byte.
2201void XEmitter::PSHUFD(X64Reg regOp, const OpArg& arg, u8 shuffle) {
2202 WriteSSEOp(0x66, 0x70, regOp, arg, 1);
2203 Write8(shuffle);
2204}
2205void XEmitter::PSHUFLW(X64Reg regOp, const OpArg& arg, u8 shuffle) {
2206 WriteSSEOp(0xF2, 0x70, regOp, arg, 1);
2207 Write8(shuffle);
2208}
2209void XEmitter::PSHUFHW(X64Reg regOp, const OpArg& arg, u8 shuffle) {
2210 WriteSSEOp(0xF3, 0x70, regOp, arg, 1);
2211 Write8(shuffle);
2212}
2213
2214// VEX
// Three-operand wrappers built on WriteAVXOp(prefix, opcode, regOp1, regOp2, arg).
// They reuse the same prefix bytes and sse* opcode constants as the two-operand SSE
// wrappers of the same name (0xF2 scalar double, 0x66 packed double, 0x00 packed
// single).
// NOTE(review): regOp1 is presumably the destination and regOp2 the first source -
// WriteAVXOp is defined outside this section, confirm there.
2215void XEmitter::VADDSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2216 WriteAVXOp(0xF2, sseADD, regOp1, regOp2, arg);
2217}
2218void XEmitter::VSUBSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2219 WriteAVXOp(0xF2, sseSUB, regOp1, regOp2, arg);
2220}
2221void XEmitter::VMULSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2222 WriteAVXOp(0xF2, sseMUL, regOp1, regOp2, arg);
2223}
2224void XEmitter::VDIVSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2225 WriteAVXOp(0xF2, sseDIV, regOp1, regOp2, arg);
2226}
2227void XEmitter::VADDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2228 WriteAVXOp(0x66, sseADD, regOp1, regOp2, arg);
2229}
2230void XEmitter::VSUBPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2231 WriteAVXOp(0x66, sseSUB, regOp1, regOp2, arg);
2232}
2233void XEmitter::VMULPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2234 WriteAVXOp(0x66, sseMUL, regOp1, regOp2, arg);
2235}
2236void XEmitter::VDIVPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2237 WriteAVXOp(0x66, sseDIV, regOp1, regOp2, arg);
2238}
2239void XEmitter::VSQRTSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2240 WriteAVXOp(0xF2, sseSQRT, regOp1, regOp2, arg);
2241}
// VSHUFPD passes 1 as the sixth WriteAVXOp argument and then appends the shuffle
// selector as an immediate byte, mirroring the extra-byte convention of SHUFPD.
2242void XEmitter::VSHUFPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg, u8 shuffle) {
2243 WriteAVXOp(0x66, sseSHUF, regOp1, regOp2, arg, 1);
2244 Write8(shuffle);
2245}
2246void XEmitter::VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2247 WriteAVXOp(0x66, 0x14, regOp1, regOp2, arg);
2248}
2249void XEmitter::VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2250 WriteAVXOp(0x66, 0x15, regOp1, regOp2, arg);
2251}
2252
// Floating-point bitwise logic, packed single (0x00) and packed double (0x66).
2253void XEmitter::VANDPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2254 WriteAVXOp(0x00, sseAND, regOp1, regOp2, arg);
2255}
2256void XEmitter::VANDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2257 WriteAVXOp(0x66, sseAND, regOp1, regOp2, arg);
2258}
2259void XEmitter::VANDNPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2260 WriteAVXOp(0x00, sseANDN, regOp1, regOp2, arg);
2261}
2262void XEmitter::VANDNPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2263 WriteAVXOp(0x66, sseANDN, regOp1, regOp2, arg);
2264}
2265void XEmitter::VORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2266 WriteAVXOp(0x00, sseOR, regOp1, regOp2, arg);
2267}
2268void XEmitter::VORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2269 WriteAVXOp(0x66, sseOR, regOp1, regOp2, arg);
2270}
2271void XEmitter::VXORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2272 WriteAVXOp(0x00, sseXOR, regOp1, regOp2, arg);
2273}
2274void XEmitter::VXORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2275 WriteAVXOp(0x66, sseXOR, regOp1, regOp2, arg);
2276}
2277
// Integer bitwise logic; same opcode bytes as PAND / PANDN / POR / PXOR.
2278void XEmitter::VPAND(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2279 WriteAVXOp(0x66, 0xDB, regOp1, regOp2, arg);
2280}
2281void XEmitter::VPANDN(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2282 WriteAVXOp(0x66, 0xDF, regOp1, regOp2, arg);
2283}
2284void XEmitter::VPOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2285 WriteAVXOp(0x66, 0xEB, regOp1, regOp2, arg);
2286}
2287void XEmitter::VPXOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2288 WriteAVXOp(0x66, 0xEF, regOp1, regOp2, arg);
2289}
2290
// FMA3 wrappers, all emitted through WriteAVXOp with prefix 0x66 and a 0x38xx op.
// Reading the low opcode byte:
//   high nibble: 0x9 = 132 operand order, 0xA = 213, 0xB = 231
//   low nibble:  6 = FMADDSUB, 7 = FMSUBADD, 8/9 = FMADD (packed/scalar),
//                A/B = FMSUB, C/D = FNMADD, E/F = FNMSUB
// The PS and PD wrappers (and likewise SS and SD) use the same op value; the only
// difference is that the PD/SD wrappers pass a trailing 1 as the sixth argument.
// NOTE(review): per the ISA the double-precision forms differ from the
// single-precision ones only by VEX.W=1, so that trailing 1 must end up selecting W.
// However VSHUFPD (defined earlier in this file) passes 1 in the same argument slot
// ahead of an immediate byte, i.e. as an extra-byte count. WriteAVXOp's signature is
// not visible in this section - verify which meaning it has; if it is an extra-byte
// count, every PD/SD wrapper below would encode the PS/SS instruction instead.
// NOTE(review): no FMA capability check is visible here; presumably WriteAVXOp or the
// caller is responsible - confirm.
2291void XEmitter::VFMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2292 WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg);
2293}
2294void XEmitter::VFMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2295 WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg);
2296}
2297void XEmitter::VFMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2298 WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg);
2299}
2300void XEmitter::VFMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2301 WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg, 1);
2302}
2303void XEmitter::VFMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2304 WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg, 1);
2305}
2306void XEmitter::VFMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2307 WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg, 1);
2308}
2309void XEmitter::VFMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2310 WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg);
2311}
2312void XEmitter::VFMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2313 WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg);
2314}
2315void XEmitter::VFMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2316 WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg);
2317}
2318void XEmitter::VFMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2319 WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg, 1);
2320}
2321void XEmitter::VFMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2322 WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg, 1);
2323}
2324void XEmitter::VFMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2325 WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg, 1);
2326}
2327void XEmitter::VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2328 WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg);
2329}
2330void XEmitter::VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2331 WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg);
2332}
2333void XEmitter::VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2334 WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg);
2335}
2336void XEmitter::VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2337 WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg, 1);
2338}
2339void XEmitter::VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2340 WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg, 1);
2341}
2342void XEmitter::VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2343 WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg, 1);
2344}
2345void XEmitter::VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2346 WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg);
2347}
2348void XEmitter::VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2349 WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg);
2350}
2351void XEmitter::VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2352 WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg);
2353}
2354void XEmitter::VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2355 WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg, 1);
2356}
2357void XEmitter::VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2358 WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg, 1);
2359}
2360void XEmitter::VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2361 WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg, 1);
2362}
2363void XEmitter::VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2364 WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg);
2365}
2366void XEmitter::VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2367 WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg);
2368}
2369void XEmitter::VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2370 WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg);
2371}
2372void XEmitter::VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2373 WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg, 1);
2374}
2375void XEmitter::VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2376 WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg, 1);
2377}
2378void XEmitter::VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2379 WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg, 1);
2380}
2381void XEmitter::VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2382 WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg);
2383}
2384void XEmitter::VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2385 WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg);
2386}
2387void XEmitter::VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2388 WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg);
2389}
2390void XEmitter::VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2391 WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg, 1);
2392}
2393void XEmitter::VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2394 WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg, 1);
2395}
2396void XEmitter::VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2397 WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg, 1);
2398}
2399void XEmitter::VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2400 WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg);
2401}
2402void XEmitter::VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2403 WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg);
2404}
2405void XEmitter::VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2406 WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg);
2407}
2408void XEmitter::VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2409 WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg, 1);
2410}
2411void XEmitter::VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2412 WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg, 1);
2413}
2414void XEmitter::VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2415 WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg, 1);
2416}
2417void XEmitter::VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2418 WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg);
2419}
2420void XEmitter::VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2421 WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg);
2422}
2423void XEmitter::VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2424 WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg);
2425}
2426void XEmitter::VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2427 WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg, 1);
2428}
2429void XEmitter::VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2430 WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg, 1);
2431}
2432void XEmitter::VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2433 WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg, 1);
2434}
2435void XEmitter::VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2436 WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg);
2437}
2438void XEmitter::VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2439 WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg);
2440}
2441void XEmitter::VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2442 WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg);
2443}
2444void XEmitter::VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2445 WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg, 1);
2446}
2447void XEmitter::VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2448 WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg, 1);
2449}
2450void XEmitter::VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2451 WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg, 1);
2452}
2453void XEmitter::VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2454 WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg);
2455}
2456void XEmitter::VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2457 WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg);
2458}
2459void XEmitter::VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2460 WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg);
2461}
2462void XEmitter::VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2463 WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg, 1);
2464}
2465void XEmitter::VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2466 WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg, 1);
2467}
2468void XEmitter::VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2469 WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg, 1);
2470}
2471
2472void XEmitter::SARX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {
2473 WriteBMI2Op(bits, 0xF3, 0x38F7, regOp1, regOp2, arg);
2474}
2475void XEmitter::SHLX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {
2476 WriteBMI2Op(bits, 0x66, 0x38F7, regOp1, regOp2, arg);
2477}
2478void XEmitter::SHRX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {
2479 WriteBMI2Op(bits, 0xF2, 0x38F7, regOp1, regOp2, arg);
2480}
2481void XEmitter::RORX(int bits, X64Reg regOp, const OpArg& arg, u8 rotate) {
2482 WriteBMI2Op(bits, 0xF2, 0x3AF0, regOp, INVALID_REG, arg, 1);
2483 Write8(rotate);
2484}
2485void XEmitter::PEXT(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2486 WriteBMI2Op(bits, 0xF3, 0x38F5, regOp1, regOp2, arg);
2487}
2488void XEmitter::PDEP(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2489 WriteBMI2Op(bits, 0xF2, 0x38F5, regOp1, regOp2, arg);
2490}
2491void XEmitter::MULX(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2492 WriteBMI2Op(bits, 0xF2, 0x38F6, regOp2, regOp1, arg);
2493}
2494void XEmitter::BZHI(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {
2495 WriteBMI2Op(bits, 0x00, 0x38F5, regOp1, regOp2, arg);
2496}
2497void XEmitter::BLSR(int bits, X64Reg regOp, const OpArg& arg) {
2498 WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x1, regOp, arg);
2499}
2500void XEmitter::BLSMSK(int bits, X64Reg regOp, const OpArg& arg) {
2501 WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x2, regOp, arg);
2502}
2503void XEmitter::BLSI(int bits, X64Reg regOp, const OpArg& arg) {
2504 WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x3, regOp, arg);
2505}
2506void XEmitter::BEXTR(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {
2507 WriteBMI1Op(bits, 0x00, 0x38F7, regOp1, regOp2, arg);
2508}
2509void XEmitter::ANDN(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2510 WriteBMI1Op(bits, 0x00, 0x38F2, regOp1, regOp2, arg);
2511}
2512
2513// Prefixes
2514
2515void XEmitter::LOCK() {
2516 Write8(0xF0);
2517}
2518void XEmitter::REP() {
2519 Write8(0xF3);
2520}
2521void XEmitter::REPNE() {
2522 Write8(0xF2);
2523}
2524void XEmitter::FSOverride() {
2525 Write8(0x64);
2526}
2527void XEmitter::GSOverride() {
2528 Write8(0x65);
2529}
2530
2531void XEmitter::FWAIT() {
2532 Write8(0x9B);
2533}
2534
2535// TODO: make this more generic
2536void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg) {
2537 int mf = 0;
2538 ASSERT_MSG(!(bits == 80 && op_80b == floatINVALID),
2539 "WriteFloatLoadStore: 80 bits not supported for this instruction");
2540 switch (bits) {
2541 case 32:
2542 mf = 0;
2543 break;
2544 case 64:
2545 mf = 4;
2546 break;
2547 case 80:
2548 mf = 2;
2549 break;
2550 default:
2551 ASSERT_MSG(0, "WriteFloatLoadStore: invalid bits (should be 32/64/80)");
2552 }
2553 Write8(0xd9 | mf);
2554 // x87 instructions use the reg field of the ModR/M byte as opcode:
2555 if (bits == 80)
2556 op = op_80b;
2557 arg.WriteRest(this, 0, (X64Reg)op);
2558}
2559
2560void XEmitter::FLD(int bits, const OpArg& src) {
2561 WriteFloatLoadStore(bits, floatLD, floatLD80, src);
2562}
2563void XEmitter::FST(int bits, const OpArg& dest) {
2564 WriteFloatLoadStore(bits, floatST, floatINVALID, dest);
2565}
2566void XEmitter::FSTP(int bits, const OpArg& dest) {
2567 WriteFloatLoadStore(bits, floatSTP, floatSTP80, dest);
2568}
2569void XEmitter::FNSTSW_AX() {
2570 Write8(0xDF);
2571 Write8(0xE0);
2572}
2573
2574void XEmitter::RDTSC() {
2575 Write8(0x0F);
2576 Write8(0x31);
2577}
2578
2579void XCodeBlock::PoisonMemory() {
2580 // x86/64: 0xCC = breakpoint
2581 memset(region, 0xCC, region_size);
2582}
2583}
diff --git a/src/common/x64/emitter.h b/src/common/x64/emitter.h
deleted file mode 100644
index 7d7cdde16..000000000
--- a/src/common/x64/emitter.h
+++ /dev/null
@@ -1,1206 +0,0 @@
1// Copyright (C) 2003 Dolphin Project.
2
3// This program is free software: you can redistribute it and/or modify
4// it under the terms of the GNU General Public License as published by
5// the Free Software Foundation, version 2.0 or later versions.
6
7// This program is distributed in the hope that it will be useful,
8// but WITHOUT ANY WARRANTY; without even the implied warranty of
9// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10// GNU General Public License 2.0 for more details.
11
12// A copy of the GPL 2.0 should have been included with the program.
13// If not, see http://www.gnu.org/licenses/
14
15// Official SVN repository and contact information can be found at
16// http://code.google.com/p/dolphin-emu/
17
18#pragma once
19
20#include <cstddef>
21#include "common/assert.h"
22#include "common/bit_set.h"
23#include "common/code_block.h"
24#include "common/common_types.h"
25
26#if defined(ARCHITECTURE_x86_64) && !defined(_ARCH_64)
27#define _ARCH_64
28#endif
29
30#ifdef _ARCH_64
31#define PTRBITS 64
32#else
33#define PTRBITS 32
34#endif
35
36namespace Gen {
37
// Register identifiers used throughout the emitter. Names from the different register files
// intentionally share numeric values (EAX, RAX, AL, AX, XMM0 and YMM0 are all 0, and so on);
// only the high-byte registers and INVALID_REG use values outside 0..15.
enum X64Reg {
    // 32-bit general-purpose names
    EAX = 0, ECX = 1, EDX = 2, EBX = 3, ESP = 4, EBP = 5, ESI = 6, EDI = 7,

    // 64-bit general-purpose names
    RAX = 0, RCX = 1, RDX = 2, RBX = 3, RSP = 4, RBP = 5, RSI = 6, RDI = 7,
    R8 = 8, R9 = 9, R10 = 10, R11 = 11, R12 = 12, R13 = 13, R14 = 14, R15 = 15,

    // 8-bit low-byte names
    AL = 0, CL = 1, DL = 2, BL = 3, SPL = 4, BPL = 5, SIL = 6, DIL = 7,

    // 8-bit high-byte names; these carry an extra 0x100 marker on top of the low bits
    AH = 0x104, CH = 0x105, DH = 0x106, BH = 0x107,

    // 16-bit names
    AX = 0, CX = 1, DX = 2, BX = 3, SP = 4, BP = 5, SI = 6, DI = 7,

    // 128-bit vector registers
    XMM0 = 0, XMM1 = 1, XMM2 = 2, XMM3 = 3, XMM4 = 4, XMM5 = 5, XMM6 = 6, XMM7 = 7,
    XMM8 = 8, XMM9 = 9, XMM10 = 10, XMM11 = 11, XMM12 = 12, XMM13 = 13, XMM14 = 14, XMM15 = 15,

    // 256-bit vector registers
    YMM0 = 0, YMM1 = 1, YMM2 = 2, YMM3 = 3, YMM4 = 4, YMM5 = 5, YMM6 = 6, YMM7 = 7,
    YMM8 = 8, YMM9 = 9, YMM10 = 10, YMM11 = 11, YMM12 = 12, YMM13 = 13, YMM14 = 14, YMM15 = 15,

    // Sentinel meaning "no register"
    INVALID_REG = 0xFFFFFFFF
};
123
// Condition codes accepted by the conditional emitters (J_CC, SETcc, CMOVcc). Several
// mnemonics are synonyms for one value; each synonym is defined in terms of the first name
// listed for that value.
enum CCFlags {
    CC_O = 0x0,
    CC_NO = 0x1,

    CC_B = 0x2,
    CC_C = CC_B,
    CC_NAE = CC_B,

    CC_NB = 0x3,
    CC_NC = CC_NB,
    CC_AE = CC_NB,

    CC_Z = 0x4,
    CC_E = CC_Z,

    CC_NZ = 0x5,
    CC_NE = CC_NZ,

    CC_BE = 0x6,
    CC_NA = CC_BE,

    CC_NBE = 0x7,
    CC_A = CC_NBE,

    CC_S = 0x8,
    CC_NS = 0x9,

    CC_P = 0xA,
    CC_PE = CC_P,

    CC_NP = 0xB,
    CC_PO = CC_NP,

    CC_L = 0xC,
    CC_NGE = CC_L,

    CC_NL = 0xD,
    CC_GE = CC_NL,

    CC_LE = 0xE,
    CC_NG = CC_LE,

    CC_NLE = 0xF,
    CC_G = CC_NLE
};
156
// Register-file sizes: sixteen general-purpose registers and sixteen XMM registers.
enum {
    NUMGPRs = 0x10,
    NUMXMMs = 0x10,
};
161
// Values stored in OpArg::scale. SCALE_NONE marks a plain register operand, the SCALE_IMM*
// values mark immediates of the named width, and the rest select memory addressing forms.
enum {
    SCALE_NONE = 0,

    SCALE_1 = 1,
    SCALE_2 = 2,
    SCALE_4 = 4,
    SCALE_8 = 8,

    SCALE_ATREG = 16,

    // SCALE_NOBASE_1 is not supported and can be replaced with SCALE_ATREG
    SCALE_NOBASE_2 = 0x20 | SCALE_2,
    SCALE_NOBASE_4 = 0x20 | SCALE_4,
    SCALE_NOBASE_8 = 0x20 | SCALE_8,

    SCALE_IMM8 = 0xF0,
    SCALE_IMM16 = 0xF1,
    SCALE_IMM32 = 0xF2,
    SCALE_IMM64 = 0xF3,

    SCALE_RIP = 0xFF,
};
179
// Selector for the two-operand integer instructions handled by the WriteNormalOp helpers.
// Values are consecutive starting at zero.
enum NormalOp {
    nrmADD = 0,
    nrmADC = 1,
    nrmSUB = 2,
    nrmSBB = 3,
    nrmAND = 4,
    nrmOR = 5,
    nrmXOR = 6,
    nrmMOV = 7,
    nrmTEST = 8,
    nrmCMP = 9,
    nrmXCHG = 10,
};
193
// Comparison predicates passed as the `compare` byte of CMPSS and the CMP*SS helpers.
// Values are consecutive starting at zero.
enum {
    CMP_EQ = 0,
    CMP_LT,
    CMP_LE,
    CMP_UNORD,
    CMP_NEQ,
    CMP_NLT,
    CMP_NLE,
    CMP_ORD,
};
204
// Operation selectors for the x87 load/store emitters; WriteFloatLoadStore casts the chosen
// value to X64Reg and passes it to OpArg::WriteRest. floatINVALID marks an operation that
// has no 80-bit form.
enum FloatOp {
    floatINVALID = -1,

    // selectors used with 32- and 64-bit operands
    floatLD = 0,
    floatST = 2,
    floatSTP = 3,

    // selectors used with 80-bit operands
    floatLD80 = 5,
    floatSTP80 = 7,
};
214
// Rounding-control values. The first five name a rounding mode; the two *_PRECISION values
// form a separate flag (0 or 8).
enum FloatRound {
    // rounding mode
    FROUND_NEAREST = 0x0,
    FROUND_FLOOR = 0x1,
    FROUND_CEIL = 0x2,
    FROUND_ZERO = 0x3,
    FROUND_MXCSR = 0x4,

    // precision flag
    FROUND_RAISE_PRECISION = 0x0,
    FROUND_IGNORE_PRECISION = 0x8,
};
225
226class XEmitter;
227
228// RIP addressing does not benefit from micro op fusion on Core arch
229struct OpArg {
230 friend class XEmitter;
231
232 constexpr OpArg() = default; // dummy op arg, used for storage
233 constexpr OpArg(u64 offset_, int scale_, X64Reg rmReg = RAX, X64Reg scaledReg = RAX)
234 : scale(static_cast<u8>(scale_)), offsetOrBaseReg(static_cast<u16>(rmReg)),
235 indexReg(static_cast<u16>(scaledReg)), offset(offset_) {}
236
237 constexpr bool operator==(const OpArg& b) const {
238 return operandReg == b.operandReg && scale == b.scale &&
239 offsetOrBaseReg == b.offsetOrBaseReg && indexReg == b.indexReg && offset == b.offset;
240 }
241
242 void WriteRex(XEmitter* emit, int opBits, int bits, int customOp = -1) const;
243 void WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm,
244 int W = 0) const;
245 void WriteRest(XEmitter* emit, int extraBytes = 0, X64Reg operandReg = INVALID_REG,
246 bool warn_64bit_offset = true) const;
247 void WriteSingleByteOp(XEmitter* emit, u8 op, X64Reg operandReg, int bits);
248 void WriteNormalOp(XEmitter* emit, bool toRM, NormalOp op, const OpArg& operand,
249 int bits) const;
250
251 constexpr bool IsImm() const {
252 return scale == SCALE_IMM8 || scale == SCALE_IMM16 || scale == SCALE_IMM32 ||
253 scale == SCALE_IMM64;
254 }
255 constexpr bool IsSimpleReg() const {
256 return scale == SCALE_NONE;
257 }
258 constexpr bool IsSimpleReg(X64Reg reg) const {
259 return IsSimpleReg() && GetSimpleReg() == reg;
260 }
261
262 int GetImmBits() const {
263 switch (scale) {
264 case SCALE_IMM8:
265 return 8;
266 case SCALE_IMM16:
267 return 16;
268 case SCALE_IMM32:
269 return 32;
270 case SCALE_IMM64:
271 return 64;
272 default:
273 return -1;
274 }
275 }
276
277 void SetImmBits(int bits) {
278 switch (bits) {
279 case 8:
280 scale = SCALE_IMM8;
281 break;
282 case 16:
283 scale = SCALE_IMM16;
284 break;
285 case 32:
286 scale = SCALE_IMM32;
287 break;
288 case 64:
289 scale = SCALE_IMM64;
290 break;
291 }
292 }
293
294 constexpr X64Reg GetSimpleReg() const {
295 return scale == SCALE_NONE ? static_cast<X64Reg>(offsetOrBaseReg) : INVALID_REG;
296 }
297
298 constexpr u32 GetImmValue() const {
299 return static_cast<u32>(offset);
300 }
301
302 // For loops.
303 void IncreaseOffset(int sz) {
304 offset += sz;
305 }
306
307private:
308 u8 scale = 0;
309 u16 offsetOrBaseReg = 0;
310 u16 indexReg = 0;
311 u64 offset = 0; // use RIP-relative as much as possible - 64-bit immediates are not available.
312 u16 operandReg = 0;
313};
314
315template <typename T>
316inline OpArg M(const T* ptr) {
317 return OpArg(reinterpret_cast<u64>(ptr), static_cast<int>(SCALE_RIP));
318}
319constexpr OpArg R(X64Reg value) {
320 return OpArg(0, SCALE_NONE, value);
321}
322constexpr OpArg MatR(X64Reg value) {
323 return OpArg(0, SCALE_ATREG, value);
324}
325
326constexpr OpArg MDisp(X64Reg value, int offset) {
327 return OpArg(static_cast<u32>(offset), SCALE_ATREG, value);
328}
329
330constexpr OpArg MComplex(X64Reg base, X64Reg scaled, int scale, int offset) {
331 return OpArg(offset, scale, base, scaled);
332}
333
334constexpr OpArg MScaled(X64Reg scaled, int scale, int offset) {
335 return scale == SCALE_1 ? OpArg(offset, SCALE_ATREG, scaled)
336 : OpArg(offset, scale | 0x20, RAX, scaled);
337}
338
339constexpr OpArg MRegSum(X64Reg base, X64Reg offset) {
340 return MComplex(base, offset, 1, 0);
341}
342
343constexpr OpArg Imm8(u8 imm) {
344 return OpArg(imm, SCALE_IMM8);
345}
346constexpr OpArg Imm16(u16 imm) {
347 return OpArg(imm, SCALE_IMM16);
348} // rarely used
349constexpr OpArg Imm32(u32 imm) {
350 return OpArg(imm, SCALE_IMM32);
351}
352constexpr OpArg Imm64(u64 imm) {
353 return OpArg(imm, SCALE_IMM64);
354}
355constexpr OpArg UImmAuto(u32 imm) {
356 return OpArg(imm, imm >= 128 ? SCALE_IMM32 : SCALE_IMM8);
357}
358constexpr OpArg SImmAuto(s32 imm) {
359 return OpArg(imm, (imm >= 128 || imm < -128) ? SCALE_IMM32 : SCALE_IMM8);
360}
361
362template <typename T>
363OpArg ImmPtr(const T* imm) {
364#ifdef _ARCH_64
365 return Imm64(reinterpret_cast<u64>(imm));
366#else
367 return Imm32(reinterpret_cast<u32>(imm));
368#endif
369}
370
371inline u32 PtrOffset(const void* ptr, const void* base) {
372#ifdef _ARCH_64
373 s64 distance = (s64)ptr - (s64)base;
374 if (distance >= 0x80000000LL || distance < -0x80000000LL) {
375 ASSERT_MSG(0, "pointer offset out of range");
376 return 0;
377 }
378
379 return (u32)distance;
380#else
381 return (u32)ptr - (u32)base;
382#endif
383}
384
// usage: int a[]; ARRAY_OFFSET(a,10)
// Expands to the address of element `index` minus the address of element 0, narrowed to u32.
#define ARRAY_OFFSET(array, index) ((u32)((u64)&(array)[index] - (u64)&(array)[0]))
// usage: struct {int e;} s; STRUCT_OFFSET(s,e)
// Expands to the address of member `elem` minus the address of `str` itself, narrowed to u32.
#define STRUCT_OFFSET(str, elem) ((u32)((u64)&(str).elem - (u64)&(str)))
389
390struct FixupBranch {
391 u8* ptr;
392 int type; // 0 = 8bit 1 = 32bit
393};
394
// SSE comparison predicates. Values are consecutive starting at zero and match the CMP_*
// constants one for one.
enum SSECompare {
    EQ = 0,
    LT = 1,
    LE = 2,
    UNORD = 3,
    NEQ = 4,
    NLT = 5,
    NLE = 6,
    ORD = 7,
};
405
406class XEmitter {
407 friend struct OpArg; // for Write8 etc
408private:
409 u8* code;
410 bool flags_locked;
411
412 void CheckFlags();
413
414 void Rex(int w, int r, int x, int b);
415 void WriteSimple1Byte(int bits, u8 byte, X64Reg reg);
416 void WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg);
417 void WriteMulDivType(int bits, OpArg src, int ext);
418 void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep = false);
419 void WriteShift(int bits, OpArg dest, const OpArg& shift, int ext);
420 void WriteBitTest(int bits, const OpArg& dest, const OpArg& index, int ext);
421 void WriteMXCSR(OpArg arg, int ext);
422 void WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0);
423 void WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0);
424 void WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0);
425 void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0);
426 void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg,
427 int extrabytes = 0);
428 void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg,
429 int extrabytes = 0);
430 void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg,
431 int extrabytes = 0);
432 void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg,
433 int extrabytes = 0);
434 void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg);
435 void WriteNormalOp(XEmitter* emit, int bits, NormalOp op, const OpArg& a1, const OpArg& a2);
436
437 void ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size,
438 size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp);
439
440protected:
441 void Write8(u8 value);
442 void Write16(u16 value);
443 void Write32(u32 value);
444 void Write64(u64 value);
445
446public:
447 XEmitter() {
448 code = nullptr;
449 flags_locked = false;
450 }
451 XEmitter(u8* code_ptr) {
452 code = code_ptr;
453 flags_locked = false;
454 }
455 virtual ~XEmitter() {}
456
457 void WriteModRM(int mod, int rm, int reg);
458 void WriteSIB(int scale, int index, int base);
459
460 void SetCodePtr(u8* ptr);
461 void ReserveCodeSpace(int bytes);
462 const u8* AlignCode4();
463 const u8* AlignCode16();
464 const u8* AlignCodePage();
465 const u8* GetCodePtr() const;
466 u8* GetWritableCodePtr();
467
468 void LockFlags() {
469 flags_locked = true;
470 }
471 void UnlockFlags() {
472 flags_locked = false;
473 }
474
475 // Looking for one of these? It's BANNED!! Some instructions are slow on modern CPU
476 // INC, DEC, LOOP, LOOPNE, LOOPE, ENTER, LEAVE, XCHG, XLAT, REP MOVSB/MOVSD, REP SCASD + other
477 // string instr.,
478 // INC and DEC are slow on Intel Core, but not on AMD. They create a
479 // false flag dependency because they only update a subset of the flags.
480 // XCHG is SLOW and should be avoided.
481
482 // Debug breakpoint
483 void INT3();
484
485 // Do nothing
486 void NOP(size_t count = 1);
487
488 // Save energy in wait-loops on P4 only. Probably not too useful.
489 void PAUSE();
490
491 // Flag control
492 void STC();
493 void CLC();
494 void CMC();
495
496 // These two can not be executed in 64-bit mode on early Intel 64-bit CPU:s, only on Core2 and
497 // AMD!
498 void LAHF(); // 3 cycle vector path
499 void SAHF(); // direct path fast
500
501 // Stack control
502 void PUSH(X64Reg reg);
503 void POP(X64Reg reg);
504 void PUSH(int bits, const OpArg& reg);
505 void POP(int bits, const OpArg& reg);
506 void PUSHF();
507 void POPF();
508
509 // Flow control
510 void RET();
511 void RET_FAST();
512 void UD2();
513 FixupBranch J(bool force5bytes = false);
514
515 void JMP(const u8* addr, bool force5Bytes = false);
516 void JMPptr(const OpArg& arg);
517 void JMPself(); // infinite loop!
518#ifdef CALL
519#undef CALL
520#endif
521 void CALL(const void* fnptr);
522 FixupBranch CALL();
523 void CALLptr(OpArg arg);
524
525 FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false);
526 void J_CC(CCFlags conditionCode, const u8* addr, bool force5Bytes = false);
527
528 void SetJumpTarget(const FixupBranch& branch);
529 void SetJumpTarget(const FixupBranch& branch, const u8* target);
530
531 void SETcc(CCFlags flag, OpArg dest);
532 // Note: CMOV brings small if any benefit on current cpus.
533 void CMOVcc(int bits, X64Reg dest, OpArg src, CCFlags flag);
534
535 // Fences
536 void LFENCE();
537 void MFENCE();
538 void SFENCE();
539
540 // Bit scan
541 void BSF(int bits, X64Reg dest, const OpArg& src); // Bottom bit to top bit
542 void BSR(int bits, X64Reg dest, const OpArg& src); // Top bit to bottom bit
543
544 // Cache control
545 enum PrefetchLevel {
546 PF_NTA, // Non-temporal (data used once and only once)
547 PF_T0, // All cache levels
548 PF_T1, // Levels 2+ (aliased to T0 on AMD)
549 PF_T2, // Levels 3+ (aliased to T0 on AMD)
550 };
551 void PREFETCH(PrefetchLevel level, OpArg arg);
552 void MOVNTI(int bits, const OpArg& dest, X64Reg src);
553 void MOVNTDQ(const OpArg& arg, X64Reg regOp);
554 void MOVNTPS(const OpArg& arg, X64Reg regOp);
555 void MOVNTPD(const OpArg& arg, X64Reg regOp);
556
557 // Multiplication / division
558 void MUL(int bits, const OpArg& src); // UNSIGNED
559 void IMUL(int bits, const OpArg& src); // SIGNED
560 void IMUL(int bits, X64Reg regOp, const OpArg& src);
561 void IMUL(int bits, X64Reg regOp, const OpArg& src, const OpArg& imm);
562 void DIV(int bits, const OpArg& src);
563 void IDIV(int bits, const OpArg& src);
564
565 // Shift
566 void ROL(int bits, const OpArg& dest, const OpArg& shift);
567 void ROR(int bits, const OpArg& dest, const OpArg& shift);
568 void RCL(int bits, const OpArg& dest, const OpArg& shift);
569 void RCR(int bits, const OpArg& dest, const OpArg& shift);
570 void SHL(int bits, const OpArg& dest, const OpArg& shift);
571 void SHR(int bits, const OpArg& dest, const OpArg& shift);
572 void SAR(int bits, const OpArg& dest, const OpArg& shift);
573
574 // Bit Test
575 void BT(int bits, const OpArg& dest, const OpArg& index);
576 void BTS(int bits, const OpArg& dest, const OpArg& index);
577 void BTR(int bits, const OpArg& dest, const OpArg& index);
578 void BTC(int bits, const OpArg& dest, const OpArg& index);
579
580 // Double-Precision Shift
581 void SHRD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift);
582 void SHLD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift);
583
584 // Extend EAX into EDX in various ways
585 void CWD(int bits = 16);
586 void CDQ() {
587 CWD(32);
588 }
589 void CQO() {
590 CWD(64);
591 }
592 void CBW(int bits = 8);
593 void CWDE() {
594 CBW(16);
595 }
596 void CDQE() {
597 CBW(32);
598 }
599
600 // Load effective address
601 void LEA(int bits, X64Reg dest, OpArg src);
602
603 // Integer arithmetic
604 void NEG(int bits, const OpArg& src);
605 void ADD(int bits, const OpArg& a1, const OpArg& a2);
606 void ADC(int bits, const OpArg& a1, const OpArg& a2);
607 void SUB(int bits, const OpArg& a1, const OpArg& a2);
608 void SBB(int bits, const OpArg& a1, const OpArg& a2);
609 void AND(int bits, const OpArg& a1, const OpArg& a2);
610 void CMP(int bits, const OpArg& a1, const OpArg& a2);
611
612 // Bit operations
613 void NOT(int bits, const OpArg& src);
614 void OR(int bits, const OpArg& a1, const OpArg& a2);
615 void XOR(int bits, const OpArg& a1, const OpArg& a2);
616 void MOV(int bits, const OpArg& a1, const OpArg& a2);
617 void TEST(int bits, const OpArg& a1, const OpArg& a2);
618
619 // Are these useful at all? Consider removing.
620 void XCHG(int bits, const OpArg& a1, const OpArg& a2);
621 void XCHG_AHAL();
622
623 // Byte swapping (32 and 64-bit only).
624 void BSWAP(int bits, X64Reg reg);
625
626 // Sign/zero extension
627 void MOVSX(int dbits, int sbits, X64Reg dest,
628 OpArg src); // automatically uses MOVSXD if necessary
629 void MOVZX(int dbits, int sbits, X64Reg dest, OpArg src);
630
631 // Available only on Atom or >= Haswell so far. Test with GetCPUCaps().movbe.
632 void MOVBE(int dbits, const OpArg& dest, const OpArg& src);
633
634 // Available only on AMD >= Phenom or Intel >= Haswell
635 void LZCNT(int bits, X64Reg dest, const OpArg& src);
636 // Note: this one is actually part of BMI1
637 void TZCNT(int bits, X64Reg dest, const OpArg& src);
638
639 // WARNING - These two take 11-13 cycles and are VectorPath! (AMD64)
640 void STMXCSR(const OpArg& memloc);
641 void LDMXCSR(const OpArg& memloc);
642
643 // Prefixes
644 void LOCK();
645 void REP();
646 void REPNE();
647 void FSOverride();
648 void GSOverride();
649
650 // x87
651 enum x87StatusWordBits {
652 x87_InvalidOperation = 0x1,
653 x87_DenormalizedOperand = 0x2,
654 x87_DivisionByZero = 0x4,
655 x87_Overflow = 0x8,
656 x87_Underflow = 0x10,
657 x87_Precision = 0x20,
658 x87_StackFault = 0x40,
659 x87_ErrorSummary = 0x80,
660 x87_C0 = 0x100,
661 x87_C1 = 0x200,
662 x87_C2 = 0x400,
663 x87_TopOfStack = 0x2000 | 0x1000 | 0x800,
664 x87_C3 = 0x4000,
665 x87_FPUBusy = 0x8000,
666 };
667
668 void FLD(int bits, const OpArg& src);
669 void FST(int bits, const OpArg& dest);
670 void FSTP(int bits, const OpArg& dest);
671 void FNSTSW_AX();
672 void FWAIT();
673
674 // SSE/SSE2: Floating point arithmetic
675 void ADDSS(X64Reg regOp, const OpArg& arg);
676 void ADDSD(X64Reg regOp, const OpArg& arg);
677 void SUBSS(X64Reg regOp, const OpArg& arg);
678 void SUBSD(X64Reg regOp, const OpArg& arg);
679 void MULSS(X64Reg regOp, const OpArg& arg);
680 void MULSD(X64Reg regOp, const OpArg& arg);
681 void DIVSS(X64Reg regOp, const OpArg& arg);
682 void DIVSD(X64Reg regOp, const OpArg& arg);
683 void MINSS(X64Reg regOp, const OpArg& arg);
684 void MINSD(X64Reg regOp, const OpArg& arg);
685 void MAXSS(X64Reg regOp, const OpArg& arg);
686 void MAXSD(X64Reg regOp, const OpArg& arg);
687 void SQRTSS(X64Reg regOp, const OpArg& arg);
688 void SQRTSD(X64Reg regOp, const OpArg& arg);
689 void RCPSS(X64Reg regOp, const OpArg& arg);
690 void RSQRTSS(X64Reg regOp, const OpArg& arg);
691
692 // SSE/SSE2: Floating point bitwise (yes)
693 void CMPSS(X64Reg regOp, const OpArg& arg, u8 compare);
694 void CMPSD(X64Reg regOp, const OpArg& arg, u8 compare);
695
696 void CMPEQSS(X64Reg regOp, const OpArg& arg) {
697 CMPSS(regOp, arg, CMP_EQ);
698 }
699 void CMPLTSS(X64Reg regOp, const OpArg& arg) {
700 CMPSS(regOp, arg, CMP_LT);
701 }
702 void CMPLESS(X64Reg regOp, const OpArg& arg) {
703 CMPSS(regOp, arg, CMP_LE);
704 }
705 void CMPUNORDSS(X64Reg regOp, const OpArg& arg) {
706 CMPSS(regOp, arg, CMP_UNORD);
707 }
708 void CMPNEQSS(X64Reg regOp, const OpArg& arg) {
709 CMPSS(regOp, arg, CMP_NEQ);
710 }
711 void CMPNLTSS(X64Reg regOp, const OpArg& arg) {
712 CMPSS(regOp, arg, CMP_NLT);
713 }
714 void CMPORDSS(X64Reg regOp, const OpArg& arg) {
715 CMPSS(regOp, arg, CMP_ORD);
716 }
717
718 // SSE/SSE2: Floating point packed arithmetic (x4 for float, x2 for double)
719 void ADDPS(X64Reg regOp, const OpArg& arg);
720 void ADDPD(X64Reg regOp, const OpArg& arg);
721 void SUBPS(X64Reg regOp, const OpArg& arg);
722 void SUBPD(X64Reg regOp, const OpArg& arg);
723 void CMPPS(X64Reg regOp, const OpArg& arg, u8 compare);
724 void CMPPD(X64Reg regOp, const OpArg& arg, u8 compare);
725 void MULPS(X64Reg regOp, const OpArg& arg);
726 void MULPD(X64Reg regOp, const OpArg& arg);
727 void DIVPS(X64Reg regOp, const OpArg& arg);
728 void DIVPD(X64Reg regOp, const OpArg& arg);
729 void MINPS(X64Reg regOp, const OpArg& arg);
730 void MINPD(X64Reg regOp, const OpArg& arg);
731 void MAXPS(X64Reg regOp, const OpArg& arg);
732 void MAXPD(X64Reg regOp, const OpArg& arg);
733 void SQRTPS(X64Reg regOp, const OpArg& arg);
734 void SQRTPD(X64Reg regOp, const OpArg& arg);
735 void RCPPS(X64Reg regOp, const OpArg& arg);
736 void RSQRTPS(X64Reg regOp, const OpArg& arg);
737
738 // SSE/SSE2: Floating point packed bitwise (x4 for float, x2 for double)
739 void ANDPS(X64Reg regOp, const OpArg& arg);
740 void ANDPD(X64Reg regOp, const OpArg& arg);
741 void ANDNPS(X64Reg regOp, const OpArg& arg);
742 void ANDNPD(X64Reg regOp, const OpArg& arg);
743 void ORPS(X64Reg regOp, const OpArg& arg);
744 void ORPD(X64Reg regOp, const OpArg& arg);
745 void XORPS(X64Reg regOp, const OpArg& arg);
746 void XORPD(X64Reg regOp, const OpArg& arg);
747
748 // SSE/SSE2: Shuffle components. These are tricky - see Intel documentation.
749 void SHUFPS(X64Reg regOp, const OpArg& arg, u8 shuffle);
750 void SHUFPD(X64Reg regOp, const OpArg& arg, u8 shuffle);
751
752 // SSE/SSE2: Useful alternative to shuffle in some cases.
753 void MOVDDUP(X64Reg regOp, const OpArg& arg);
754
755 // SSE3: Horizontal operations in SIMD registers. Very slow! shufps-based code beats it handily
756 // on Ivy.
757 void HADDPS(X64Reg dest, const OpArg& src);
758
759 // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg
760 // contains both a read mask and a write "mask".
761 void DPPS(X64Reg dest, const OpArg& src, u8 arg);
762
763 void UNPCKLPS(X64Reg dest, const OpArg& src);
764 void UNPCKHPS(X64Reg dest, const OpArg& src);
765 void UNPCKLPD(X64Reg dest, const OpArg& src);
766 void UNPCKHPD(X64Reg dest, const OpArg& src);
767
768 // SSE/SSE2: Compares.
769 void COMISS(X64Reg regOp, const OpArg& arg);
770 void COMISD(X64Reg regOp, const OpArg& arg);
771 void UCOMISS(X64Reg regOp, const OpArg& arg);
772 void UCOMISD(X64Reg regOp, const OpArg& arg);
773
774 // SSE/SSE2: Moves. Use the right data type for your data, in most cases.
775 void MOVAPS(X64Reg regOp, const OpArg& arg);
776 void MOVAPD(X64Reg regOp, const OpArg& arg);
777 void MOVAPS(const OpArg& arg, X64Reg regOp);
778 void MOVAPD(const OpArg& arg, X64Reg regOp);
779
780 void MOVUPS(X64Reg regOp, const OpArg& arg);
781 void MOVUPD(X64Reg regOp, const OpArg& arg);
782 void MOVUPS(const OpArg& arg, X64Reg regOp);
783 void MOVUPD(const OpArg& arg, X64Reg regOp);
784
785 void MOVDQA(X64Reg regOp, const OpArg& arg);
786 void MOVDQA(const OpArg& arg, X64Reg regOp);
787 void MOVDQU(X64Reg regOp, const OpArg& arg);
788 void MOVDQU(const OpArg& arg, X64Reg regOp);
789
790 void MOVSS(X64Reg regOp, const OpArg& arg);
791 void MOVSD(X64Reg regOp, const OpArg& arg);
792 void MOVSS(const OpArg& arg, X64Reg regOp);
793 void MOVSD(const OpArg& arg, X64Reg regOp);
794
795 void MOVLPS(X64Reg regOp, const OpArg& arg);
796 void MOVLPD(X64Reg regOp, const OpArg& arg);
797 void MOVLPS(const OpArg& arg, X64Reg regOp);
798 void MOVLPD(const OpArg& arg, X64Reg regOp);
799
800 void MOVHPS(X64Reg regOp, const OpArg& arg);
801 void MOVHPD(X64Reg regOp, const OpArg& arg);
802 void MOVHPS(const OpArg& arg, X64Reg regOp);
803 void MOVHPD(const OpArg& arg, X64Reg regOp);
804
805 void MOVHLPS(X64Reg regOp1, X64Reg regOp2);
806 void MOVLHPS(X64Reg regOp1, X64Reg regOp2);
807
808 void MOVD_xmm(X64Reg dest, const OpArg& arg);
809 void MOVQ_xmm(X64Reg dest, OpArg arg);
810 void MOVD_xmm(const OpArg& arg, X64Reg src);
811 void MOVQ_xmm(OpArg arg, X64Reg src);
812
813 // SSE/SSE2: Generates a mask from the high bits of the components of the packed register in
814 // question.
815 void MOVMSKPS(X64Reg dest, const OpArg& arg);
816 void MOVMSKPD(X64Reg dest, const OpArg& arg);
817
818 // SSE2: Selective byte store, mask in src register. EDI/RDI specifies store address. This is a
819 // weird one.
820 void MASKMOVDQU(X64Reg dest, X64Reg src);
821 void LDDQU(X64Reg dest, const OpArg& src);
822
823 // SSE/SSE2: Data type conversions.
824 void CVTPS2PD(X64Reg dest, const OpArg& src);
825 void CVTPD2PS(X64Reg dest, const OpArg& src);
826 void CVTSS2SD(X64Reg dest, const OpArg& src);
827 void CVTSI2SS(X64Reg dest, const OpArg& src);
828 void CVTSD2SS(X64Reg dest, const OpArg& src);
829 void CVTSI2SD(X64Reg dest, const OpArg& src);
830 void CVTDQ2PD(X64Reg regOp, const OpArg& arg);
831 void CVTPD2DQ(X64Reg regOp, const OpArg& arg);
832 void CVTDQ2PS(X64Reg regOp, const OpArg& arg);
833 void CVTPS2DQ(X64Reg regOp, const OpArg& arg);
834
835 void CVTTPS2DQ(X64Reg regOp, const OpArg& arg);
836 void CVTTPD2DQ(X64Reg regOp, const OpArg& arg);
837
838 // Destinations are X64 regs (rax, rbx, ...) for these instructions.
839 void CVTSS2SI(X64Reg xregdest, const OpArg& src);
840 void CVTSD2SI(X64Reg xregdest, const OpArg& src);
841 void CVTTSS2SI(X64Reg xregdest, const OpArg& arg);
842 void CVTTSD2SI(X64Reg xregdest, const OpArg& arg);
843
844 // SSE2: Packed integer instructions
845 void PACKSSDW(X64Reg dest, const OpArg& arg);
846 void PACKSSWB(X64Reg dest, const OpArg& arg);
847 void PACKUSDW(X64Reg dest, const OpArg& arg);
848 void PACKUSWB(X64Reg dest, const OpArg& arg);
849
850 void PUNPCKLBW(X64Reg dest, const OpArg& arg);
851 void PUNPCKLWD(X64Reg dest, const OpArg& arg);
852 void PUNPCKLDQ(X64Reg dest, const OpArg& arg);
853 void PUNPCKLQDQ(X64Reg dest, const OpArg& arg);
854
855 void PTEST(X64Reg dest, const OpArg& arg);
856 void PAND(X64Reg dest, const OpArg& arg);
857 void PANDN(X64Reg dest, const OpArg& arg);
858 void PXOR(X64Reg dest, const OpArg& arg);
859 void POR(X64Reg dest, const OpArg& arg);
860
861 void PADDB(X64Reg dest, const OpArg& arg);
862 void PADDW(X64Reg dest, const OpArg& arg);
863 void PADDD(X64Reg dest, const OpArg& arg);
864 void PADDQ(X64Reg dest, const OpArg& arg);
865
866 void PADDSB(X64Reg dest, const OpArg& arg);
867 void PADDSW(X64Reg dest, const OpArg& arg);
868 void PADDUSB(X64Reg dest, const OpArg& arg);
869 void PADDUSW(X64Reg dest, const OpArg& arg);
870
871 void PSUBB(X64Reg dest, const OpArg& arg);
872 void PSUBW(X64Reg dest, const OpArg& arg);
873 void PSUBD(X64Reg dest, const OpArg& arg);
874 void PSUBQ(X64Reg dest, const OpArg& arg);
875
876 void PSUBSB(X64Reg dest, const OpArg& arg);
877 void PSUBSW(X64Reg dest, const OpArg& arg);
878 void PSUBUSB(X64Reg dest, const OpArg& arg);
879 void PSUBUSW(X64Reg dest, const OpArg& arg);
880
881 void PAVGB(X64Reg dest, const OpArg& arg);
882 void PAVGW(X64Reg dest, const OpArg& arg);
883
884 void PCMPEQB(X64Reg dest, const OpArg& arg);
885 void PCMPEQW(X64Reg dest, const OpArg& arg);
886 void PCMPEQD(X64Reg dest, const OpArg& arg);
887
888 void PCMPGTB(X64Reg dest, const OpArg& arg);
889 void PCMPGTW(X64Reg dest, const OpArg& arg);
890 void PCMPGTD(X64Reg dest, const OpArg& arg);
891
892 void PEXTRW(X64Reg dest, const OpArg& arg, u8 subreg);
893 void PINSRW(X64Reg dest, const OpArg& arg, u8 subreg);
894
895 void PMADDWD(X64Reg dest, const OpArg& arg);
896 void PSADBW(X64Reg dest, const OpArg& arg);
897
898 void PMAXSW(X64Reg dest, const OpArg& arg);
899 void PMAXUB(X64Reg dest, const OpArg& arg);
900 void PMINSW(X64Reg dest, const OpArg& arg);
901 void PMINUB(X64Reg dest, const OpArg& arg);
902 // SSE4: More MAX/MIN instructions.
903 void PMINSB(X64Reg dest, const OpArg& arg);
904 void PMINSD(X64Reg dest, const OpArg& arg);
905 void PMINUW(X64Reg dest, const OpArg& arg);
906 void PMINUD(X64Reg dest, const OpArg& arg);
907 void PMAXSB(X64Reg dest, const OpArg& arg);
908 void PMAXSD(X64Reg dest, const OpArg& arg);
909 void PMAXUW(X64Reg dest, const OpArg& arg);
910 void PMAXUD(X64Reg dest, const OpArg& arg);
911
912 void PMOVMSKB(X64Reg dest, const OpArg& arg);
913 void PSHUFD(X64Reg dest, const OpArg& arg, u8 shuffle);
914 void PSHUFB(X64Reg dest, const OpArg& arg);
915
916 void PSHUFLW(X64Reg dest, const OpArg& arg, u8 shuffle);
917 void PSHUFHW(X64Reg dest, const OpArg& arg, u8 shuffle);
918
919 void PSRLW(X64Reg reg, int shift);
920 void PSRLD(X64Reg reg, int shift);
921 void PSRLQ(X64Reg reg, int shift);
922 void PSRLQ(X64Reg reg, const OpArg& arg);
923 void PSRLDQ(X64Reg reg, int shift);
924
925 void PSLLW(X64Reg reg, int shift);
926 void PSLLD(X64Reg reg, int shift);
927 void PSLLQ(X64Reg reg, int shift);
928 void PSLLDQ(X64Reg reg, int shift);
929
930 void PSRAW(X64Reg reg, int shift);
931 void PSRAD(X64Reg reg, int shift);
932
933 // SSE4: data type conversions
934 void PMOVSXBW(X64Reg dest, const OpArg& arg);
935 void PMOVSXBD(X64Reg dest, const OpArg& arg);
936 void PMOVSXBQ(X64Reg dest, const OpArg& arg);
937 void PMOVSXWD(X64Reg dest, const OpArg& arg);
938 void PMOVSXWQ(X64Reg dest, const OpArg& arg);
939 void PMOVSXDQ(X64Reg dest, const OpArg& arg);
940 void PMOVZXBW(X64Reg dest, const OpArg& arg);
941 void PMOVZXBD(X64Reg dest, const OpArg& arg);
942 void PMOVZXBQ(X64Reg dest, const OpArg& arg);
943 void PMOVZXWD(X64Reg dest, const OpArg& arg);
944 void PMOVZXWQ(X64Reg dest, const OpArg& arg);
945 void PMOVZXDQ(X64Reg dest, const OpArg& arg);
946
947 // SSE4: variable blend instructions (xmm0 implicit argument)
948 void PBLENDVB(X64Reg dest, const OpArg& arg);
949 void BLENDVPS(X64Reg dest, const OpArg& arg);
950 void BLENDVPD(X64Reg dest, const OpArg& arg);
951 void BLENDPS(X64Reg dest, const OpArg& arg, u8 blend);
952 void BLENDPD(X64Reg dest, const OpArg& arg, u8 blend);
953
954 // SSE4: rounding (see FloatRound for mode or use ROUNDNEARSS, etc. helpers.)
955 void ROUNDSS(X64Reg dest, const OpArg& arg, u8 mode);
956 void ROUNDSD(X64Reg dest, const OpArg& arg, u8 mode);
957 void ROUNDPS(X64Reg dest, const OpArg& arg, u8 mode);
958 void ROUNDPD(X64Reg dest, const OpArg& arg, u8 mode);
959
// Fixed-mode helpers for ROUNDSS: each forwards dest and arg unchanged and supplies one
// FROUND_* constant as the mode argument.
// ROUNDSS with mode FROUND_NEAREST.
960 void ROUNDNEARSS(X64Reg dest, const OpArg& arg) {
961 ROUNDSS(dest, arg, FROUND_NEAREST);
962 }
// ROUNDSS with mode FROUND_FLOOR.
963 void ROUNDFLOORSS(X64Reg dest, const OpArg& arg) {
964 ROUNDSS(dest, arg, FROUND_FLOOR);
965 }
// ROUNDSS with mode FROUND_CEIL.
966 void ROUNDCEILSS(X64Reg dest, const OpArg& arg) {
967 ROUNDSS(dest, arg, FROUND_CEIL);
968 }
// ROUNDSS with mode FROUND_ZERO.
969 void ROUNDZEROSS(X64Reg dest, const OpArg& arg) {
970 ROUNDSS(dest, arg, FROUND_ZERO);
971 }
972
// Fixed-mode helpers for ROUNDSD: each forwards dest and arg unchanged and supplies one
// FROUND_* constant as the mode argument.
// ROUNDSD with mode FROUND_NEAREST.
973 void ROUNDNEARSD(X64Reg dest, const OpArg& arg) {
974 ROUNDSD(dest, arg, FROUND_NEAREST);
975 }
// ROUNDSD with mode FROUND_FLOOR.
976 void ROUNDFLOORSD(X64Reg dest, const OpArg& arg) {
977 ROUNDSD(dest, arg, FROUND_FLOOR);
978 }
// ROUNDSD with mode FROUND_CEIL.
979 void ROUNDCEILSD(X64Reg dest, const OpArg& arg) {
980 ROUNDSD(dest, arg, FROUND_CEIL);
981 }
// ROUNDSD with mode FROUND_ZERO.
982 void ROUNDZEROSD(X64Reg dest, const OpArg& arg) {
983 ROUNDSD(dest, arg, FROUND_ZERO);
984 }
985
// Fixed-mode helpers for ROUNDPS: each forwards dest and arg unchanged and supplies one
// FROUND_* constant as the mode argument.
// ROUNDPS with mode FROUND_NEAREST.
986 void ROUNDNEARPS(X64Reg dest, const OpArg& arg) {
987 ROUNDPS(dest, arg, FROUND_NEAREST);
988 }
// ROUNDPS with mode FROUND_FLOOR.
989 void ROUNDFLOORPS(X64Reg dest, const OpArg& arg) {
990 ROUNDPS(dest, arg, FROUND_FLOOR);
991 }
// ROUNDPS with mode FROUND_CEIL.
992 void ROUNDCEILPS(X64Reg dest, const OpArg& arg) {
993 ROUNDPS(dest, arg, FROUND_CEIL);
994 }
// ROUNDPS with mode FROUND_ZERO.
995 void ROUNDZEROPS(X64Reg dest, const OpArg& arg) {
996 ROUNDPS(dest, arg, FROUND_ZERO);
997 }
998
// Fixed-mode helpers for ROUNDPD: each forwards dest and arg unchanged and supplies one
// FROUND_* constant as the mode argument.
// ROUNDPD with mode FROUND_NEAREST.
999 void ROUNDNEARPD(X64Reg dest, const OpArg& arg) {
1000 ROUNDPD(dest, arg, FROUND_NEAREST);
1001 }
// ROUNDPD with mode FROUND_FLOOR.
1002 void ROUNDFLOORPD(X64Reg dest, const OpArg& arg) {
1003 ROUNDPD(dest, arg, FROUND_FLOOR);
1004 }
// ROUNDPD with mode FROUND_CEIL.
1005 void ROUNDCEILPD(X64Reg dest, const OpArg& arg) {
1006 ROUNDPD(dest, arg, FROUND_CEIL);
1007 }
// ROUNDPD with mode FROUND_ZERO.
1008 void ROUNDZEROPD(X64Reg dest, const OpArg& arg) {
1009 ROUNDPD(dest, arg, FROUND_ZERO);
1010 }
1011
1012 // AVX
1013 void VADDSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1014 void VSUBSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1015 void VMULSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1016 void VDIVSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1017 void VADDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1018 void VSUBPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1019 void VMULPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1020 void VDIVPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1021 void VSQRTSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1022 void VSHUFPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg, u8 shuffle);
1023 void VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1024 void VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1025
1026 void VANDPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1027 void VANDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1028 void VANDNPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1029 void VANDNPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1030 void VORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1031 void VORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1032 void VXORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1033 void VXORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1034
1035 void VPAND(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1036 void VPANDN(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1037 void VPOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1038 void VPXOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1039
1040 // FMA3
1041 void VFMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1042 void VFMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1043 void VFMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1044 void VFMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1045 void VFMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1046 void VFMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1047 void VFMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1048 void VFMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1049 void VFMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1050 void VFMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1051 void VFMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1052 void VFMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1053 void VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1054 void VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1055 void VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1056 void VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1057 void VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1058 void VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1059 void VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1060 void VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1061 void VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1062 void VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1063 void VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1064 void VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1065 void VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1066 void VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1067 void VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1068 void VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1069 void VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1070 void VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1071 void VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1072 void VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1073 void VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1074 void VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1075 void VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1076 void VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1077 void VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1078 void VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1079 void VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1080 void VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1081 void VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1082 void VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1083 void VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1084 void VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1085 void VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1086 void VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1087 void VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1088 void VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1089 void VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1090 void VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1091 void VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1092 void VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1093 void VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1094 void VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1095 void VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1096 void VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1097 void VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1098 void VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1099 void VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1100 void VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1101
1102 // VEX GPR instructions
1103 void SARX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2);
1104 void SHLX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2);
1105 void SHRX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2);
1106 void RORX(int bits, X64Reg regOp, const OpArg& arg, u8 rotate);
1107 void PEXT(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1108 void PDEP(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1109 void MULX(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1110 void BZHI(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2);
1111 void BLSR(int bits, X64Reg regOp, const OpArg& arg);
1112 void BLSMSK(int bits, X64Reg regOp, const OpArg& arg);
1113 void BLSI(int bits, X64Reg regOp, const OpArg& arg);
1114 void BEXTR(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2);
1115 void ANDN(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
1116
1117 void RDTSC();
1118
1119 // Utility functions
1120 // The difference between this and CALL is that this aligns the stack
1121 // where appropriate.
1122 void ABI_CallFunction(const void* func);
// Typed convenience overload for a pointer to a function that takes no arguments and
// returns T: the pointer is cast to const void* and handed to the
// ABI_CallFunction(const void*) overload.
1123 template <typename T>
1124 void ABI_CallFunction(T (*func)()) {
1125 ABI_CallFunction((const void*)func);
1126 }
1127
// Overload accepting the target as a const u8* (a byte pointer); it is cast to
// const void* and forwarded to the ABI_CallFunction(const void*) overload.
1128 void ABI_CallFunction(const u8* func) {
1129 ABI_CallFunction((const void*)func);
1130 }
1131 void ABI_CallFunctionC16(const void* func, u16 param1);
1132 void ABI_CallFunctionCC16(const void* func, u32 param1, u16 param2);
1133
1134 // These only support u32 parameters, but that's enough for a lot of uses.
1135 // These will destroy the 1 or 2 first "parameter regs".
1136 void ABI_CallFunctionC(const void* func, u32 param1);
1137 void ABI_CallFunctionCC(const void* func, u32 param1, u32 param2);
1138 void ABI_CallFunctionCCC(const void* func, u32 param1, u32 param2, u32 param3);
1139 void ABI_CallFunctionCCP(const void* func, u32 param1, u32 param2, void* param3);
1140 void ABI_CallFunctionCCCP(const void* func, u32 param1, u32 param2, u32 param3, void* param4);
1141 void ABI_CallFunctionP(const void* func, void* param1);
1142 void ABI_CallFunctionPA(const void* func, void* param1, const OpArg& arg2);
1143 void ABI_CallFunctionPAA(const void* func, void* param1, const OpArg& arg2, const OpArg& arg3);
1144 void ABI_CallFunctionPPC(const void* func, void* param1, void* param2, u32 param3);
1145 void ABI_CallFunctionAC(const void* func, const OpArg& arg1, u32 param2);
1146 void ABI_CallFunctionACC(const void* func, const OpArg& arg1, u32 param2, u32 param3);
1147 void ABI_CallFunctionA(const void* func, const OpArg& arg1);
1148 void ABI_CallFunctionAA(const void* func, const OpArg& arg1, const OpArg& arg2);
1149
1150 // Pass a register as a parameter.
1151 void ABI_CallFunctionR(const void* func, X64Reg reg1);
1152 void ABI_CallFunctionRR(const void* func, X64Reg reg1, X64Reg reg2);
1153
// Typed convenience overload for a pointer to a one-argument function (Tr(T1)): the
// pointer is cast to const void* and forwarded, together with param1, to the
// ABI_CallFunctionC(const void*, u32) overload. param1 is always passed as a u32,
// whatever T1 is.
1154 template <typename Tr, typename T1>
1155 void ABI_CallFunctionC(Tr (*func)(T1), u32 param1) {
1156 ABI_CallFunctionC((const void*)func, param1);
1157 }
1158
1159 /**
1160 * Saves specified registers and adjusts the stack to be 16-byte aligned as required by the ABI
1161 *
1162 * @param mask Registers to push on the stack (high 16 bits are XMMs, low 16 bits are GPRs)
1163 * @param rsp_alignment Current alignment of the stack pointer, must be 0 or 8
1164 * @param needed_frame_size Additional space needed, e.g., for function arguments passed on the
1165 * stack
1166 * @return Size of the shadow space, i.e., offset of the frame
1167 */
1168 size_t ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment,
1169 size_t needed_frame_size = 0);
1170
1171 /**
1172 * Restores specified registers and adjusts the stack to its original alignment, i.e., the
1173 * alignment before
1174 * the matching PushRegistersAndAdjustStack.
1175 *
1176 * @param mask Registers to restore from the stack (high 16 bits are XMMs, low 16 bits are
1177 * GPRs)
1178 * @param rsp_alignment Original alignment before the matching PushRegistersAndAdjustStack, must
1179 * be 0 or 8
1180 * @param needed_frame_size Additional space that was needed
1181 * @warning Stack must be currently 16-byte aligned
1182 */
1183 void ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment,
1184 size_t needed_frame_size = 0);
1185
// Number of XMM registers reported for the build target, selected at compile time:
// 8 when _M_IX86 is defined, 16 otherwise.
1186#ifdef _M_IX86
1187 static int ABI_GetNumXMMRegs() {
1188 return 8;
1189 }
1190#else
1191 static int ABI_GetNumXMMRegs() {
1192 return 16;
1193 }
1194#endif
1195}; // class XEmitter
1196
1197// Everything that needs to generate X86 code should inherit from this.
1198// You get memory management for free, plus, you can use all the MOV etc functions without
1199// having to prefix them with gen-> or something similar.
1200
// CodeBlock specialised on XEmitter; the only member declared here is the PoisonMemory
// override.
1201class XCodeBlock : public CodeBlock<XEmitter> {
1202public:
// Overrides the PoisonMemory hook of the CodeBlock<XEmitter> base. Only declared here;
// the definition is not part of this header listing.
1203 void PoisonMemory() override;
1204};
1205
1206} // namespace
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index faad0a561..ffd67f074 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -2,6 +2,7 @@ set(SRCS
2 arm/disassembler/arm_disasm.cpp 2 arm/disassembler/arm_disasm.cpp
3 arm/disassembler/load_symbol_map.cpp 3 arm/disassembler/load_symbol_map.cpp
4 arm/dynarmic/arm_dynarmic.cpp 4 arm/dynarmic/arm_dynarmic.cpp
5 arm/dynarmic/arm_dynarmic_cp15.cpp
5 arm/dyncom/arm_dyncom.cpp 6 arm/dyncom/arm_dyncom.cpp
6 arm/dyncom/arm_dyncom_dec.cpp 7 arm/dyncom/arm_dyncom_dec.cpp
7 arm/dyncom/arm_dyncom_interpreter.cpp 8 arm/dyncom/arm_dyncom_interpreter.cpp
@@ -19,16 +20,19 @@ set(SRCS
19 file_sys/archive_extsavedata.cpp 20 file_sys/archive_extsavedata.cpp
20 file_sys/archive_ncch.cpp 21 file_sys/archive_ncch.cpp
21 file_sys/archive_other_savedata.cpp 22 file_sys/archive_other_savedata.cpp
22 file_sys/archive_romfs.cpp
23 file_sys/archive_savedata.cpp 23 file_sys/archive_savedata.cpp
24 file_sys/archive_sdmc.cpp 24 file_sys/archive_sdmc.cpp
25 file_sys/archive_sdmcwriteonly.cpp 25 file_sys/archive_sdmcwriteonly.cpp
26 file_sys/archive_selfncch.cpp
26 file_sys/archive_source_sd_savedata.cpp 27 file_sys/archive_source_sd_savedata.cpp
27 file_sys/archive_systemsavedata.cpp 28 file_sys/archive_systemsavedata.cpp
28 file_sys/disk_archive.cpp 29 file_sys/disk_archive.cpp
29 file_sys/ivfc_archive.cpp 30 file_sys/ivfc_archive.cpp
30 file_sys/path_parser.cpp 31 file_sys/path_parser.cpp
31 file_sys/savedata_archive.cpp 32 file_sys/savedata_archive.cpp
33 frontend/camera/blank_camera.cpp
34 frontend/camera/factory.cpp
35 frontend/camera/interface.cpp
32 frontend/emu_window.cpp 36 frontend/emu_window.cpp
33 frontend/key_map.cpp 37 frontend/key_map.cpp
34 frontend/motion_emu.cpp 38 frontend/motion_emu.cpp
@@ -37,6 +41,7 @@ set(SRCS
37 hle/applets/applet.cpp 41 hle/applets/applet.cpp
38 hle/applets/erreula.cpp 42 hle/applets/erreula.cpp
39 hle/applets/mii_selector.cpp 43 hle/applets/mii_selector.cpp
44 hle/applets/mint.cpp
40 hle/applets/swkbd.cpp 45 hle/applets/swkbd.cpp
41 hle/kernel/address_arbiter.cpp 46 hle/kernel/address_arbiter.cpp
42 hle/kernel/client_port.cpp 47 hle/kernel/client_port.cpp
@@ -154,6 +159,9 @@ set(SRCS
154 hle/service/y2r_u.cpp 159 hle/service/y2r_u.cpp
155 hle/shared_page.cpp 160 hle/shared_page.cpp
156 hle/svc.cpp 161 hle/svc.cpp
162 hw/aes/arithmetic128.cpp
163 hw/aes/ccm.cpp
164 hw/aes/key.cpp
157 hw/gpu.cpp 165 hw/gpu.cpp
158 hw/hw.cpp 166 hw/hw.cpp
159 hw/lcd.cpp 167 hw/lcd.cpp
@@ -165,6 +173,7 @@ set(SRCS
165 loader/smdh.cpp 173 loader/smdh.cpp
166 tracer/recorder.cpp 174 tracer/recorder.cpp
167 memory.cpp 175 memory.cpp
176 perf_stats.cpp
168 settings.cpp 177 settings.cpp
169 ) 178 )
170 179
@@ -173,6 +182,7 @@ set(HEADERS
173 arm/disassembler/arm_disasm.h 182 arm/disassembler/arm_disasm.h
174 arm/disassembler/load_symbol_map.h 183 arm/disassembler/load_symbol_map.h
175 arm/dynarmic/arm_dynarmic.h 184 arm/dynarmic/arm_dynarmic.h
185 arm/dynarmic/arm_dynarmic_cp15.h
176 arm/dyncom/arm_dyncom.h 186 arm/dyncom/arm_dyncom.h
177 arm/dyncom/arm_dyncom_dec.h 187 arm/dyncom/arm_dyncom_dec.h
178 arm/dyncom/arm_dyncom_interpreter.h 188 arm/dyncom/arm_dyncom_interpreter.h
@@ -191,18 +201,22 @@ set(HEADERS
191 file_sys/archive_extsavedata.h 201 file_sys/archive_extsavedata.h
192 file_sys/archive_ncch.h 202 file_sys/archive_ncch.h
193 file_sys/archive_other_savedata.h 203 file_sys/archive_other_savedata.h
194 file_sys/archive_romfs.h
195 file_sys/archive_savedata.h 204 file_sys/archive_savedata.h
196 file_sys/archive_sdmc.h 205 file_sys/archive_sdmc.h
197 file_sys/archive_sdmcwriteonly.h 206 file_sys/archive_sdmcwriteonly.h
207 file_sys/archive_selfncch.h
198 file_sys/archive_source_sd_savedata.h 208 file_sys/archive_source_sd_savedata.h
199 file_sys/archive_systemsavedata.h 209 file_sys/archive_systemsavedata.h
200 file_sys/directory_backend.h 210 file_sys/directory_backend.h
201 file_sys/disk_archive.h 211 file_sys/disk_archive.h
212 file_sys/errors.h
202 file_sys/file_backend.h 213 file_sys/file_backend.h
203 file_sys/ivfc_archive.h 214 file_sys/ivfc_archive.h
204 file_sys/path_parser.h 215 file_sys/path_parser.h
205 file_sys/savedata_archive.h 216 file_sys/savedata_archive.h
217 frontend/camera/blank_camera.h
218 frontend/camera/factory.h
219 frontend/camera/interface.h
206 frontend/emu_window.h 220 frontend/emu_window.h
207 frontend/key_map.h 221 frontend/key_map.h
208 frontend/motion_emu.h 222 frontend/motion_emu.h
@@ -210,9 +224,11 @@ set(HEADERS
210 hle/config_mem.h 224 hle/config_mem.h
211 hle/function_wrappers.h 225 hle/function_wrappers.h
212 hle/ipc.h 226 hle/ipc.h
227 hle/ipc_helpers.h
213 hle/applets/applet.h 228 hle/applets/applet.h
214 hle/applets/erreula.h 229 hle/applets/erreula.h
215 hle/applets/mii_selector.h 230 hle/applets/mii_selector.h
231 hle/applets/mint.h
216 hle/applets/swkbd.h 232 hle/applets/swkbd.h
217 hle/kernel/address_arbiter.h 233 hle/kernel/address_arbiter.h
218 hle/kernel/client_port.h 234 hle/kernel/client_port.h
@@ -331,6 +347,9 @@ set(HEADERS
331 hle/service/y2r_u.h 347 hle/service/y2r_u.h
332 hle/shared_page.h 348 hle/shared_page.h
333 hle/svc.h 349 hle/svc.h
350 hw/aes/arithmetic128.h
351 hw/aes/ccm.h
352 hw/aes/key.h
334 hw/gpu.h 353 hw/gpu.h
335 hw/hw.h 354 hw/hw.h
336 hw/lcd.h 355 hw/lcd.h
@@ -345,13 +364,15 @@ set(HEADERS
345 memory.h 364 memory.h
346 memory_setup.h 365 memory_setup.h
347 mmio.h 366 mmio.h
367 perf_stats.h
348 settings.h 368 settings.h
349 ) 369 )
350 370
351include_directories(../../externals/dynarmic/include) 371include_directories(../../externals/dynarmic/include)
372include_directories(../../externals/cryptopp)
352 373
353create_directory_groups(${SRCS} ${HEADERS}) 374create_directory_groups(${SRCS} ${HEADERS})
354 375
355add_library(core STATIC ${SRCS} ${HEADERS}) 376add_library(core STATIC ${SRCS} ${HEADERS})
356 377
357target_link_libraries(core dynarmic) 378target_link_libraries(core dynarmic cryptopp)
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp
index 9f25e3b00..7d2790b08 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -7,6 +7,7 @@
7#include "common/assert.h" 7#include "common/assert.h"
8#include "common/microprofile.h" 8#include "common/microprofile.h"
9#include "core/arm/dynarmic/arm_dynarmic.h" 9#include "core/arm/dynarmic/arm_dynarmic.h"
10#include "core/arm/dynarmic/arm_dynarmic_cp15.h"
10#include "core/arm/dyncom/arm_dyncom_interpreter.h" 11#include "core/arm/dyncom/arm_dyncom_interpreter.h"
11#include "core/core.h" 12#include "core/core.h"
12#include "core/core_timing.h" 13#include "core/core_timing.h"
@@ -39,28 +40,30 @@ static bool IsReadOnlyMemory(u32 vaddr) {
39 return false; 40 return false;
40} 41}
41 42
42static Dynarmic::UserCallbacks GetUserCallbacks(ARMul_State* interpeter_state) { 43static Dynarmic::UserCallbacks GetUserCallbacks(
44 const std::shared_ptr<ARMul_State>& interpeter_state) {
43 Dynarmic::UserCallbacks user_callbacks{}; 45 Dynarmic::UserCallbacks user_callbacks{};
44 user_callbacks.InterpreterFallback = &InterpreterFallback; 46 user_callbacks.InterpreterFallback = &InterpreterFallback;
45 user_callbacks.user_arg = static_cast<void*>(interpeter_state); 47 user_callbacks.user_arg = static_cast<void*>(interpeter_state.get());
46 user_callbacks.CallSVC = &SVC::CallSVC; 48 user_callbacks.CallSVC = &SVC::CallSVC;
47 user_callbacks.IsReadOnlyMemory = &IsReadOnlyMemory; 49 user_callbacks.memory.IsReadOnlyMemory = &IsReadOnlyMemory;
48 user_callbacks.MemoryReadCode = &Memory::Read32; 50 user_callbacks.memory.ReadCode = &Memory::Read32;
49 user_callbacks.MemoryRead8 = &Memory::Read8; 51 user_callbacks.memory.Read8 = &Memory::Read8;
50 user_callbacks.MemoryRead16 = &Memory::Read16; 52 user_callbacks.memory.Read16 = &Memory::Read16;
51 user_callbacks.MemoryRead32 = &Memory::Read32; 53 user_callbacks.memory.Read32 = &Memory::Read32;
52 user_callbacks.MemoryRead64 = &Memory::Read64; 54 user_callbacks.memory.Read64 = &Memory::Read64;
53 user_callbacks.MemoryWrite8 = &Memory::Write8; 55 user_callbacks.memory.Write8 = &Memory::Write8;
54 user_callbacks.MemoryWrite16 = &Memory::Write16; 56 user_callbacks.memory.Write16 = &Memory::Write16;
55 user_callbacks.MemoryWrite32 = &Memory::Write32; 57 user_callbacks.memory.Write32 = &Memory::Write32;
56 user_callbacks.MemoryWrite64 = &Memory::Write64; 58 user_callbacks.memory.Write64 = &Memory::Write64;
57 user_callbacks.page_table = Memory::GetCurrentPageTablePointers(); 59 user_callbacks.page_table = Memory::GetCurrentPageTablePointers();
60 user_callbacks.coprocessors[15] = std::make_shared<DynarmicCP15>(interpeter_state);
58 return user_callbacks; 61 return user_callbacks;
59} 62}
60 63
61ARM_Dynarmic::ARM_Dynarmic(PrivilegeMode initial_mode) { 64ARM_Dynarmic::ARM_Dynarmic(PrivilegeMode initial_mode) {
62 interpreter_state = std::make_unique<ARMul_State>(initial_mode); 65 interpreter_state = std::make_shared<ARMul_State>(initial_mode);
63 jit = std::make_unique<Dynarmic::Jit>(GetUserCallbacks(interpreter_state.get())); 66 jit = std::make_unique<Dynarmic::Jit>(GetUserCallbacks(interpreter_state));
64} 67}
65 68
66void ARM_Dynarmic::SetPC(u32 pc) { 69void ARM_Dynarmic::SetPC(u32 pc) {
diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h
index 87ab53d81..834dc989e 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.h
+++ b/src/core/arm/dynarmic/arm_dynarmic.h
@@ -39,5 +39,5 @@ public:
39 39
40private: 40private:
41 std::unique_ptr<Dynarmic::Jit> jit; 41 std::unique_ptr<Dynarmic::Jit> jit;
42 std::unique_ptr<ARMul_State> interpreter_state; 42 std::shared_ptr<ARMul_State> interpreter_state;
43}; 43};
diff --git a/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp b/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp
new file mode 100644
index 000000000..b1fdce096
--- /dev/null
+++ b/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp
@@ -0,0 +1,88 @@
1// Copyright 2017 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/arm/dynarmic/arm_dynarmic_cp15.h"
6#include "core/arm/skyeye_common/arm_regformat.h"
7#include "core/arm/skyeye_common/armstate.h"
8
9using Callback = Dynarmic::Coprocessor::Callback;
10using CallbackOrAccessOneWord = Dynarmic::Coprocessor::CallbackOrAccessOneWord;
11using CallbackOrAccessTwoWords = Dynarmic::Coprocessor::CallbackOrAccessTwoWords;
12
// Copies the given shared_ptr into interpreter_state, so this object shares ownership of
// the ARMul_State whose CP15 array the Compile* methods return pointers into.
13DynarmicCP15::DynarmicCP15(const std::shared_ptr<ARMul_State>& state) : interpreter_state(state) {}
14
// Defaulted destructor, defined out of line in this translation unit.
15DynarmicCP15::~DynarmicCP15() = default;
16
// No internal coprocessor operation is handled: every argument is ignored and
// boost::none is returned unconditionally.
17boost::optional<Callback> DynarmicCP15::CompileInternalOperation(bool two, unsigned opc1,
18 CoprocReg CRd, CoprocReg CRn,
19 CoprocReg CRm, unsigned opc2) {
20 return boost::none;
21}
22
// Resolves a one-word send to this coprocessor into a pointer to the matching slot of
// interpreter_state->CP15, or boost::blank{} when the encoding is not recognised.
// Every recognised encoding requires two == false and opc1 == 0:
//   CRn = C7,  CRm = C5,  opc2 = 4 -> CP15_FLUSH_PREFETCH_BUFFER
//   CRn = C7,  CRm = C10, opc2 = 4 -> CP15_DATA_SYNC_BARRIER
//   CRn = C7,  CRm = C10, opc2 = 5 -> CP15_DATA_MEMORY_BARRIER
//   CRn = C13, CRm = C0,  opc2 = 2 -> CP15_THREAD_UPRW
23CallbackOrAccessOneWord DynarmicCP15::CompileSendOneWord(bool two, unsigned opc1, CoprocReg CRn,
24 CoprocReg CRm, unsigned opc2) {
25 // TODO(merry): Privileged CP15 registers
26
27 if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C5 && opc2 == 4) {
28 // This is a dummy write, we ignore the value written here.
29 return &interpreter_state->CP15[CP15_FLUSH_PREFETCH_BUFFER];
30 }
31
32 if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C10) {
33 switch (opc2) {
34 case 4:
35 // This is a dummy write, we ignore the value written here.
36 return &interpreter_state->CP15[CP15_DATA_SYNC_BARRIER];
37 case 5:
38 // This is a dummy write, we ignore the value written here.
39 return &interpreter_state->CP15[CP15_DATA_MEMORY_BARRIER];
40 default:
// Any other opc2 under CRn = C7 / CRm = C10 is reported as unhandled.
41 return boost::blank{};
42 }
43 }
44
45 if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0 && opc2 == 2) {
46 return &interpreter_state->CP15[CP15_THREAD_UPRW];
47 }
48
// No encoding matched.
49 return boost::blank{};
50}
51
// Two-word sends are not handled: the arguments are ignored and boost::blank{} is
// returned unconditionally.
52CallbackOrAccessTwoWords DynarmicCP15::CompileSendTwoWords(bool two, unsigned opc, CoprocReg CRm) {
53 return boost::blank{};
54}
55
// Resolves a one-word get from this coprocessor into a pointer to the matching slot of
// interpreter_state->CP15, or boost::blank{} when the encoding is not recognised.
// Only two == false, opc1 == 0, CRn = C13, CRm = C0 is recognised:
//   opc2 = 2 -> CP15_THREAD_UPRW
//   opc2 = 3 -> CP15_THREAD_URO
56CallbackOrAccessOneWord DynarmicCP15::CompileGetOneWord(bool two, unsigned opc1, CoprocReg CRn,
57 CoprocReg CRm, unsigned opc2) {
58 // TODO(merry): Privileged CP15 registers
59
60 if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0) {
61 switch (opc2) {
62 case 2:
63 return &interpreter_state->CP15[CP15_THREAD_UPRW];
64 case 3:
65 return &interpreter_state->CP15[CP15_THREAD_URO];
66 default:
// Any other opc2 under CRn = C13 / CRm = C0 is reported as unhandled.
67 return boost::blank{};
68 }
69 }
70
// No encoding matched.
71 return boost::blank{};
72}
73
// Two-word gets are not handled: the arguments are ignored and boost::blank{} is
// returned unconditionally.
74CallbackOrAccessTwoWords DynarmicCP15::CompileGetTwoWords(bool two, unsigned opc, CoprocReg CRm) {
75 return boost::blank{};
76}
77
// Coprocessor word loads are not handled: the arguments are ignored and boost::none is
// returned unconditionally.
78boost::optional<Callback> DynarmicCP15::CompileLoadWords(bool two, bool long_transfer,
79 CoprocReg CRd,
80 boost::optional<u8> option) {
81 return boost::none;
82}
83
// Coprocessor word stores are not handled: the arguments are ignored and boost::none is
// returned unconditionally.
84boost::optional<Callback> DynarmicCP15::CompileStoreWords(bool two, bool long_transfer,
85 CoprocReg CRd,
86 boost::optional<u8> option) {
87 return boost::none;
88}
diff --git a/src/core/arm/dynarmic/arm_dynarmic_cp15.h b/src/core/arm/dynarmic/arm_dynarmic_cp15.h
new file mode 100644
index 000000000..7fa54e14c
--- /dev/null
+++ b/src/core/arm/dynarmic/arm_dynarmic_cp15.h
@@ -0,0 +1,32 @@
1// Copyright 2017 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <memory>
6#include <dynarmic/coprocessor.h>
7#include "common/common_types.h"
8
9struct ARMul_State;
10
// Dynarmic::Coprocessor implementation that holds a shared_ptr to an ARMul_State.
// The class is final and overrides each Compile* hook declared below.
// NOTE(review): the lines of this header shown ahead of the class contain no
// `#pragma once` or include guard — confirm whether one is needed.
11class DynarmicCP15 final : public Dynarmic::Coprocessor {
12public:
// Explicit so that a shared_ptr<ARMul_State> is never implicitly converted to a
// DynarmicCP15.
13 explicit DynarmicCP15(const std::shared_ptr<ARMul_State>&);
14 ~DynarmicCP15() override;
15
// Compile hooks overriding Dynarmic::Coprocessor. Each returns either an optional
// Callback or a CallbackOrAccess* value, as given by its return type.
16 boost::optional<Callback> CompileInternalOperation(bool two, unsigned opc1, CoprocReg CRd,
17 CoprocReg CRn, CoprocReg CRm,
18 unsigned opc2) override;
19 CallbackOrAccessOneWord CompileSendOneWord(bool two, unsigned opc1, CoprocReg CRn,
20 CoprocReg CRm, unsigned opc2) override;
21 CallbackOrAccessTwoWords CompileSendTwoWords(bool two, unsigned opc, CoprocReg CRm) override;
22 CallbackOrAccessOneWord CompileGetOneWord(bool two, unsigned opc1, CoprocReg CRn, CoprocReg CRm,
23 unsigned opc2) override;
24 CallbackOrAccessTwoWords CompileGetTwoWords(bool two, unsigned opc, CoprocReg CRm) override;
25 boost::optional<Callback> CompileLoadWords(bool two, bool long_transfer, CoprocReg CRd,
26 boost::optional<u8> option) override;
27 boost::optional<Callback> CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd,
28 boost::optional<u8> option) override;
29
30private:
// Shared ownership of the interpreter state (ARMul_State is only forward-declared in
// this header).
31 std::shared_ptr<ARMul_State> interpreter_state;
32};
diff --git a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
index 67c45640a..273bc8167 100644
--- a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
+++ b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
@@ -3928,13 +3928,13 @@ SXTB16_INST : {
3928 if (inst_cream->Rn == 15) { 3928 if (inst_cream->Rn == 15) {
3929 u32 lo = (u32)(s8)rm_val; 3929 u32 lo = (u32)(s8)rm_val;
3930 u32 hi = (u32)(s8)(rm_val >> 16); 3930 u32 hi = (u32)(s8)(rm_val >> 16);
3931 RD = (lo | (hi << 16)); 3931 RD = (lo & 0xFFFF) | (hi << 16);
3932 } 3932 }
3933 // SXTAB16 3933 // SXTAB16
3934 else { 3934 else {
3935 u32 lo = (rn_val & 0xFFFF) + (u32)(s8)(rm_val & 0xFF); 3935 u32 lo = rn_val + (u32)(s8)(rm_val & 0xFF);
3936 u32 hi = ((rn_val >> 16) & 0xFFFF) + (u32)(s8)((rm_val >> 16) & 0xFF); 3936 u32 hi = (rn_val >> 16) + (u32)(s8)((rm_val >> 16) & 0xFF);
3937 RD = (lo | (hi << 16)); 3937 RD = (lo & 0xFFFF) | (hi << 16);
3938 } 3938 }
3939 } 3939 }
3940 3940
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 202cd332b..140ff6451 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -67,10 +67,6 @@ System::ResultStatus System::SingleStep() {
67} 67}
68 68
69System::ResultStatus System::Load(EmuWindow* emu_window, const std::string& filepath) { 69System::ResultStatus System::Load(EmuWindow* emu_window, const std::string& filepath) {
70 if (app_loader) {
71 app_loader.reset();
72 }
73
74 app_loader = Loader::GetLoader(filepath); 70 app_loader = Loader::GetLoader(filepath);
75 71
76 if (!app_loader) { 72 if (!app_loader) {
@@ -113,6 +109,10 @@ void System::PrepareReschedule() {
113 reschedule_pending = true; 109 reschedule_pending = true;
114} 110}
115 111
112PerfStats::Results System::GetAndResetPerfStats() {
113 return perf_stats.GetAndResetStats(CoreTiming::GetGlobalTimeUs());
114}
115
116void System::Reschedule() { 116void System::Reschedule() {
117 if (!reschedule_pending) { 117 if (!reschedule_pending) {
118 return; 118 return;
@@ -123,10 +123,6 @@ void System::Reschedule() {
123} 123}
124 124
125System::ResultStatus System::Init(EmuWindow* emu_window, u32 system_mode) { 125System::ResultStatus System::Init(EmuWindow* emu_window, u32 system_mode) {
126 if (cpu_core) {
127 cpu_core.reset();
128 }
129
130 Memory::Init(); 126 Memory::Init();
131 127
132 if (Settings::values.use_cpu_jit) { 128 if (Settings::values.use_cpu_jit) {
@@ -148,6 +144,10 @@ System::ResultStatus System::Init(EmuWindow* emu_window, u32 system_mode) {
148 144
149 LOG_DEBUG(Core, "Initialized OK"); 145 LOG_DEBUG(Core, "Initialized OK");
150 146
147 // Reset counters and set time origin to current frame
148 GetAndResetPerfStats();
149 perf_stats.BeginSystemFrame();
150
151 return ResultStatus::Success; 151 return ResultStatus::Success;
152} 152}
153 153
@@ -159,7 +159,8 @@ void System::Shutdown() {
159 Kernel::Shutdown(); 159 Kernel::Shutdown();
160 HW::Shutdown(); 160 HW::Shutdown();
161 CoreTiming::Shutdown(); 161 CoreTiming::Shutdown();
162 cpu_core.reset(); 162 cpu_core = nullptr;
163 app_loader = nullptr;
163 164
164 LOG_DEBUG(Core, "Shutdown OK"); 165 LOG_DEBUG(Core, "Shutdown OK");
165} 166}
diff --git a/src/core/core.h b/src/core/core.h
index 1015e8847..6c9c936b5 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -6,9 +6,9 @@
6 6
7#include <memory> 7#include <memory>
8#include <string> 8#include <string>
9
10#include "common/common_types.h" 9#include "common/common_types.h"
11#include "core/memory.h" 10#include "core/memory.h"
11#include "core/perf_stats.h"
12 12
13class EmuWindow; 13class EmuWindow;
14class ARM_Interface; 14class ARM_Interface;
@@ -83,6 +83,8 @@ public:
83 /// Prepare the core emulation for a reschedule 83 /// Prepare the core emulation for a reschedule
84 void PrepareReschedule(); 84 void PrepareReschedule();
85 85
86 PerfStats::Results GetAndResetPerfStats();
87
86 /** 88 /**
87 * Gets a reference to the emulated CPU. 89 * Gets a reference to the emulated CPU.
88 * @returns A reference to the emulated CPU. 90 * @returns A reference to the emulated CPU.
@@ -91,6 +93,9 @@ public:
91 return *cpu_core; 93 return *cpu_core;
92 } 94 }
93 95
96 PerfStats perf_stats;
97 FrameLimiter frame_limiter;
98
94private: 99private:
95 /** 100 /**
96 * Initialize the emulated system. 101 * Initialize the emulated system.
@@ -115,7 +120,7 @@ private:
115 static System s_instance; 120 static System s_instance;
116}; 121};
117 122
118static ARM_Interface& CPU() { 123inline ARM_Interface& CPU() {
119 return System::GetInstance().CPU(); 124 return System::GetInstance().CPU();
120} 125}
121 126
diff --git a/src/core/file_sys/archive_extsavedata.cpp b/src/core/file_sys/archive_extsavedata.cpp
index 51ce78435..dd2fb167f 100644
--- a/src/core/file_sys/archive_extsavedata.cpp
+++ b/src/core/file_sys/archive_extsavedata.cpp
@@ -107,6 +107,8 @@ public:
107 case PathParser::NotFound: 107 case PathParser::NotFound:
108 LOG_ERROR(Service_FS, "%s not found", full_path.c_str()); 108 LOG_ERROR(Service_FS, "%s not found", full_path.c_str());
109 return ERROR_FILE_NOT_FOUND; 109 return ERROR_FILE_NOT_FOUND;
110 case PathParser::FileFound:
111 break; // Expected 'success' case
110 } 112 }
111 113
112 FileUtil::IOFile file(full_path, "r+b"); 114 FileUtil::IOFile file(full_path, "r+b");
diff --git a/src/core/file_sys/archive_extsavedata.h b/src/core/file_sys/archive_extsavedata.h
index 6a3431e94..f705ade1c 100644
--- a/src/core/file_sys/archive_extsavedata.h
+++ b/src/core/file_sys/archive_extsavedata.h
@@ -52,7 +52,7 @@ private:
52 52
53 /** 53 /**
54 * This holds the full directory path for this archive, it is only set after a successful call 54 * This holds the full directory path for this archive, it is only set after a successful call
55 * to Open, this is formed as <base extsavedatapath>/<type>/<high>/<low>. 55 * to Open, this is formed as `<base extsavedatapath>/<type>/<high>/<low>`.
56 * See GetExtSaveDataPath for the code that extracts this data from an archive path. 56 * See GetExtSaveDataPath for the code that extracts this data from an archive path.
57 */ 57 */
58 std::string mount_point; 58 std::string mount_point;
diff --git a/src/core/file_sys/archive_romfs.cpp b/src/core/file_sys/archive_romfs.cpp
deleted file mode 100644
index 6c99ca5b4..000000000
--- a/src/core/file_sys/archive_romfs.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <memory>
7#include "common/common_types.h"
8#include "common/logging/log.h"
9#include "core/file_sys/archive_romfs.h"
10#include "core/file_sys/ivfc_archive.h"
11
12////////////////////////////////////////////////////////////////////////////////////////////////////
13// FileSys namespace
14
15namespace FileSys {
16
17ArchiveFactory_RomFS::ArchiveFactory_RomFS(Loader::AppLoader& app_loader) {
18 // Load the RomFS from the app
19 if (Loader::ResultStatus::Success != app_loader.ReadRomFS(romfs_file, data_offset, data_size)) {
20 LOG_ERROR(Service_FS, "Unable to read RomFS!");
21 }
22}
23
24ResultVal<std::unique_ptr<ArchiveBackend>> ArchiveFactory_RomFS::Open(const Path& path) {
25 auto archive = std::make_unique<IVFCArchive>(romfs_file, data_offset, data_size);
26 return MakeResult<std::unique_ptr<ArchiveBackend>>(std::move(archive));
27}
28
29ResultCode ArchiveFactory_RomFS::Format(const Path& path,
30 const FileSys::ArchiveFormatInfo& format_info) {
31 LOG_ERROR(Service_FS, "Attempted to format a RomFS archive.");
32 // TODO: Verify error code
33 return ResultCode(ErrorDescription::NotAuthorized, ErrorModule::FS, ErrorSummary::NotSupported,
34 ErrorLevel::Permanent);
35}
36
37ResultVal<ArchiveFormatInfo> ArchiveFactory_RomFS::GetFormatInfo(const Path& path) const {
38 // TODO(Subv): Implement
39 LOG_ERROR(Service_FS, "Unimplemented GetFormatInfo archive %s", GetName().c_str());
40 return ResultCode(-1);
41}
42
43} // namespace FileSys
diff --git a/src/core/file_sys/archive_sdmc.cpp b/src/core/file_sys/archive_sdmc.cpp
index 333dfb92e..72ff05c65 100644
--- a/src/core/file_sys/archive_sdmc.cpp
+++ b/src/core/file_sys/archive_sdmc.cpp
@@ -72,6 +72,8 @@ ResultVal<std::unique_ptr<FileBackend>> SDMCArchive::OpenFileBase(const Path& pa
72 FileUtil::CreateEmptyFile(full_path); 72 FileUtil::CreateEmptyFile(full_path);
73 } 73 }
74 break; 74 break;
75 case PathParser::FileFound:
76 break; // Expected 'success' case
75 } 77 }
76 78
77 FileUtil::IOFile file(full_path, mode.write_flag ? "r+b" : "rb"); 79 FileUtil::IOFile file(full_path, mode.write_flag ? "r+b" : "rb");
@@ -106,6 +108,8 @@ ResultCode SDMCArchive::DeleteFile(const Path& path) const {
106 case PathParser::DirectoryFound: 108 case PathParser::DirectoryFound:
107 LOG_ERROR(Service_FS, "%s is not a file", full_path.c_str()); 109 LOG_ERROR(Service_FS, "%s is not a file", full_path.c_str());
108 return ERROR_UNEXPECTED_FILE_OR_DIRECTORY_SDMC; 110 return ERROR_UNEXPECTED_FILE_OR_DIRECTORY_SDMC;
111 case PathParser::FileFound:
112 break; // Expected 'success' case
109 } 113 }
110 114
111 if (FileUtil::Delete(full_path)) { 115 if (FileUtil::Delete(full_path)) {
@@ -154,6 +158,8 @@ static ResultCode DeleteDirectoryHelper(const Path& path, const std::string& mou
154 case PathParser::FileFound: 158 case PathParser::FileFound:
155 LOG_ERROR(Service_FS, "Unexpected file in path %s", full_path.c_str()); 159 LOG_ERROR(Service_FS, "Unexpected file in path %s", full_path.c_str());
156 return ERROR_UNEXPECTED_FILE_OR_DIRECTORY_SDMC; 160 return ERROR_UNEXPECTED_FILE_OR_DIRECTORY_SDMC;
161 case PathParser::DirectoryFound:
162 break; // Expected 'success' case
157 } 163 }
158 164
159 if (deleter(full_path)) { 165 if (deleter(full_path)) {
@@ -197,6 +203,8 @@ ResultCode SDMCArchive::CreateFile(const FileSys::Path& path, u64 size) const {
197 case PathParser::FileFound: 203 case PathParser::FileFound:
198 LOG_ERROR(Service_FS, "%s already exists", full_path.c_str()); 204 LOG_ERROR(Service_FS, "%s already exists", full_path.c_str());
199 return ERROR_ALREADY_EXISTS; 205 return ERROR_ALREADY_EXISTS;
206 case PathParser::NotFound:
207 break; // Expected 'success' case
200 } 208 }
201 209
202 if (size == 0) { 210 if (size == 0) {
@@ -238,6 +246,8 @@ ResultCode SDMCArchive::CreateDirectory(const Path& path) const {
238 case PathParser::FileFound: 246 case PathParser::FileFound:
239 LOG_ERROR(Service_FS, "%s already exists", full_path.c_str()); 247 LOG_ERROR(Service_FS, "%s already exists", full_path.c_str());
240 return ERROR_ALREADY_EXISTS; 248 return ERROR_ALREADY_EXISTS;
249 case PathParser::NotFound:
250 break; // Expected 'success' case
241 } 251 }
242 252
243 if (FileUtil::CreateDir(mount_point + path.AsString())) { 253 if (FileUtil::CreateDir(mount_point + path.AsString())) {
@@ -281,6 +291,8 @@ ResultVal<std::unique_ptr<DirectoryBackend>> SDMCArchive::OpenDirectory(const Pa
281 case PathParser::FileInPath: 291 case PathParser::FileInPath:
282 LOG_ERROR(Service_FS, "Unexpected file in path %s", full_path.c_str()); 292 LOG_ERROR(Service_FS, "Unexpected file in path %s", full_path.c_str());
283 return ERROR_UNEXPECTED_FILE_OR_DIRECTORY_SDMC; 293 return ERROR_UNEXPECTED_FILE_OR_DIRECTORY_SDMC;
294 case PathParser::DirectoryFound:
295 break; // Expected 'success' case
284 } 296 }
285 297
286 auto directory = std::make_unique<DiskDirectory>(full_path); 298 auto directory = std::make_unique<DiskDirectory>(full_path);
diff --git a/src/core/file_sys/archive_selfncch.cpp b/src/core/file_sys/archive_selfncch.cpp
new file mode 100644
index 000000000..298a37a44
--- /dev/null
+++ b/src/core/file_sys/archive_selfncch.cpp
@@ -0,0 +1,257 @@
1// Copyright 2017 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include "common/common_types.h"
7#include "common/logging/log.h"
8#include "common/swap.h"
9#include "core/file_sys/archive_selfncch.h"
10#include "core/file_sys/errors.h"
11#include "core/file_sys/ivfc_archive.h"
12
13////////////////////////////////////////////////////////////////////////////////////////////////////
14// FileSys namespace
15
16namespace FileSys {
17
18enum class SelfNCCHFilePathType : u32 {
19 RomFS = 0,
20 Code = 1, // This is not supported by SelfNCCHArchive but by archive 0x2345678E
21 ExeFS = 2,
22 UpdateRomFS = 5, // This is presumably for accessing the RomFS of the update patch.
23};
24
25struct SelfNCCHFilePath {
26 u32_le type;
27 std::array<char, 8> exefs_filename;
28};
29static_assert(sizeof(SelfNCCHFilePath) == 12, "NCCHFilePath has wrong size!");
30
31// A read-only file created from a block of data. It only allows you to read the entire file at
32// once, in a single read operation.
33class ExeFSSectionFile final : public FileBackend {
34public:
35 explicit ExeFSSectionFile(std::shared_ptr<std::vector<u8>> data_) : data(std::move(data_)) {}
36
37 ResultVal<size_t> Read(u64 offset, size_t length, u8* buffer) const override {
38 if (offset != 0) {
39 LOG_ERROR(Service_FS, "offset must be zero!");
40 return ERROR_UNSUPPORTED_OPEN_FLAGS;
41 }
42
43 if (length != data->size()) {
44 LOG_ERROR(Service_FS, "size must match the file size!");
45 return ERROR_INCORRECT_EXEFS_READ_SIZE;
46 }
47
48 std::memcpy(buffer, data->data(), data->size());
49 return MakeResult<size_t>(data->size());
50 }
51
52 ResultVal<size_t> Write(u64 offset, size_t length, bool flush,
53 const u8* buffer) const override {
54 LOG_ERROR(Service_FS, "The file is read-only!");
55 return ERROR_UNSUPPORTED_OPEN_FLAGS;
56 }
57
58 u64 GetSize() const override {
59 return data->size();
60 }
61
62 bool SetSize(u64 size) const override {
63 return false;
64 }
65
66 bool Close() const override {
67 return true;
68 }
69
70 void Flush() const override {}
71
72private:
73 std::shared_ptr<std::vector<u8>> data;
74};
75
76// SelfNCCHArchive represents the running application itself. From this archive the application can
77// open RomFS and ExeFS, excluding the .code section.
78class SelfNCCHArchive final : public ArchiveBackend {
79public:
80 explicit SelfNCCHArchive(const NCCHData& ncch_data_) : ncch_data(ncch_data_) {}
81
82 std::string GetName() const override {
83 return "SelfNCCHArchive";
84 }
85
86 ResultVal<std::unique_ptr<FileBackend>> OpenFile(const Path& path, const Mode&) const override {
87 // Note: SelfNCCHArchive doesn't check the open mode.
88
89 if (path.GetType() != LowPathType::Binary) {
90 LOG_ERROR(Service_FS, "Path need to be Binary");
91 return ERROR_INVALID_PATH;
92 }
93
94 std::vector<u8> binary = path.AsBinary();
95 if (binary.size() != sizeof(SelfNCCHFilePath)) {
96 LOG_ERROR(Service_FS, "Wrong path size %zu", binary.size());
97 return ERROR_INVALID_PATH;
98 }
99
100 SelfNCCHFilePath file_path;
101 std::memcpy(&file_path, binary.data(), sizeof(SelfNCCHFilePath));
102
103 switch (static_cast<SelfNCCHFilePathType>(file_path.type)) {
104 case SelfNCCHFilePathType::UpdateRomFS:
105 LOG_WARNING(Service_FS, "(STUBBED) open update RomFS");
106 return OpenRomFS();
107
108 case SelfNCCHFilePathType::RomFS:
109 return OpenRomFS();
110
111 case SelfNCCHFilePathType::Code:
112 LOG_ERROR(Service_FS, "Reading the code section is not supported!");
113 return ERROR_COMMAND_NOT_ALLOWED;
114
115 case SelfNCCHFilePathType::ExeFS: {
116 const auto& raw = file_path.exefs_filename;
117 auto end = std::find(raw.begin(), raw.end(), '\0');
118 std::string filename(raw.begin(), end);
119 return OpenExeFS(filename);
120 }
121 default:
122 LOG_ERROR(Service_FS, "Unknown file type %u!", static_cast<u32>(file_path.type));
123 return ERROR_INVALID_PATH;
124 }
125 }
126
127 ResultCode DeleteFile(const Path& path) const override {
128 LOG_ERROR(Service_FS, "Unsupported");
129 return ERROR_UNSUPPORTED_OPEN_FLAGS;
130 }
131
132 ResultCode RenameFile(const Path& src_path, const Path& dest_path) const override {
133 LOG_ERROR(Service_FS, "Unsupported");
134 return ERROR_UNSUPPORTED_OPEN_FLAGS;
135 }
136
137 ResultCode DeleteDirectory(const Path& path) const override {
138 LOG_ERROR(Service_FS, "Unsupported");
139 return ERROR_UNSUPPORTED_OPEN_FLAGS;
140 }
141
142 ResultCode DeleteDirectoryRecursively(const Path& path) const override {
143 LOG_ERROR(Service_FS, "Unsupported");
144 return ERROR_UNSUPPORTED_OPEN_FLAGS;
145 }
146
147 ResultCode CreateFile(const Path& path, u64 size) const override {
148 LOG_ERROR(Service_FS, "Unsupported");
149 return ERROR_UNSUPPORTED_OPEN_FLAGS;
150 }
151
152 ResultCode CreateDirectory(const Path& path) const override {
153 LOG_ERROR(Service_FS, "Unsupported");
154 return ERROR_UNSUPPORTED_OPEN_FLAGS;
155 }
156
157 ResultCode RenameDirectory(const Path& src_path, const Path& dest_path) const override {
158 LOG_ERROR(Service_FS, "Unsupported");
159 return ERROR_UNSUPPORTED_OPEN_FLAGS;
160 }
161
162 ResultVal<std::unique_ptr<DirectoryBackend>> OpenDirectory(const Path& path) const override {
163 LOG_ERROR(Service_FS, "Unsupported");
164 return ERROR_UNSUPPORTED_OPEN_FLAGS;
165 }
166
167 u64 GetFreeBytes() const override {
168 return 0;
169 }
170
171private:
172 ResultVal<std::unique_ptr<FileBackend>> OpenRomFS() const {
173 if (ncch_data.romfs_file) {
174 return MakeResult<std::unique_ptr<FileBackend>>(std::make_unique<IVFCFile>(
175 ncch_data.romfs_file, ncch_data.romfs_offset, ncch_data.romfs_size));
176 } else {
177 LOG_INFO(Service_FS, "Unable to read RomFS");
178 return ERROR_ROMFS_NOT_FOUND;
179 }
180 }
181
182 ResultVal<std::unique_ptr<FileBackend>> OpenExeFS(const std::string& filename) const {
183 if (filename == "icon") {
184 if (ncch_data.icon) {
185 return MakeResult<std::unique_ptr<FileBackend>>(
186 std::make_unique<ExeFSSectionFile>(ncch_data.icon));
187 }
188
189 LOG_WARNING(Service_FS, "Unable to read icon");
190 return ERROR_EXEFS_SECTION_NOT_FOUND;
191 }
192
193 if (filename == "logo") {
194 if (ncch_data.logo) {
195 return MakeResult<std::unique_ptr<FileBackend>>(
196 std::make_unique<ExeFSSectionFile>(ncch_data.logo));
197 }
198
199 LOG_WARNING(Service_FS, "Unable to read logo");
200 return ERROR_EXEFS_SECTION_NOT_FOUND;
201 }
202
203 if (filename == "banner") {
204 if (ncch_data.banner) {
205 return MakeResult<std::unique_ptr<FileBackend>>(
206 std::make_unique<ExeFSSectionFile>(ncch_data.banner));
207 }
208
209 LOG_WARNING(Service_FS, "Unable to read banner");
210 return ERROR_EXEFS_SECTION_NOT_FOUND;
211 }
212
213 LOG_ERROR(Service_FS, "Unknown ExeFS section %s!", filename.c_str());
214 return ERROR_INVALID_PATH;
215 }
216
217 NCCHData ncch_data;
218};
219
220ArchiveFactory_SelfNCCH::ArchiveFactory_SelfNCCH(Loader::AppLoader& app_loader) {
221 std::shared_ptr<FileUtil::IOFile> romfs_file_;
222 if (Loader::ResultStatus::Success ==
223 app_loader.ReadRomFS(romfs_file_, ncch_data.romfs_offset, ncch_data.romfs_size)) {
224
225 ncch_data.romfs_file = std::move(romfs_file_);
226 }
227
228 std::vector<u8> buffer;
229
230 if (Loader::ResultStatus::Success == app_loader.ReadIcon(buffer))
231 ncch_data.icon = std::make_shared<std::vector<u8>>(std::move(buffer));
232
233 buffer.clear();
234 if (Loader::ResultStatus::Success == app_loader.ReadLogo(buffer))
235 ncch_data.logo = std::make_shared<std::vector<u8>>(std::move(buffer));
236
237 buffer.clear();
238 if (Loader::ResultStatus::Success == app_loader.ReadBanner(buffer))
239 ncch_data.banner = std::make_shared<std::vector<u8>>(std::move(buffer));
240}
241
242ResultVal<std::unique_ptr<ArchiveBackend>> ArchiveFactory_SelfNCCH::Open(const Path& path) {
243 auto archive = std::make_unique<SelfNCCHArchive>(ncch_data);
244 return MakeResult<std::unique_ptr<ArchiveBackend>>(std::move(archive));
245}
246
247ResultCode ArchiveFactory_SelfNCCH::Format(const Path&, const FileSys::ArchiveFormatInfo&) {
248 LOG_ERROR(Service_FS, "Attempted to format a SelfNCCH archive.");
249 return ERROR_INVALID_PATH;
250}
251
252ResultVal<ArchiveFormatInfo> ArchiveFactory_SelfNCCH::GetFormatInfo(const Path&) const {
253 LOG_ERROR(Service_FS, "Attempted to get format info of a SelfNCCH archive");
254 return ERROR_INVALID_PATH;
255}
256
257} // namespace FileSys
diff --git a/src/core/file_sys/archive_romfs.h b/src/core/file_sys/archive_selfncch.h
index 1eaf99b54..f1b971296 100644
--- a/src/core/file_sys/archive_romfs.h
+++ b/src/core/file_sys/archive_selfncch.h
@@ -1,4 +1,4 @@
1// Copyright 2014 Citra Emulator Project 1// Copyright 2017 Citra Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
@@ -17,22 +17,29 @@
17 17
18namespace FileSys { 18namespace FileSys {
19 19
20/// File system interface to the RomFS archive 20struct NCCHData {
21class ArchiveFactory_RomFS final : public ArchiveFactory { 21 std::shared_ptr<std::vector<u8>> icon;
22 std::shared_ptr<std::vector<u8>> logo;
23 std::shared_ptr<std::vector<u8>> banner;
24 std::shared_ptr<FileUtil::IOFile> romfs_file;
25 u64 romfs_offset = 0;
26 u64 romfs_size = 0;
27};
28
29/// File system interface to the SelfNCCH archive
30class ArchiveFactory_SelfNCCH final : public ArchiveFactory {
22public: 31public:
23 explicit ArchiveFactory_RomFS(Loader::AppLoader& app_loader); 32 explicit ArchiveFactory_SelfNCCH(Loader::AppLoader& app_loader);
24 33
25 std::string GetName() const override { 34 std::string GetName() const override {
26 return "RomFS"; 35 return "SelfNCCH";
27 } 36 }
28 ResultVal<std::unique_ptr<ArchiveBackend>> Open(const Path& path) override; 37 ResultVal<std::unique_ptr<ArchiveBackend>> Open(const Path& path) override;
29 ResultCode Format(const Path& path, const FileSys::ArchiveFormatInfo& format_info) override; 38 ResultCode Format(const Path& path, const FileSys::ArchiveFormatInfo& format_info) override;
30 ResultVal<ArchiveFormatInfo> GetFormatInfo(const Path& path) const override; 39 ResultVal<ArchiveFormatInfo> GetFormatInfo(const Path& path) const override;
31 40
32private: 41private:
33 std::shared_ptr<FileUtil::IOFile> romfs_file; 42 NCCHData ncch_data;
34 u64 data_offset;
35 u64 data_size;
36}; 43};
37 44
38} // namespace FileSys 45} // namespace FileSys
diff --git a/src/core/file_sys/errors.h b/src/core/file_sys/errors.h
index 4d5f62b08..9fc8d753b 100644
--- a/src/core/file_sys/errors.h
+++ b/src/core/file_sys/errors.h
@@ -39,5 +39,15 @@ const ResultCode ERROR_DIRECTORY_NOT_EMPTY(ErrorDescription::FS_DirectoryNotEmpt
39const ResultCode ERROR_GAMECARD_NOT_INSERTED(ErrorDescription::FS_GameCardNotInserted, 39const ResultCode ERROR_GAMECARD_NOT_INSERTED(ErrorDescription::FS_GameCardNotInserted,
40 ErrorModule::FS, ErrorSummary::NotFound, 40 ErrorModule::FS, ErrorSummary::NotFound,
41 ErrorLevel::Status); 41 ErrorLevel::Status);
42const ResultCode ERROR_INCORRECT_EXEFS_READ_SIZE(ErrorDescription::FS_IncorrectExeFSReadSize,
43 ErrorModule::FS, ErrorSummary::NotSupported,
44 ErrorLevel::Usage);
45const ResultCode ERROR_ROMFS_NOT_FOUND(ErrorDescription::FS_RomFSNotFound, ErrorModule::FS,
46 ErrorSummary::NotFound, ErrorLevel::Status);
47const ResultCode ERROR_COMMAND_NOT_ALLOWED(ErrorDescription::FS_CommandNotAllowed, ErrorModule::FS,
48 ErrorSummary::WrongArgument, ErrorLevel::Permanent);
49const ResultCode ERROR_EXEFS_SECTION_NOT_FOUND(ErrorDescription::FS_ExeFSSectionNotFound,
50 ErrorModule::FS, ErrorSummary::NotFound,
51 ErrorLevel::Status);
42 52
43} // namespace FileSys 53} // namespace FileSys
diff --git a/src/core/file_sys/savedata_archive.cpp b/src/core/file_sys/savedata_archive.cpp
index f2e6a06bc..f540c4a93 100644
--- a/src/core/file_sys/savedata_archive.cpp
+++ b/src/core/file_sys/savedata_archive.cpp
@@ -57,6 +57,8 @@ ResultVal<std::unique_ptr<FileBackend>> SaveDataArchive::OpenFile(const Path& pa
57 FileUtil::CreateEmptyFile(full_path); 57 FileUtil::CreateEmptyFile(full_path);
58 } 58 }
59 break; 59 break;
60 case PathParser::FileFound:
61 break; // Expected 'success' case
60 } 62 }
61 63
62 FileUtil::IOFile file(full_path, mode.write_flag ? "r+b" : "rb"); 64 FileUtil::IOFile file(full_path, mode.write_flag ? "r+b" : "rb");
@@ -91,6 +93,8 @@ ResultCode SaveDataArchive::DeleteFile(const Path& path) const {
91 case PathParser::NotFound: 93 case PathParser::NotFound:
92 LOG_ERROR(Service_FS, "File not found %s", full_path.c_str()); 94 LOG_ERROR(Service_FS, "File not found %s", full_path.c_str());
93 return ERROR_FILE_NOT_FOUND; 95 return ERROR_FILE_NOT_FOUND;
96 case PathParser::FileFound:
97 break; // Expected 'success' case
94 } 98 }
95 99
96 if (FileUtil::Delete(full_path)) { 100 if (FileUtil::Delete(full_path)) {
@@ -139,6 +143,8 @@ static ResultCode DeleteDirectoryHelper(const Path& path, const std::string& mou
139 case PathParser::FileFound: 143 case PathParser::FileFound:
140 LOG_ERROR(Service_FS, "Unexpected file or directory %s", full_path.c_str()); 144 LOG_ERROR(Service_FS, "Unexpected file or directory %s", full_path.c_str());
141 return ERROR_UNEXPECTED_FILE_OR_DIRECTORY; 145 return ERROR_UNEXPECTED_FILE_OR_DIRECTORY;
146 case PathParser::DirectoryFound:
147 break; // Expected 'success' case
142 } 148 }
143 149
144 if (deleter(full_path)) { 150 if (deleter(full_path)) {
@@ -182,6 +188,8 @@ ResultCode SaveDataArchive::CreateFile(const FileSys::Path& path, u64 size) cons
182 case PathParser::FileFound: 188 case PathParser::FileFound:
183 LOG_ERROR(Service_FS, "%s already exists", full_path.c_str()); 189 LOG_ERROR(Service_FS, "%s already exists", full_path.c_str());
184 return ERROR_FILE_ALREADY_EXISTS; 190 return ERROR_FILE_ALREADY_EXISTS;
191 case PathParser::NotFound:
192 break; // Expected 'success' case
185 } 193 }
186 194
187 if (size == 0) { 195 if (size == 0) {
@@ -225,6 +233,8 @@ ResultCode SaveDataArchive::CreateDirectory(const Path& path) const {
225 case PathParser::FileFound: 233 case PathParser::FileFound:
226 LOG_ERROR(Service_FS, "%s already exists", full_path.c_str()); 234 LOG_ERROR(Service_FS, "%s already exists", full_path.c_str());
227 return ERROR_DIRECTORY_ALREADY_EXISTS; 235 return ERROR_DIRECTORY_ALREADY_EXISTS;
236 case PathParser::NotFound:
237 break; // Expected 'success' case
228 } 238 }
229 239
230 if (FileUtil::CreateDir(mount_point + path.AsString())) { 240 if (FileUtil::CreateDir(mount_point + path.AsString())) {
@@ -269,6 +279,8 @@ ResultVal<std::unique_ptr<DirectoryBackend>> SaveDataArchive::OpenDirectory(
269 case PathParser::FileFound: 279 case PathParser::FileFound:
270 LOG_ERROR(Service_FS, "Unexpected file in path %s", full_path.c_str()); 280 LOG_ERROR(Service_FS, "Unexpected file in path %s", full_path.c_str());
271 return ERROR_UNEXPECTED_FILE_OR_DIRECTORY; 281 return ERROR_UNEXPECTED_FILE_OR_DIRECTORY;
282 case PathParser::DirectoryFound:
283 break; // Expected 'success' case
272 } 284 }
273 285
274 auto directory = std::make_unique<DiskDirectory>(full_path); 286 auto directory = std::make_unique<DiskDirectory>(full_path);
diff --git a/src/core/frontend/camera/blank_camera.cpp b/src/core/frontend/camera/blank_camera.cpp
new file mode 100644
index 000000000..7995abcbd
--- /dev/null
+++ b/src/core/frontend/camera/blank_camera.cpp
@@ -0,0 +1,31 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/frontend/camera/blank_camera.h"
6
7namespace Camera {
8
9void BlankCamera::StartCapture() {}
10
11void BlankCamera::StopCapture() {}
12
13void BlankCamera::SetFormat(Service::CAM::OutputFormat output_format) {
14 output_rgb = output_format == Service::CAM::OutputFormat::RGB565;
15}
16
17void BlankCamera::SetResolution(const Service::CAM::Resolution& resolution) {
18 width = resolution.width;
19 height = resolution.height;
20};
21
22void BlankCamera::SetFlip(Service::CAM::Flip) {}
23
24void BlankCamera::SetEffect(Service::CAM::Effect) {}
25
26std::vector<u16> BlankCamera::ReceiveFrame() const {
27 // Note: 0x80008000 stands for two black pixels in YUV422
28 return std::vector<u16>(width * height, output_rgb ? 0 : 0x8000);
29}
30
31} // namespace Camera
diff --git a/src/core/frontend/camera/blank_camera.h b/src/core/frontend/camera/blank_camera.h
new file mode 100644
index 000000000..c6619bd88
--- /dev/null
+++ b/src/core/frontend/camera/blank_camera.h
@@ -0,0 +1,28 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "core/frontend/camera/factory.h"
8#include "core/frontend/camera/interface.h"
9
10namespace Camera {
11
/// Camera implementation that needs no capture device. Its implementation (blank_camera.cpp)
/// returns frames filled with one constant pixel value and ignores capture, flip and effect
/// requests.
class BlankCamera final : public CameraInterface {
public:
    void StartCapture() override;
    void StopCapture() override;
    void SetResolution(const Service::CAM::Resolution&) override;
    void SetFlip(Service::CAM::Flip) override;
    void SetEffect(Service::CAM::Effect) override;
    void SetFormat(Service::CAM::OutputFormat) override;
    std::vector<u16> ReceiveFrame() const override;

private:
    // Frame dimensions taken from the last SetResolution call; zero until then.
    int width = 0;
    int height = 0;
    // True when the last SetFormat call requested RGB565; selects the fill value of the frames.
    bool output_rgb = false;
};
27
28} // namespace Camera
diff --git a/src/core/frontend/camera/factory.cpp b/src/core/frontend/camera/factory.cpp
new file mode 100644
index 000000000..4b4da50dd
--- /dev/null
+++ b/src/core/frontend/camera/factory.cpp
@@ -0,0 +1,32 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <unordered_map>
6#include "common/logging/log.h"
7#include "core/frontend/camera/blank_camera.h"
8#include "core/frontend/camera/factory.h"
9
10namespace Camera {
11
12static std::unordered_map<std::string, std::unique_ptr<CameraFactory>> factories;
13
14CameraFactory::~CameraFactory() = default;
15
16void RegisterFactory(const std::string& name, std::unique_ptr<CameraFactory> factory) {
17 factories[name] = std::move(factory);
18}
19
20std::unique_ptr<CameraInterface> CreateCamera(const std::string& name, const std::string& config) {
21 auto pair = factories.find(name);
22 if (pair != factories.end()) {
23 return pair->second->Create(config);
24 }
25
26 if (name != "blank") {
27 LOG_ERROR(Service_CAM, "Unknown camera \"%s\"", name.c_str());
28 }
29 return std::make_unique<BlankCamera>();
30}
31
32} // namespace Camera
diff --git a/src/core/frontend/camera/factory.h b/src/core/frontend/camera/factory.h
new file mode 100644
index 000000000..f46413fa7
--- /dev/null
+++ b/src/core/frontend/camera/factory.h
@@ -0,0 +1,41 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <string>
9#include "core/frontend/camera/interface.h"
10
namespace Camera {

/// Interface for objects that build CameraInterface instances from a configuration string.
class CameraFactory {
public:
    virtual ~CameraFactory();

    /**
     * Creates a camera object based on the configuration string.
     * @param config Configuration string to create the camera. The implementation can decide the
     * meaning of this string.
     * @returns a unique_ptr to the created camera object.
     */
    virtual std::unique_ptr<CameraInterface> Create(const std::string& config) const = 0;
};

/**
 * Registers an external camera factory. Registering a name that is already in use replaces the
 * previously registered factory.
 * @param name Identifier of the camera factory.
 * @param factory Camera factory to register.
 */
void RegisterFactory(const std::string& name, std::unique_ptr<CameraFactory> factory);

/**
 * Creates a camera from the factory.
 * @param name Identifier of the camera factory.
 * @param config Configuration string to create the camera. The meaning of this string is
 * defined by the factory.
 * @returns The camera created by the factory registered under name. If no factory is registered
 * under that name, a BlankCamera is returned instead, and an error is logged unless name is
 * "blank".
 */
std::unique_ptr<CameraInterface> CreateCamera(const std::string& name, const std::string& config);

} // namespace Camera
diff --git a/src/core/frontend/camera/interface.cpp b/src/core/frontend/camera/interface.cpp
new file mode 100644
index 000000000..9aec9e7f1
--- /dev/null
+++ b/src/core/frontend/camera/interface.cpp
@@ -0,0 +1,11 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/frontend/camera/interface.h"
6
namespace Camera {

// The destructor is declared in interface.h and defined here, out of line, with the default
// implementation.
CameraInterface::~CameraInterface() = default;

} // namespace Camera
diff --git a/src/core/frontend/camera/interface.h b/src/core/frontend/camera/interface.h
new file mode 100644
index 000000000..a55a495c9
--- /dev/null
+++ b/src/core/frontend/camera/interface.h
@@ -0,0 +1,61 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <vector>
8#include "common/common_types.h"
9#include "core/hle/service/cam/cam.h"
10
11namespace Camera {
12
13/// An abstract class standing for a camera. All camera implementations should inherit from this.
14class CameraInterface {
15public:
16 virtual ~CameraInterface();
17
18 /// Starts the camera for video capturing.
19 virtual void StartCapture() = 0;
20
21 /// Stops the camera for video capturing.
22 virtual void StopCapture() = 0;
23
24 /**
25 * Sets the video resolution from raw CAM service parameters.
26 * For the meaning of the parameters, please refer to Service::CAM::Resolution. Note that the
27 * actual camera implementation doesn't need to respect all the parameters. However, the width
28 * and the height parameters must be respected and be used to determine the size of output
29 * frames.
30 * @param resolution The resolution parameters to set
31 */
32 virtual void SetResolution(const Service::CAM::Resolution& resolution) = 0;
33
34 /**
35 * Configures how received frames should be flipped by the camera.
36 * @param flip Flip to apply to the frame
37 */
38 virtual void SetFlip(Service::CAM::Flip flip) = 0;
39
40 /**
41 * Configures what effect should be applied to received frames by the camera.
42 * @param effect Effect to apply to the frame
43 */
44 virtual void SetEffect(Service::CAM::Effect effect) = 0;
45
46 /**
47 * Sets the output format of all frames received after this function is called.
48 * @param format Output format of the frame
49 */
50 virtual void SetFormat(Service::CAM::OutputFormat format) = 0;
51
52 /**
53 * Receives a frame from the camera.
54 * This function should only be called between a StartCapture call and a StopCapture call.
55 * @returns A std::vector<u16> containing pixels. The total size of the vector is width * height
56 * where width and height are set by a call to SetResolution.
57 */
58 virtual std::vector<u16> ReceiveFrame() const = 0;
59};
60
61} // namespace Camera
diff --git a/src/core/frontend/emu_window.cpp b/src/core/frontend/emu_window.cpp
index 1541cc39d..a155b657d 100644
--- a/src/core/frontend/emu_window.cpp
+++ b/src/core/frontend/emu_window.cpp
@@ -5,7 +5,7 @@
5#include <algorithm> 5#include <algorithm>
6#include <cmath> 6#include <cmath>
7#include "common/assert.h" 7#include "common/assert.h"
8#include "common/profiler_reporting.h" 8#include "core/core.h"
9#include "core/frontend/emu_window.h" 9#include "core/frontend/emu_window.h"
10#include "core/frontend/key_map.h" 10#include "core/frontend/key_map.h"
11#include "video_core/video_core.h" 11#include "video_core/video_core.h"
@@ -70,14 +70,12 @@ void EmuWindow::TouchPressed(unsigned framebuffer_x, unsigned framebuffer_y) {
70 (framebuffer_layout.bottom_screen.bottom - framebuffer_layout.bottom_screen.top); 70 (framebuffer_layout.bottom_screen.bottom - framebuffer_layout.bottom_screen.top);
71 71
72 touch_pressed = true; 72 touch_pressed = true;
73 pad_state.touch.Assign(1);
74} 73}
75 74
76void EmuWindow::TouchReleased() { 75void EmuWindow::TouchReleased() {
77 touch_pressed = false; 76 touch_pressed = false;
78 touch_x = 0; 77 touch_x = 0;
79 touch_y = 0; 78 touch_y = 0;
80 pad_state.touch.Assign(0);
81} 79}
82 80
83void EmuWindow::TouchMoved(unsigned framebuffer_x, unsigned framebuffer_y) { 81void EmuWindow::TouchMoved(unsigned framebuffer_x, unsigned framebuffer_y) {
@@ -98,20 +96,19 @@ void EmuWindow::AccelerometerChanged(float x, float y, float z) {
98 // TODO(wwylele): do a time stretch as it in GyroscopeChanged 96 // TODO(wwylele): do a time stretch as it in GyroscopeChanged
99 // The time stretch formula should be like 97 // The time stretch formula should be like
100 // stretched_vector = (raw_vector - gravity) * stretch_ratio + gravity 98 // stretched_vector = (raw_vector - gravity) * stretch_ratio + gravity
101 accel_x = x * coef; 99 accel_x = static_cast<s16>(x * coef);
102 accel_y = y * coef; 100 accel_y = static_cast<s16>(y * coef);
103 accel_z = z * coef; 101 accel_z = static_cast<s16>(z * coef);
104} 102}
105 103
106void EmuWindow::GyroscopeChanged(float x, float y, float z) { 104void EmuWindow::GyroscopeChanged(float x, float y, float z) {
107 constexpr float FULL_FPS = 60; 105 constexpr float FULL_FPS = 60;
108 float coef = GetGyroscopeRawToDpsCoefficient(); 106 float coef = GetGyroscopeRawToDpsCoefficient();
109 float stretch = 107 float stretch = Core::System::GetInstance().perf_stats.GetLastFrameTimeScale();
110 FULL_FPS / Common::Profiling::GetTimingResultsAggregator()->GetAggregatedResults().fps;
111 std::lock_guard<std::mutex> lock(gyro_mutex); 108 std::lock_guard<std::mutex> lock(gyro_mutex);
112 gyro_x = x * coef * stretch; 109 gyro_x = static_cast<s16>(x * coef * stretch);
113 gyro_y = y * coef * stretch; 110 gyro_y = static_cast<s16>(y * coef * stretch);
114 gyro_z = z * coef * stretch; 111 gyro_z = static_cast<s16>(z * coef * stretch);
115} 112}
116 113
117void EmuWindow::UpdateCurrentFramebufferLayout(unsigned width, unsigned height) { 114void EmuWindow::UpdateCurrentFramebufferLayout(unsigned width, unsigned height) {
diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp
index 5cf45ada5..123fe7cd4 100644
--- a/src/core/gdbstub/gdbstub.cpp
+++ b/src/core/gdbstub/gdbstub.cpp
@@ -230,6 +230,7 @@ static void GdbHexToMem(u8* dest, const u8* src, size_t len) {
230 * Convert a u32 into a gdb-formatted hex string. 230 * Convert a u32 into a gdb-formatted hex string.
231 * 231 *
232 * @param dest Pointer to buffer to store output hex string characters. 232 * @param dest Pointer to buffer to store output hex string characters.
233 * @param v Value to convert.
233 */ 234 */
234static void IntToGdbHex(u8* dest, u32 v) { 235static void IntToGdbHex(u8* dest, u32 v) {
235 for (int i = 0; i < 8; i += 2) { 236 for (int i = 0; i < 8; i += 2) {
diff --git a/src/core/hle/applets/applet.cpp b/src/core/hle/applets/applet.cpp
index 645b2d5fe..9c43ed2fd 100644
--- a/src/core/hle/applets/applet.cpp
+++ b/src/core/hle/applets/applet.cpp
@@ -12,6 +12,7 @@
12#include "core/hle/applets/applet.h" 12#include "core/hle/applets/applet.h"
13#include "core/hle/applets/erreula.h" 13#include "core/hle/applets/erreula.h"
14#include "core/hle/applets/mii_selector.h" 14#include "core/hle/applets/mii_selector.h"
15#include "core/hle/applets/mint.h"
15#include "core/hle/applets/swkbd.h" 16#include "core/hle/applets/swkbd.h"
16#include "core/hle/result.h" 17#include "core/hle/result.h"
17#include "core/hle/service/apt/apt.h" 18#include "core/hle/service/apt/apt.h"
@@ -56,6 +57,10 @@ ResultCode Applet::Create(Service::APT::AppletId id) {
56 case Service::APT::AppletId::Error2: 57 case Service::APT::AppletId::Error2:
57 applets[id] = std::make_shared<ErrEula>(id); 58 applets[id] = std::make_shared<ErrEula>(id);
58 break; 59 break;
60 case Service::APT::AppletId::Mint:
61 case Service::APT::AppletId::Mint2:
62 applets[id] = std::make_shared<Mint>(id);
63 break;
59 default: 64 default:
60 LOG_ERROR(Service_APT, "Could not create applet %u", id); 65 LOG_ERROR(Service_APT, "Could not create applet %u", id);
61 // TODO(Subv): Find the right error code 66 // TODO(Subv): Find the right error code
diff --git a/src/core/hle/applets/mint.cpp b/src/core/hle/applets/mint.cpp
new file mode 100644
index 000000000..31a79ea17
--- /dev/null
+++ b/src/core/hle/applets/mint.cpp
@@ -0,0 +1,72 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/string_util.h"
6#include "core/hle/applets/mint.h"
7#include "core/hle/service/apt/apt.h"
8
9namespace HLE {
10namespace Applets {
11
12ResultCode Mint::ReceiveParameter(const Service::APT::MessageParameter& parameter) {
13 if (parameter.signal != static_cast<u32>(Service::APT::SignalType::Request)) {
14 LOG_ERROR(Service_APT, "unsupported signal %u", parameter.signal);
15 UNIMPLEMENTED();
16 // TODO(Subv): Find the right error code
17 return ResultCode(-1);
18 }
19
20 // The Request message contains a buffer with the size of the framebuffer shared
21 // memory.
22 // Create the SharedMemory that will hold the framebuffer data
23 Service::APT::CaptureBufferInfo capture_info;
24 ASSERT(sizeof(capture_info) == parameter.buffer.size());
25
26 memcpy(&capture_info, parameter.buffer.data(), sizeof(capture_info));
27
28 // TODO: allocated memory never released
29 using Kernel::MemoryPermission;
30 // Allocate a heap block of the required size for this applet.
31 heap_memory = std::make_shared<std::vector<u8>>(capture_info.size);
32 // Create a SharedMemory that directly points to this heap block.
33 framebuffer_memory = Kernel::SharedMemory::CreateForApplet(
34 heap_memory, 0, heap_memory->size(), MemoryPermission::ReadWrite,
35 MemoryPermission::ReadWrite, "Mint Memory");
36
37 // Send the response message with the newly created SharedMemory
38 Service::APT::MessageParameter result;
39 result.signal = static_cast<u32>(Service::APT::SignalType::Response);
40 result.buffer.clear();
41 result.destination_id = static_cast<u32>(Service::APT::AppletId::Application);
42 result.sender_id = static_cast<u32>(id);
43 result.object = framebuffer_memory;
44
45 Service::APT::SendParameter(result);
46 return RESULT_SUCCESS;
47}
48
49ResultCode Mint::StartImpl(const Service::APT::AppletStartupParameter& parameter) {
50 is_running = true;
51
52 // TODO(Subv): Set the expected fields in the response buffer before resending it to the
53 // application.
54 // TODO(Subv): Reverse the parameter format for the Mint applet
55
56 // Let the application know that we're closing
57 Service::APT::MessageParameter message;
58 message.buffer.resize(parameter.buffer.size());
59 std::fill(message.buffer.begin(), message.buffer.end(), 0);
60 message.signal = static_cast<u32>(Service::APT::SignalType::WakeupByExit);
61 message.destination_id = static_cast<u32>(Service::APT::AppletId::Application);
62 message.sender_id = static_cast<u32>(id);
63 Service::APT::SendParameter(message);
64
65 is_running = false;
66 return RESULT_SUCCESS;
67}
68
/// Intentionally empty: this applet does nothing in Update().
69void Mint::Update() {}
70
71} // namespace Applets
72} // namespace HLE
diff --git a/src/core/hle/applets/mint.h b/src/core/hle/applets/mint.h
new file mode 100644
index 000000000..d23dc40f9
--- /dev/null
+++ b/src/core/hle/applets/mint.h
@@ -0,0 +1,29 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "core/hle/applets/applet.h"
8#include "core/hle/kernel/shared_memory.h"
9
10namespace HLE {
11namespace Applets {
12
/// HLE applet class for the Mint applet ids; overrides the message, start and update hooks of
/// Applet.
13class Mint final : public Applet {
14public:
15 explicit Mint(Service::APT::AppletId id) : Applet(id) {}
16
 /// Handles a message sent to the applet.
17 ResultCode ReceiveParameter(const Service::APT::MessageParameter& parameter) override;
 /// Starts the applet with the given startup parameter.
18 ResultCode StartImpl(const Service::APT::AppletStartupParameter& parameter) override;
 /// Update hook of the applet.
19 void Update() override;
20
21private:
22 /// This SharedMemory will be created when we receive the Request message.
23 /// It holds the framebuffer info retrieved by the application with
24 /// GSPGPU::ImportDisplayCaptureInfo
25 Kernel::SharedPtr<Kernel::SharedMemory> framebuffer_memory;
26};
27
28} // namespace Applets
29} // namespace HLE
diff --git a/src/core/hle/config_mem.cpp b/src/core/hle/config_mem.cpp
index ccd73cfcb..e386ccdc6 100644
--- a/src/core/hle/config_mem.cpp
+++ b/src/core/hle/config_mem.cpp
@@ -14,15 +14,18 @@ ConfigMemDef config_mem;
14void Init() { 14void Init() {
15 std::memset(&config_mem, 0, sizeof(config_mem)); 15 std::memset(&config_mem, 0, sizeof(config_mem));
16 16
17 config_mem.update_flag = 0; // No update 17 // Values extracted from firmware 11.2.0-35E
18 config_mem.kernel_version_min = 0x34;
19 config_mem.kernel_version_maj = 0x2;
20 config_mem.ns_tid = 0x0004013000008002;
18 config_mem.sys_core_ver = 0x2; 21 config_mem.sys_core_ver = 0x2;
19 config_mem.unit_info = 0x1; // Bit 0 set for Retail 22 config_mem.unit_info = 0x1; // Bit 0 set for Retail
20 config_mem.prev_firm = 0; 23 config_mem.prev_firm = 0x1;
21 config_mem.firm_unk = 0; 24 config_mem.ctr_sdk_ver = 0x0000F297;
22 config_mem.firm_version_rev = 0; 25 config_mem.firm_version_min = 0x34;
23 config_mem.firm_version_min = 0x40;
24 config_mem.firm_version_maj = 0x2; 26 config_mem.firm_version_maj = 0x2;
25 config_mem.firm_sys_core_ver = 0x2; 27 config_mem.firm_sys_core_ver = 0x2;
28 config_mem.firm_ctr_sdk_ver = 0x0000F297;
26} 29}
27 30
28} // namespace 31} // namespace
diff --git a/src/core/hle/ipc.h b/src/core/hle/ipc.h
index 4e094faa7..cd9a5863d 100644
--- a/src/core/hle/ipc.h
+++ b/src/core/hle/ipc.h
@@ -18,12 +18,26 @@ static const int kCommandHeaderOffset = 0x80; ///< Offset into command buffer of
18 * the thread's TLS to an intermediate buffer in kernel memory, and then copied again to 18 * the thread's TLS to an intermediate buffer in kernel memory, and then copied again to
19 * the service handler process' memory. 19 * the service handler process' memory.
20 * @param offset Optional offset into command buffer 20 * @param offset Optional offset into command buffer
21 * @param offset Optional offset into command buffer (in bytes)
21 * @return Pointer to command buffer 22 * @return Pointer to command buffer
22 */ 23 */
23inline u32* GetCommandBuffer(const int offset = 0) { 24inline u32* GetCommandBuffer(const int offset = 0) {
24 return (u32*)Memory::GetPointer(GetCurrentThread()->GetTLSAddress() + kCommandHeaderOffset + 25 return (u32*)Memory::GetPointer(GetCurrentThread()->GetTLSAddress() + kCommandHeaderOffset +
25 offset); 26 offset);
26} 27}
28
29static const int kStaticBuffersOffset =
30 0x100; ///< Offset into static buffers, relative to command buffer header
31
32/**
33 * Returns a pointer to the static buffers area in the current thread's TLS
34 * TODO(Subv): cf. GetCommandBuffer
35 * @param offset Optional offset into static buffers area (in bytes)
36 * @return Pointer to static buffers area
37 */
38inline u32* GetStaticBuffers(const int offset = 0) {
39 return GetCommandBuffer(kStaticBuffersOffset + offset);
40}
27} 41}
28 42
29namespace IPC { 43namespace IPC {
@@ -40,10 +54,17 @@ enum DescriptorType : u32 {
40 CallingPid = 0x20, 54 CallingPid = 0x20,
41}; 55};
42 56
57union Header {
58 u32 raw;
59 BitField<0, 6, u32> translate_params_size;
60 BitField<6, 6, u32> normal_params_size;
61 BitField<16, 16, u32> command_id;
62};
63
43/** 64/**
44 * @brief Creates a command header to be used for IPC 65 * @brief Creates a command header to be used for IPC
45 * @param command_id ID of the command to create a header for. 66 * @param command_id ID of the command to create a header for.
46 * @param normal_params Size of the normal parameters in words. Up to 63. 67 * @param normal_params_size Size of the normal parameters in words. Up to 63.
47 * @param translate_params_size Size of the translate parameters in words. Up to 63. 68 * @param translate_params_size Size of the translate parameters in words. Up to 63.
48 * @return The created IPC header. 69 * @return The created IPC header.
49 * 70 *
@@ -51,24 +72,16 @@ enum DescriptorType : u32 {
51 * through modifications and checks by the kernel. 72 * through modifications and checks by the kernel.
52 * The translate parameters are described by headers generated with the IPC::*Desc functions. 73 * The translate parameters are described by headers generated with the IPC::*Desc functions.
53 * 74 *
54 * @note While #normal_params is equivalent to the number of normal parameters, 75 * @note While @p normal_params_size is equivalent to the number of normal parameters,
55 * #translate_params_size includes the size occupied by the translate parameters headers. 76 * @p translate_params_size includes the size occupied by the translate parameters headers.
56 */ 77 */
57constexpr u32 MakeHeader(u16 command_id, unsigned int normal_params, 78inline u32 MakeHeader(u16 command_id, unsigned int normal_params_size,
58 unsigned int translate_params_size) { 79 unsigned int translate_params_size) {
59 return (u32(command_id) << 16) | ((u32(normal_params) & 0x3F) << 6) | 80 Header header{};
60 (u32(translate_params_size) & 0x3F); 81 header.command_id.Assign(command_id);
61} 82 header.normal_params_size.Assign(normal_params_size);
62 83 header.translate_params_size.Assign(translate_params_size);
63union Header { 84 return header.raw;
64 u32 raw;
65 BitField<0, 6, u32> translate_params_size;
66 BitField<6, 6, u32> normal_params;
67 BitField<16, 16, u32> command_id;
68};
69
70inline Header ParseHeader(u32 header) {
71 return {header};
72} 85}
73 86
74constexpr u32 MoveHandleDesc(u32 num_handles = 1) { 87constexpr u32 MoveHandleDesc(u32 num_handles = 1) {
@@ -83,7 +96,7 @@ constexpr u32 CallingPidDesc() {
83 return CallingPid; 96 return CallingPid;
84} 97}
85 98
86constexpr bool isHandleDescriptor(u32 descriptor) { 99constexpr bool IsHandleDescriptor(u32 descriptor) {
87 return (descriptor & 0xF) == 0x0; 100 return (descriptor & 0xF) == 0x0;
88} 101}
89 102
@@ -91,18 +104,19 @@ constexpr u32 HandleNumberFromDesc(u32 handle_descriptor) {
91 return (handle_descriptor >> 26) + 1; 104 return (handle_descriptor >> 26) + 1;
92} 105}
93 106
94constexpr u32 StaticBufferDesc(u32 size, u8 buffer_id) {
95 return StaticBuffer | (size << 14) | ((buffer_id & 0xF) << 10);
96}
97
98union StaticBufferDescInfo { 107union StaticBufferDescInfo {
99 u32 raw; 108 u32 raw;
109 BitField<0, 4, u32> descriptor_type;
100 BitField<10, 4, u32> buffer_id; 110 BitField<10, 4, u32> buffer_id;
101 BitField<14, 18, u32> size; 111 BitField<14, 18, u32> size;
102}; 112};
103 113
104inline StaticBufferDescInfo ParseStaticBufferDesc(const u32 desc) { 114inline u32 StaticBufferDesc(u32 size, u8 buffer_id) {
105 return {desc}; 115 StaticBufferDescInfo info{};
116 info.descriptor_type.Assign(StaticBuffer);
117 info.buffer_id.Assign(buffer_id);
118 info.size.Assign(size);
119 return info.raw;
106} 120}
107 121
108/** 122/**
@@ -122,29 +136,30 @@ inline u32 PXIBufferDesc(u32 size, unsigned buffer_id, bool is_read_only) {
122 return type | (size << 8) | ((buffer_id & 0xF) << 4); 136 return type | (size << 8) | ((buffer_id & 0xF) << 4);
123} 137}
124 138
125enum MappedBufferPermissions { 139enum MappedBufferPermissions : u32 {
126 R = 1, 140 R = 1,
127 W = 2, 141 W = 2,
128 RW = R | W, 142 RW = R | W,
129}; 143};
130 144
131constexpr u32 MappedBufferDesc(u32 size, MappedBufferPermissions perms) {
132 return MappedBuffer | (size << 4) | (u32(perms) << 1);
133}
134
135union MappedBufferDescInfo { 145union MappedBufferDescInfo {
136 u32 raw; 146 u32 raw;
137 BitField<4, 28, u32> size; 147 BitField<0, 4, u32> flags;
138 BitField<1, 2, MappedBufferPermissions> perms; 148 BitField<1, 2, MappedBufferPermissions> perms;
149 BitField<4, 28, u32> size;
139}; 150};
140 151
141inline MappedBufferDescInfo ParseMappedBufferDesc(const u32 desc) { 152inline u32 MappedBufferDesc(u32 size, MappedBufferPermissions perms) {
142 return {desc}; 153 MappedBufferDescInfo info{};
154 info.flags.Assign(MappedBuffer);
155 info.perms.Assign(perms);
156 info.size.Assign(size);
157 return info.raw;
143} 158}
144 159
145inline DescriptorType GetDescriptorType(u32 descriptor) { 160inline DescriptorType GetDescriptorType(u32 descriptor) {
146 // Note: Those checks must be done in this order 161 // Note: Those checks must be done in this order
147 if (isHandleDescriptor(descriptor)) 162 if (IsHandleDescriptor(descriptor))
148 return (DescriptorType)(descriptor & 0x30); 163 return (DescriptorType)(descriptor & 0x30);
149 164
150 // handle the fact that the following descriptors can have rights 165 // handle the fact that the following descriptors can have rights
diff --git a/src/core/hle/ipc_helpers.h b/src/core/hle/ipc_helpers.h
new file mode 100644
index 000000000..323158bb5
--- /dev/null
+++ b/src/core/hle/ipc_helpers.h
@@ -0,0 +1,275 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6#include "core/hle/ipc.h"
7#include "core/hle/kernel/kernel.h"
8
9namespace IPC {
10
11class RequestHelperBase {
12protected:
13 u32* cmdbuf;
14 ptrdiff_t index = 1;
15 Header header;
16
17public:
18 RequestHelperBase(u32* command_buffer, Header command_header)
19 : cmdbuf(command_buffer), header(command_header) {}
20
21 /// Returns the total size of the request in words
22 size_t TotalSize() const {
23 return 1 /* command header */ + header.normal_params_size + header.translate_params_size;
24 }
25
26 void ValidateHeader() {
27 DEBUG_ASSERT_MSG(index == TotalSize(), "Operations do not match the header (cmd 0x%x)",
28 header.raw);
29 }
30};
31
32class RequestBuilder : public RequestHelperBase {
33public:
34 RequestBuilder(u32* command_buffer, Header command_header)
35 : RequestHelperBase(command_buffer, command_header) {
36 cmdbuf[0] = header.raw;
37 }
38 explicit RequestBuilder(u32* command_buffer, u32 command_header)
39 : RequestBuilder(command_buffer, Header{command_header}) {}
40 RequestBuilder(u32* command_buffer, u16 command_id, unsigned normal_params_size,
41 unsigned translate_params_size)
42 : RequestBuilder(command_buffer,
43 MakeHeader(command_id, normal_params_size, translate_params_size)) {}
44
45 // Validate on destruction, as there shouldn't be any case where we don't want it
46 ~RequestBuilder() {
47 ValidateHeader();
48 }
49
50 template <typename T>
51 void Push(T value);
52
53 void Push(u32 value) {
54 cmdbuf[index++] = value;
55 }
56 template <typename First, typename... Other>
57 void Push(const First& first_value, const Other&... other_values) {
58 Push(first_value);
59 Push(other_values...);
60 }
61
62 /**
63 * @brief Copies the content of the given trivially copyable class to the buffer as a normal
64 * param
65 * @note: The input class must be correctly packed/padded to fit hardware layout.
66 */
67 template <typename T>
68 void PushRaw(const T& value) {
69 static_assert(std::is_trivially_copyable<T>(), "Raw types should be trivially copyable");
70 std::memcpy(cmdbuf + index, &value, sizeof(T));
71 index += (sizeof(T) + 3) / 4; // round up to word length
72 }
73
74 // TODO : ensure that translate params are added after all regular params
75 template <typename... H>
76 void PushCopyHandles(H... handles) {
77 Push(CopyHandleDesc(sizeof...(H)));
78 Push(static_cast<Kernel::Handle>(handles)...);
79 }
80
81 template <typename... H>
82 void PushMoveHandles(H... handles) {
83 Push(MoveHandleDesc(sizeof...(H)));
84 Push(static_cast<Kernel::Handle>(handles)...);
85 }
86
87 void PushCurrentPIDHandle() {
88 Push(CallingPidDesc());
89 Push(u32(0));
90 }
91
92 void PushStaticBuffer(VAddr buffer_vaddr, u32 size, u8 buffer_id) {
93 Push(StaticBufferDesc(size, buffer_id));
94 Push(buffer_vaddr);
95 }
96
97 void PushMappedBuffer(VAddr buffer_vaddr, u32 size, MappedBufferPermissions perms) {
98 Push(MappedBufferDesc(size, perms));
99 Push(buffer_vaddr);
100 }
101};
102
103/// Push ///
104
105template <>
106inline void RequestBuilder::Push<u32>(u32 value) {
107 Push(value);
108}
109
110template <>
111inline void RequestBuilder::Push<u64>(u64 value) {
112 Push(static_cast<u32>(value));
113 Push(static_cast<u32>(value >> 32));
114}
115
116template <>
117inline void RequestBuilder::Push<ResultCode>(ResultCode value) {
118 Push(value.raw);
119}
120
121class RequestParser : public RequestHelperBase {
122public:
123 RequestParser(u32* command_buffer, Header command_header)
124 : RequestHelperBase(command_buffer, command_header) {}
125 explicit RequestParser(u32* command_buffer, u32 command_header)
126 : RequestParser(command_buffer, Header{command_header}) {}
127 RequestParser(u32* command_buffer, u16 command_id, unsigned normal_params_size,
128 unsigned translate_params_size)
129 : RequestParser(command_buffer,
130 MakeHeader(command_id, normal_params_size, translate_params_size)) {}
131
132 RequestBuilder MakeBuilder(u32 normal_params_size, u32 translate_params_size,
133 bool validateHeader = true) {
134 if (validateHeader)
135 ValidateHeader();
136 Header builderHeader{
137 MakeHeader(header.command_id, normal_params_size, translate_params_size)};
138 return {cmdbuf, builderHeader};
139 }
140
141 template <typename T>
142 T Pop();
143
144 template <typename T>
145 void Pop(T& value);
146
147 template <typename First, typename... Other>
148 void Pop(First& first_value, Other&... other_values);
149
150 Kernel::Handle PopHandle();
151
152 template <typename... H>
153 void PopHandles(H&... handles);
154
155 /**
156 * @brief Pops the static buffer vaddr
157 * @return The virtual address of the buffer
158 * @param[out] data_size If non-null, the pointed value will be set to the size of the data
159 * @param[out] useStaticBuffersToGetVaddr Indicates if we should read the vaddr from the static
160 * buffers (which is the correct thing to do, but no service presently implement it) instead of
161 * using the same value as the process who sent the request
162 * given by the source process
163 *
164 * Static buffers must be set up before any IPC request using those is sent.
165 * It is the duty of the process (usually services) to allocate and set up the receiving static
166 * buffer information
167 * Please note that the setup uses virtual addresses.
168 */
169 VAddr PopStaticBuffer(size_t* data_size = nullptr, bool useStaticBuffersToGetVaddr = false);
170
171 /**
172 * @brief Pops the mapped buffer vaddr
173 * @return The virtual address of the buffer
174 * @param[out] data_size If non-null, the pointed value will be set to the size of the data
175 * given by the source process
176 * @param[out] buffer_perms If non-null, the pointed value will be set to the permissions of the
177 * buffer
178 */
179 VAddr PopMappedBuffer(size_t* data_size = nullptr,
180 MappedBufferPermissions* buffer_perms = nullptr);
181
182 /**
183 * @brief Reads the next normal parameters as a struct, by copying it
184 * @note: The output class must be correctly packed/padded to fit hardware layout.
185 */
186 template <typename T>
187 void PopRaw(T& value);
188};
189
190/// Pop ///
191
192template <>
193inline u32 RequestParser::Pop<u32>() {
194 return cmdbuf[index++];
195}
196
197template <>
198inline u64 RequestParser::Pop<u64>() {
199 const u64 lsw = Pop<u32>();
200 const u64 msw = Pop<u32>();
201 return msw << 32 | lsw;
202}
203
204template <>
205inline ResultCode RequestParser::Pop<ResultCode>() {
206 return ResultCode{Pop<u32>()};
207}
208
209template <typename T>
210void RequestParser::Pop(T& value) {
211 value = Pop<T>();
212}
213
214template <typename First, typename... Other>
215void RequestParser::Pop(First& first_value, Other&... other_values) {
216 first_value = Pop<First>();
217 Pop(other_values...);
218}
219
220inline Kernel::Handle RequestParser::PopHandle() {
221 const u32 handle_descriptor = Pop<u32>();
222 DEBUG_ASSERT_MSG(IsHandleDescriptor(handle_descriptor),
223 "Tried to pop handle(s) but the descriptor is not a handle descriptor");
224 DEBUG_ASSERT_MSG(HandleNumberFromDesc(handle_descriptor) == 1,
225 "Descriptor indicates that there isn't exactly one handle");
226 return Pop<Kernel::Handle>();
227}
228
229template <typename... H>
230void RequestParser::PopHandles(H&... handles) {
231 const u32 handle_descriptor = Pop<u32>();
232 const int handles_number = sizeof...(H);
233 DEBUG_ASSERT_MSG(IsHandleDescriptor(handle_descriptor),
234 "Tried to pop handle(s) but the descriptor is not a handle descriptor");
235 DEBUG_ASSERT_MSG(handles_number == HandleNumberFromDesc(handle_descriptor),
236 "Number of handles doesn't match the descriptor");
237 Pop(static_cast<Kernel::Handle&>(handles)...);
238}
239
240inline VAddr RequestParser::PopStaticBuffer(size_t* data_size, bool useStaticBuffersToGetVaddr) {
241 const u32 sbuffer_descriptor = Pop<u32>();
242 StaticBufferDescInfo bufferInfo{sbuffer_descriptor};
243 if (data_size != nullptr)
244 *data_size = bufferInfo.size;
245 if (!useStaticBuffersToGetVaddr)
246 return Pop<VAddr>();
247 else {
248 ASSERT_MSG(0, "remove the assert if multiprocess/IPC translation are implemented.");
249 // The buffer has already been copied to the static buffer by the kernel during
250 // translation
251 Pop<VAddr>(); // Pop the calling process buffer address
252 // and get the vaddr from the static buffers
253 return cmdbuf[(0x100 >> 2) + bufferInfo.buffer_id * 2 + 1];
254 }
255}
256
257inline VAddr RequestParser::PopMappedBuffer(size_t* data_size,
258 MappedBufferPermissions* buffer_perms) {
259 const u32 sbuffer_descriptor = Pop<u32>();
260 MappedBufferDescInfo bufferInfo{sbuffer_descriptor};
261 if (data_size != nullptr)
262 *data_size = bufferInfo.size;
263 if (buffer_perms != nullptr)
264 *buffer_perms = bufferInfo.perms;
265 return Pop<VAddr>();
266}
267
268template <typename T>
269void RequestParser::PopRaw(T& value) {
270 static_assert(std::is_trivially_copyable<T>(), "Raw types should be trivially copyable");
271 std::memcpy(&value, cmdbuf + index, sizeof(T));
272 index += (sizeof(T) + 3) / 4; // round up to word length
273}
274
275} // namespace IPC
diff --git a/src/core/hle/kernel/server_session.h b/src/core/hle/kernel/server_session.h
index c088b9a19..761fc4781 100644
--- a/src/core/hle/kernel/server_session.h
+++ b/src/core/hle/kernel/server_session.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <memory>
7#include <string> 8#include <string>
8#include "common/assert.h" 9#include "common/assert.h"
9#include "common/common_types.h" 10#include "common/common_types.h"
@@ -44,7 +45,8 @@ public:
44 45
45 /** 46 /**
46 * Creates a pair of ServerSession and an associated ClientSession. 47 * Creates a pair of ServerSession and an associated ClientSession.
47 * @param name Optional name of the ports 48 * @param name Optional name of the ports.
49 * @param hle_handler Optional HLE handler for this server session.
48 * @return The created session tuple 50 * @return The created session tuple
49 */ 51 */
50 static SessionPair CreateSessionPair( 52 static SessionPair CreateSessionPair(
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index c557a2279..6ab31c70b 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -11,7 +11,6 @@
11#include <boost/container/flat_set.hpp> 11#include <boost/container/flat_set.hpp>
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "core/arm/arm_interface.h" 13#include "core/arm/arm_interface.h"
14#include "core/core.h"
15#include "core/hle/kernel/kernel.h" 14#include "core/hle/kernel/kernel.h"
16#include "core/hle/result.h" 15#include "core/hle/result.h"
17 16
diff --git a/src/core/hle/kernel/timer.cpp b/src/core/hle/kernel/timer.cpp
index 60537f355..a00c75679 100644
--- a/src/core/hle/kernel/timer.cpp
+++ b/src/core/hle/kernel/timer.cpp
@@ -52,9 +52,14 @@ void Timer::Set(s64 initial, s64 interval) {
52 initial_delay = initial; 52 initial_delay = initial;
53 interval_delay = interval; 53 interval_delay = interval;
54 54
55 u64 initial_microseconds = initial / 1000; 55 if (initial == 0) {
56 CoreTiming::ScheduleEvent(usToCycles(initial_microseconds), timer_callback_event_type, 56 // Immediately invoke the callback
57 callback_handle); 57 Signal(0);
58 } else {
59 u64 initial_microseconds = initial / 1000;
60 CoreTiming::ScheduleEvent(usToCycles(initial_microseconds), timer_callback_event_type,
61 callback_handle);
62 }
58} 63}
59 64
60void Timer::Cancel() { 65void Timer::Cancel() {
@@ -72,6 +77,22 @@ void Timer::WakeupAllWaitingThreads() {
72 signaled = false; 77 signaled = false;
73} 78}
74 79
80void Timer::Signal(int cycles_late) {
81 LOG_TRACE(Kernel, "Timer %u fired", GetObjectId());
82
83 signaled = true;
84
85 // Resume all waiting threads
86 WakeupAllWaitingThreads();
87
88 if (interval_delay != 0) {
89 // Reschedule the timer with the interval delay
90 u64 interval_microseconds = interval_delay / 1000;
91 CoreTiming::ScheduleEvent(usToCycles(interval_microseconds) - cycles_late,
92 timer_callback_event_type, callback_handle);
93 }
94}
95
75/// The timer callback event, called when a timer is fired 96/// The timer callback event, called when a timer is fired
76static void TimerCallback(u64 timer_handle, int cycles_late) { 97static void TimerCallback(u64 timer_handle, int cycles_late) {
77 SharedPtr<Timer> timer = 98 SharedPtr<Timer> timer =
@@ -82,19 +103,7 @@ static void TimerCallback(u64 timer_handle, int cycles_late) {
82 return; 103 return;
83 } 104 }
84 105
85 LOG_TRACE(Kernel, "Timer %08" PRIx64 " fired", timer_handle); 106 timer->Signal(cycles_late);
86
87 timer->signaled = true;
88
89 // Resume all waiting threads
90 timer->WakeupAllWaitingThreads();
91
92 if (timer->interval_delay != 0) {
93 // Reschedule the timer with the interval delay
94 u64 interval_microseconds = timer->interval_delay / 1000;
95 CoreTiming::ScheduleEvent(usToCycles(interval_microseconds) - cycles_late,
96 timer_callback_event_type, timer_handle);
97 }
98} 107}
99 108
100void TimersInit() { 109void TimersInit() {
diff --git a/src/core/hle/kernel/timer.h b/src/core/hle/kernel/timer.h
index c174f5664..b0f818933 100644
--- a/src/core/hle/kernel/timer.h
+++ b/src/core/hle/kernel/timer.h
@@ -54,6 +54,14 @@ public:
54 void Cancel(); 54 void Cancel();
55 void Clear(); 55 void Clear();
56 56
57 /**
58 * Signals the timer, waking up any waiting threads and rescheduling it
59 * for the next interval.
60 * This method should not be called from outside the timer callback handler,
61 * lest multiple callback events get scheduled.
62 */
63 void Signal(int cycles_late);
64
57private: 65private:
58 Timer(); 66 Timer();
59 ~Timer() override; 67 ~Timer() override;
diff --git a/src/core/hle/result.h b/src/core/hle/result.h
index 53864a3a7..cfefbbc64 100644
--- a/src/core/hle/result.h
+++ b/src/core/hle/result.h
@@ -20,6 +20,7 @@ enum class ErrorDescription : u32 {
20 OS_InvalidBufferDescriptor = 48, 20 OS_InvalidBufferDescriptor = 48,
21 MaxConnectionsReached = 52, 21 MaxConnectionsReached = 52,
22 WrongAddress = 53, 22 WrongAddress = 53,
23 FS_RomFSNotFound = 100,
23 FS_ArchiveNotMounted = 101, 24 FS_ArchiveNotMounted = 101,
24 FS_FileNotFound = 112, 25 FS_FileNotFound = 112,
25 FS_PathNotFound = 113, 26 FS_PathNotFound = 113,
@@ -35,10 +36,13 @@ enum class ErrorDescription : u32 {
35 OutofRangeOrMisalignedAddress = 36 OutofRangeOrMisalignedAddress =
36 513, // TODO(purpasmart): Check if this name fits its actual usage 37 513, // TODO(purpasmart): Check if this name fits its actual usage
37 GPU_FirstInitialization = 519, 38 GPU_FirstInitialization = 519,
39 FS_ExeFSSectionNotFound = 567,
40 FS_CommandNotAllowed = 630,
38 FS_InvalidReadFlag = 700, 41 FS_InvalidReadFlag = 700,
39 FS_InvalidPath = 702, 42 FS_InvalidPath = 702,
40 FS_WriteBeyondEnd = 705, 43 FS_WriteBeyondEnd = 705,
41 FS_UnsupportedOpenFlags = 760, 44 FS_UnsupportedOpenFlags = 760,
45 FS_IncorrectExeFSReadSize = 761,
42 FS_UnexpectedFileOrDirectory = 770, 46 FS_UnexpectedFileOrDirectory = 770,
43 InvalidSection = 1000, 47 InvalidSection = 1000,
44 TooLarge = 1001, 48 TooLarge = 1001,
diff --git a/src/core/hle/service/apt/apt.cpp b/src/core/hle/service/apt/apt.cpp
index 615fe31ea..e57b19c2d 100644
--- a/src/core/hle/service/apt/apt.cpp
+++ b/src/core/hle/service/apt/apt.cpp
@@ -18,6 +18,8 @@
18#include "core/hle/service/fs/archive.h" 18#include "core/hle/service/fs/archive.h"
19#include "core/hle/service/ptm/ptm.h" 19#include "core/hle/service/ptm/ptm.h"
20#include "core/hle/service/service.h" 20#include "core/hle/service/service.h"
21#include "core/hw/aes/ccm.h"
22#include "core/hw/aes/key.h"
21 23
22namespace Service { 24namespace Service {
23namespace APT { 25namespace APT {
@@ -470,6 +472,107 @@ void GetStartupArgument(Service::Interface* self) {
470 cmd_buff[2] = 0; 472 cmd_buff[2] = 0;
471} 473}
472 474
475void Wrap(Service::Interface* self) {
476 IPC::RequestParser rp(Kernel::GetCommandBuffer(), 0x46, 4, 4);
477 const u32 output_size = rp.Pop<u32>();
478 const u32 input_size = rp.Pop<u32>();
479 const u32 nonce_offset = rp.Pop<u32>();
480 u32 nonce_size = rp.Pop<u32>();
481 size_t desc_size;
482 IPC::MappedBufferPermissions desc_permission;
483 const VAddr input = rp.PopMappedBuffer(&desc_size, &desc_permission);
484 ASSERT(desc_size == input_size && desc_permission == IPC::MappedBufferPermissions::R);
485 const VAddr output = rp.PopMappedBuffer(&desc_size, &desc_permission);
486 ASSERT(desc_size == output_size && desc_permission == IPC::MappedBufferPermissions::W);
487
488 // Note: real 3DS still returns SUCCESS when the sizes don't match. It seems that it doesn't
489 // check the buffer size and writes data with potential overflow.
490 ASSERT_MSG(output_size == input_size + HW::AES::CCM_MAC_SIZE,
491 "input_size (%d) doesn't match to output_size (%d)", input_size, output_size);
492
493 LOG_DEBUG(Service_APT, "called, output_size=%u, input_size=%u, nonce_offset=%u, nonce_size=%u",
494 output_size, input_size, nonce_offset, nonce_size);
495
496 // Note: This weird nonce size modification is verified against real 3DS
497 nonce_size = std::min<u32>(nonce_size & ~3, HW::AES::CCM_NONCE_SIZE);
498
499 // Reads nonce and concatenates the rest of the input as plaintext
500 HW::AES::CCMNonce nonce{};
501 Memory::ReadBlock(input + nonce_offset, nonce.data(), nonce_size);
502 u32 pdata_size = input_size - nonce_size;
503 std::vector<u8> pdata(pdata_size);
504 Memory::ReadBlock(input, pdata.data(), nonce_offset);
505 Memory::ReadBlock(input + nonce_offset + nonce_size, pdata.data() + nonce_offset,
506 pdata_size - nonce_offset);
507
508 // Encrypts the plaintext using AES-CCM
509 auto cipher = HW::AES::EncryptSignCCM(pdata, nonce, HW::AES::KeySlotID::APTWrap);
510
511 // Puts the nonce to the beginning of the output, with ciphertext followed
512 Memory::WriteBlock(output, nonce.data(), nonce_size);
513 Memory::WriteBlock(output + nonce_size, cipher.data(), cipher.size());
514
515 IPC::RequestBuilder rb = rp.MakeBuilder(1, 4);
516 rb.Push(RESULT_SUCCESS);
517
518 // Unmap buffer
519 rb.PushMappedBuffer(input, input_size, IPC::MappedBufferPermissions::R);
520 rb.PushMappedBuffer(output, output_size, IPC::MappedBufferPermissions::W);
521}
522
523void Unwrap(Service::Interface* self) {
524 IPC::RequestParser rp(Kernel::GetCommandBuffer(), 0x47, 4, 4);
525 const u32 output_size = rp.Pop<u32>();
526 const u32 input_size = rp.Pop<u32>();
527 const u32 nonce_offset = rp.Pop<u32>();
528 u32 nonce_size = rp.Pop<u32>();
529 size_t desc_size;
530 IPC::MappedBufferPermissions desc_permission;
531 const VAddr input = rp.PopMappedBuffer(&desc_size, &desc_permission);
532 ASSERT(desc_size == input_size && desc_permission == IPC::MappedBufferPermissions::R);
533 const VAddr output = rp.PopMappedBuffer(&desc_size, &desc_permission);
534 ASSERT(desc_size == output_size && desc_permission == IPC::MappedBufferPermissions::W);
535
536 // Note: real 3DS still returns SUCCESS when the sizes don't match. It seems that it doesn't
537 // check the buffer size and writes data with potential overflow.
538 ASSERT_MSG(output_size == input_size - HW::AES::CCM_MAC_SIZE,
539 "input_size (%d) doesn't match to output_size (%d)", input_size, output_size);
540
541 LOG_DEBUG(Service_APT, "called, output_size=%u, input_size=%u, nonce_offset=%u, nonce_size=%u",
542 output_size, input_size, nonce_offset, nonce_size);
543
544 // Note: This weird nonce size modification is verified against real 3DS
545 nonce_size = std::min<u32>(nonce_size & ~3, HW::AES::CCM_NONCE_SIZE);
546
547 // Reads nonce and cipher text
548 HW::AES::CCMNonce nonce{};
549 Memory::ReadBlock(input, nonce.data(), nonce_size);
550 u32 cipher_size = input_size - nonce_size;
551 std::vector<u8> cipher(cipher_size);
552 Memory::ReadBlock(input + nonce_size, cipher.data(), cipher_size);
553
554 // Decrypts the ciphertext using AES-CCM
555 auto pdata = HW::AES::DecryptVerifyCCM(cipher, nonce, HW::AES::KeySlotID::APTWrap);
556
557 IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
558 if (!pdata.empty()) {
559 // Splits the plaintext and put the nonce in between
560 Memory::WriteBlock(output, pdata.data(), nonce_offset);
561 Memory::WriteBlock(output + nonce_offset, nonce.data(), nonce_size);
562 Memory::WriteBlock(output + nonce_offset + nonce_size, pdata.data() + nonce_offset,
563 pdata.size() - nonce_offset);
564 rb.Push(RESULT_SUCCESS);
565 } else {
566 LOG_ERROR(Service_APT, "Failed to decrypt data");
567 rb.Push(ResultCode(static_cast<ErrorDescription>(1), ErrorModule::PS,
568 ErrorSummary::WrongArgument, ErrorLevel::Status));
569 }
570
571 // Unmap buffer
572 rb.PushMappedBuffer(input, input_size, IPC::MappedBufferPermissions::R);
573 rb.PushMappedBuffer(output, output_size, IPC::MappedBufferPermissions::W);
574}
575
473void CheckNew3DSApp(Service::Interface* self) { 576void CheckNew3DSApp(Service::Interface* self) {
474 u32* cmd_buff = Kernel::GetCommandBuffer(); 577 u32* cmd_buff = Kernel::GetCommandBuffer();
475 578
diff --git a/src/core/hle/service/apt/apt.h b/src/core/hle/service/apt/apt.h
index 80325361f..e63b61450 100644
--- a/src/core/hle/service/apt/apt.h
+++ b/src/core/hle/service/apt/apt.h
@@ -137,6 +137,46 @@ void Initialize(Service::Interface* self);
137void GetSharedFont(Service::Interface* self); 137void GetSharedFont(Service::Interface* self);
138 138
139/** 139/**
140 * APT::Wrap service function
141 * Inputs:
142 * 1 : Output buffer size
143 * 2 : Input buffer size
144 * 3 : Nonce offset to the input buffer
145 * 4 : Nonce size
146 * 5 : Buffer mapping descriptor ((input_buffer_size << 4) | 0xA)
147 * 6 : Input buffer address
148 * 7 : Buffer mapping descriptor ((input_buffer_size << 4) | 0xC)
149 * 8 : Output buffer address
150 * Outputs:
151 * 1 : Result of function, 0 on success, otherwise error code
152 * 2 : Buffer unmapping descriptor ((input_buffer_size << 4) | 0xA)
153 * 3 : Input buffer address
154 * 4 : Buffer unmapping descriptor ((output_buffer_size << 4) | 0xC)
155 * 5 : Output buffer address
156 */
157void Wrap(Service::Interface* self);
158
159/**
160 * APT::Unwrap service function
161 * Inputs:
162 * 1 : Output buffer size
163 * 2 : Input buffer size
164 * 3 : Nonce offset to the output buffer
165 * 4 : Nonce size
166 * 5 : Buffer mapping descriptor ((input_buffer_size << 4) | 0xA)
167 * 6 : Input buffer address
168 * 7 : Buffer mapping descriptor ((input_buffer_size << 4) | 0xC)
169 * 8 : Output buffer address
170 * Outputs:
171 * 1 : Result of function, 0 on success, otherwise error code
172 * 2 : Buffer unmapping descriptor ((input_buffer_size << 4) | 0xA)
173 * 3 : Input buffer address
174 * 4 : Buffer unmapping descriptor ((output_buffer_size << 4) | 0xC)
175 * 5 : Output buffer address
176 */
177void Unwrap(Service::Interface* self);
178
179/**
140 * APT::NotifyToWait service function 180 * APT::NotifyToWait service function
141 * Inputs: 181 * Inputs:
142 * 1 : AppID 182 * 1 : AppID
diff --git a/src/core/hle/service/apt/apt_a.cpp b/src/core/hle/service/apt/apt_a.cpp
index 62dc2d61d..c496cba8d 100644
--- a/src/core/hle/service/apt/apt_a.cpp
+++ b/src/core/hle/service/apt/apt_a.cpp
@@ -78,8 +78,8 @@ const Interface::FunctionInfo FunctionTable[] = {
78 {0x00430040, NotifyToWait, "NotifyToWait"}, 78 {0x00430040, NotifyToWait, "NotifyToWait"},
79 {0x00440000, GetSharedFont, "GetSharedFont"}, 79 {0x00440000, GetSharedFont, "GetSharedFont"},
80 {0x00450040, nullptr, "GetWirelessRebootInfo"}, 80 {0x00450040, nullptr, "GetWirelessRebootInfo"},
81 {0x00460104, nullptr, "Wrap"}, 81 {0x00460104, Wrap, "Wrap"},
82 {0x00470104, nullptr, "Unwrap"}, 82 {0x00470104, Unwrap, "Unwrap"},
83 {0x00480100, nullptr, "GetProgramInfo"}, 83 {0x00480100, nullptr, "GetProgramInfo"},
84 {0x00490180, nullptr, "Reboot"}, 84 {0x00490180, nullptr, "Reboot"},
85 {0x004A0040, nullptr, "GetCaptureInfo"}, 85 {0x004A0040, nullptr, "GetCaptureInfo"},
diff --git a/src/core/hle/service/apt/apt_s.cpp b/src/core/hle/service/apt/apt_s.cpp
index effd23dce..ec5668d05 100644
--- a/src/core/hle/service/apt/apt_s.cpp
+++ b/src/core/hle/service/apt/apt_s.cpp
@@ -78,8 +78,8 @@ const Interface::FunctionInfo FunctionTable[] = {
78 {0x00430040, NotifyToWait, "NotifyToWait"}, 78 {0x00430040, NotifyToWait, "NotifyToWait"},
79 {0x00440000, GetSharedFont, "GetSharedFont"}, 79 {0x00440000, GetSharedFont, "GetSharedFont"},
80 {0x00450040, nullptr, "GetWirelessRebootInfo"}, 80 {0x00450040, nullptr, "GetWirelessRebootInfo"},
81 {0x00460104, nullptr, "Wrap"}, 81 {0x00460104, Wrap, "Wrap"},
82 {0x00470104, nullptr, "Unwrap"}, 82 {0x00470104, Unwrap, "Unwrap"},
83 {0x00480100, nullptr, "GetProgramInfo"}, 83 {0x00480100, nullptr, "GetProgramInfo"},
84 {0x00490180, nullptr, "Reboot"}, 84 {0x00490180, nullptr, "Reboot"},
85 {0x004A0040, nullptr, "GetCaptureInfo"}, 85 {0x004A0040, nullptr, "GetCaptureInfo"},
diff --git a/src/core/hle/service/apt/apt_u.cpp b/src/core/hle/service/apt/apt_u.cpp
index e06084a1e..9dd002590 100644
--- a/src/core/hle/service/apt/apt_u.cpp
+++ b/src/core/hle/service/apt/apt_u.cpp
@@ -78,8 +78,8 @@ const Interface::FunctionInfo FunctionTable[] = {
78 {0x00430040, NotifyToWait, "NotifyToWait"}, 78 {0x00430040, NotifyToWait, "NotifyToWait"},
79 {0x00440000, GetSharedFont, "GetSharedFont"}, 79 {0x00440000, GetSharedFont, "GetSharedFont"},
80 {0x00450040, nullptr, "GetWirelessRebootInfo"}, 80 {0x00450040, nullptr, "GetWirelessRebootInfo"},
81 {0x00460104, nullptr, "Wrap"}, 81 {0x00460104, Wrap, "Wrap"},
82 {0x00470104, nullptr, "Unwrap"}, 82 {0x00470104, Unwrap, "Unwrap"},
83 {0x00480100, nullptr, "GetProgramInfo"}, 83 {0x00480100, nullptr, "GetProgramInfo"},
84 {0x00490180, nullptr, "Reboot"}, 84 {0x00490180, nullptr, "Reboot"},
85 {0x004A0040, nullptr, "GetCaptureInfo"}, 85 {0x004A0040, nullptr, "GetCaptureInfo"},
diff --git a/src/core/hle/service/cam/cam.cpp b/src/core/hle/service/cam/cam.cpp
index 5594aedab..95665e754 100644
--- a/src/core/hle/service/cam/cam.cpp
+++ b/src/core/hle/service/cam/cam.cpp
@@ -2,7 +2,15 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
6#include <array>
7#include <future>
8#include <memory>
9#include <vector>
10#include "common/bit_set.h"
5#include "common/logging/log.h" 11#include "common/logging/log.h"
12#include "core/core_timing.h"
13#include "core/frontend/camera/factory.h"
6#include "core/hle/kernel/event.h" 14#include "core/hle/kernel/event.h"
7#include "core/hle/service/cam/cam.h" 15#include "core/hle/service/cam/cam.h"
8#include "core/hle/service/cam/cam_c.h" 16#include "core/hle/service/cam/cam_c.h"
@@ -10,206 +18,924 @@
10#include "core/hle/service/cam/cam_s.h" 18#include "core/hle/service/cam/cam_s.h"
11#include "core/hle/service/cam/cam_u.h" 19#include "core/hle/service/cam/cam_u.h"
12#include "core/hle/service/service.h" 20#include "core/hle/service/service.h"
21#include "core/settings.h"
13 22
14namespace Service { 23namespace Service {
15namespace CAM { 24namespace CAM {
16 25
17static const u32 TRANSFER_BYTES = 5 * 1024; 26namespace {
27
28struct ContextConfig {
29 Flip flip;
30 Effect effect;
31 OutputFormat format;
32 Resolution resolution;
33};
34
35struct CameraConfig {
36 std::unique_ptr<Camera::CameraInterface> impl;
37 std::array<ContextConfig, 2> contexts;
38 int current_context;
39 FrameRate frame_rate;
40};
41
42struct PortConfig {
43 int camera_id;
44
45 bool is_active; // set when the port is activated by an Activate call.
46 bool is_pending_receiving; // set if SetReceiving is called when is_busy = false. When
47 // StartCapture is called then, this will trigger a receiving
48 // process and reset itself.
49 bool is_busy; // set when StartCapture is called and reset when StopCapture is called.
50 bool is_receiving; // set when there is an ongoing receiving process.
51
52 bool is_trimming;
53 u16 x0; // x-coordinate of starting position for trimming
54 u16 y0; // y-coordinate of starting position for trimming
55 u16 x1; // x-coordinate of ending position for trimming
56 u16 y1; // y-coordinate of ending position for trimming
57
58 u32 transfer_bytes;
59
60 Kernel::SharedPtr<Kernel::Event> completion_event;
61 Kernel::SharedPtr<Kernel::Event> buffer_error_interrupt_event;
62 Kernel::SharedPtr<Kernel::Event> vsync_interrupt_event;
63
64 std::future<std::vector<u16>> capture_result; // will hold the received frame.
65 VAddr dest; // the destination address of a receiving process
66 u32 dest_size; // the destination size of a receiving process
67
68 void Clear() {
69 completion_event->Clear();
70 buffer_error_interrupt_event->Clear();
71 vsync_interrupt_event->Clear();
72 is_receiving = false;
73 is_active = false;
74 is_pending_receiving = false;
75 is_busy = false;
76 is_trimming = false;
77 x0 = 0;
78 y0 = 0;
79 x1 = 0;
80 y1 = 0;
81 transfer_bytes = 256;
82 }
83};
84
85// built-in resolution parameters
86constexpr std::array<Resolution, 8> PRESET_RESOLUTION{{
87 {640, 480, 0, 0, 639, 479}, // VGA
88 {320, 240, 0, 0, 639, 479}, // QVGA
89 {160, 120, 0, 0, 639, 479}, // QQVGA
90 {352, 288, 26, 0, 613, 479}, // CIF
91 {176, 144, 26, 0, 613, 479}, // QCIF
92 {256, 192, 0, 0, 639, 479}, // DS_LCD
93 {512, 384, 0, 0, 639, 479}, // DS_LCDx4
94 {400, 240, 0, 48, 639, 431}, // CTR_TOP_LCD
95}};
96
// Latency in ms for each frame rate option. StartReceiving indexes this table with the
// camera's FrameRate value to decide when the completion event fires.
constexpr std::array<int, 13> LATENCY_BY_FRAME_RATE{{
    67,  // Rate_15
    67,  // Rate_15_To_5
    67,  // Rate_15_To_2
    100, // Rate_10
    118, // Rate_8_5
    200, // Rate_5
    50,  // Rate_20
    50,  // Rate_20_To_5
    33,  // Rate_30
    33,  // Rate_30_To_5
    67,  // Rate_15_To_10
    50,  // Rate_20_To_10
    33,  // Rate_30_To_10
}};
113
114std::array<CameraConfig, NumCameras> cameras;
115std::array<PortConfig, 2> ports;
116int completion_event_callback;
117
118const ResultCode ERROR_INVALID_ENUM_VALUE(ErrorDescription::InvalidEnumValue, ErrorModule::CAM,
119 ErrorSummary::InvalidArgument, ErrorLevel::Usage);
120const ResultCode ERROR_OUT_OF_RANGE(ErrorDescription::OutOfRange, ErrorModule::CAM,
121 ErrorSummary::InvalidArgument, ErrorLevel::Usage);
122
123void CompletionEventCallBack(u64 port_id, int) {
124 PortConfig& port = ports[port_id];
125 const CameraConfig& camera = cameras[port.camera_id];
126 const auto buffer = port.capture_result.get();
127
128 if (port.is_trimming) {
129 u32 trim_width;
130 u32 trim_height;
131 const int original_width = camera.contexts[camera.current_context].resolution.width;
132 const int original_height = camera.contexts[camera.current_context].resolution.height;
133 if (port.x1 <= port.x0 || port.y1 <= port.y0 || port.x1 > original_width ||
134 port.y1 > original_height) {
135 LOG_ERROR(Service_CAM, "Invalid trimming coordinates x0=%u, y0=%u, x1=%u, y1=%u",
136 port.x0, port.y0, port.x1, port.y1);
137 trim_width = 0;
138 trim_height = 0;
139 } else {
140 trim_width = port.x1 - port.x0;
141 trim_height = port.y1 - port.y0;
142 }
143
144 u32 trim_size = (port.x1 - port.x0) * (port.y1 - port.y0) * 2;
145 if (port.dest_size != trim_size) {
146 LOG_ERROR(Service_CAM, "The destination size (%u) doesn't match the source (%u)!",
147 port.dest_size, trim_size);
148 }
149
150 const u32 src_offset = port.y0 * original_width + port.x0;
151 const u16* src_ptr = buffer.data() + src_offset;
152 // Note: src_size_left is int because it can be negative if the buffer size doesn't match.
153 int src_size_left = static_cast<int>((buffer.size() - src_offset) * sizeof(u16));
154 VAddr dest_ptr = port.dest;
155 // Note: dest_size_left and line_bytes are int to match the type of src_size_left.
156 int dest_size_left = static_cast<int>(port.dest_size);
157 const int line_bytes = static_cast<int>(trim_width * sizeof(u16));
158
159 for (u32 y = 0; y < trim_height; ++y) {
160 int copy_length = std::min({line_bytes, dest_size_left, src_size_left});
161 if (copy_length <= 0) {
162 break;
163 }
164 Memory::WriteBlock(dest_ptr, src_ptr, copy_length);
165 dest_ptr += copy_length;
166 dest_size_left -= copy_length;
167 src_ptr += original_width;
168 src_size_left -= original_width * sizeof(u16);
169 }
170 } else {
171 std::size_t buffer_size = buffer.size() * sizeof(u16);
172 if (port.dest_size != buffer_size) {
173 LOG_ERROR(Service_CAM, "The destination size (%u) doesn't match the source (%zu)!",
174 port.dest_size, buffer_size);
175 }
176 Memory::WriteBlock(port.dest, buffer.data(), std::min<u32>(port.dest_size, buffer_size));
177 }
178
179 port.is_receiving = false;
180 port.completion_event->Signal();
181}
182
183// Starts a receiving process on the specified port. This can only be called when is_busy = true and
184// is_receiving = false.
185void StartReceiving(int port_id) {
186 PortConfig& port = ports[port_id];
187 port.is_receiving = true;
188
189 // launches a capture task asynchronously
190 const CameraConfig& camera = cameras[port.camera_id];
191 port.capture_result =
192 std::async(std::launch::async, &Camera::CameraInterface::ReceiveFrame, camera.impl.get());
193
194 // schedules a completion event according to the frame rate. The event will block on the
195 // capture task if it is not finished within the expected time
196 CoreTiming::ScheduleEvent(
197 msToCycles(LATENCY_BY_FRAME_RATE[static_cast<int>(camera.frame_rate)]),
198 completion_event_callback, port_id);
199}
200
201// Cancels any ongoing receiving processes at the specified port. This is used by functions that
202// stop capturing.
203// TODO: what is the exact behaviour on real 3DS when stopping capture during an ongoing process?
204// Will the completion event still be signaled?
205void CancelReceiving(int port_id) {
206 if (!ports[port_id].is_receiving)
207 return;
208 LOG_WARNING(Service_CAM, "tries to cancel an ongoing receiving process.");
209 CoreTiming::UnscheduleEvent(completion_event_callback, port_id);
210 ports[port_id].capture_result.wait();
211 ports[port_id].is_receiving = false;
212}
213
214// Activates the specified port with the specfied camera.
215static void ActivatePort(int port_id, int camera_id) {
216 if (ports[port_id].is_busy && ports[port_id].camera_id != camera_id) {
217 CancelReceiving(port_id);
218 cameras[ports[port_id].camera_id].impl->StopCapture();
219 ports[port_id].is_busy = false;
220 }
221 ports[port_id].is_active = true;
222 ports[port_id].camera_id = camera_id;
223}
224
225template <int max_index>
226class CommandParamBitSet : public BitSet8 {
227public:
228 explicit CommandParamBitSet(u32 command_param)
229 : BitSet8(static_cast<u8>(command_param & 0xFF)) {}
18 230
19static Kernel::SharedPtr<Kernel::Event> completion_event_cam1; 231 bool IsValid() const {
20static Kernel::SharedPtr<Kernel::Event> completion_event_cam2; 232 return m_val < (1 << max_index);
21static Kernel::SharedPtr<Kernel::Event> interrupt_error_event; 233 }
22static Kernel::SharedPtr<Kernel::Event> vsync_interrupt_error_event; 234
235 bool IsSingle() const {
236 return IsValid() && Count() == 1;
237 }
238};
239
240using PortSet = CommandParamBitSet<2>;
241using ContextSet = CommandParamBitSet<2>;
242using CameraSet = CommandParamBitSet<3>;
243
244} // namespace
23 245
24void StartCapture(Service::Interface* self) { 246void StartCapture(Service::Interface* self) {
25 u32* cmd_buff = Kernel::GetCommandBuffer(); 247 u32* cmd_buff = Kernel::GetCommandBuffer();
26 248
27 u8 port = cmd_buff[1] & 0xFF; 249 const PortSet port_select(cmd_buff[1]);
250
251 if (port_select.IsValid()) {
252 for (int i : port_select) {
253 if (!ports[i].is_busy) {
254 if (!ports[i].is_active) {
255 // This doesn't return an error, but seems to put the camera in an undefined
256 // state
257 LOG_ERROR(Service_CAM, "port %u hasn't been activated", i);
258 } else {
259 cameras[ports[i].camera_id].impl->StartCapture();
260 ports[i].is_busy = true;
261 if (ports[i].is_pending_receiving) {
262 ports[i].is_pending_receiving = false;
263 StartReceiving(i);
264 }
265 }
266 } else {
267 LOG_WARNING(Service_CAM, "port %u already started", i);
268 }
269 }
270 cmd_buff[1] = RESULT_SUCCESS.raw;
271 } else {
272 LOG_ERROR(Service_CAM, "invalid port_select=%u", port_select.m_val);
273 cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw;
274 }
28 275
29 cmd_buff[0] = IPC::MakeHeader(0x1, 1, 0); 276 cmd_buff[0] = IPC::MakeHeader(0x1, 1, 0);
30 cmd_buff[1] = RESULT_SUCCESS.raw;
31 277
32 LOG_WARNING(Service_CAM, "(STUBBED) called, port=%d", port); 278 LOG_DEBUG(Service_CAM, "called, port_select=%u", port_select.m_val);
33} 279}
34 280
35void StopCapture(Service::Interface* self) { 281void StopCapture(Service::Interface* self) {
36 u32* cmd_buff = Kernel::GetCommandBuffer(); 282 u32* cmd_buff = Kernel::GetCommandBuffer();
37 283
38 u8 port = cmd_buff[1] & 0xFF; 284 const PortSet port_select(cmd_buff[1]);
285
286 if (port_select.IsValid()) {
287 for (int i : port_select) {
288 if (ports[i].is_busy) {
289 CancelReceiving(i);
290 cameras[ports[i].camera_id].impl->StopCapture();
291 ports[i].is_busy = false;
292 } else {
293 LOG_WARNING(Service_CAM, "port %u already stopped", i);
294 }
295 }
296 cmd_buff[1] = RESULT_SUCCESS.raw;
297 } else {
298 LOG_ERROR(Service_CAM, "invalid port_select=%u", port_select.m_val);
299 cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw;
300 }
39 301
40 cmd_buff[0] = IPC::MakeHeader(0x2, 1, 0); 302 cmd_buff[0] = IPC::MakeHeader(0x2, 1, 0);
303
304 LOG_DEBUG(Service_CAM, "called, port_select=%u", port_select.m_val);
305}
306
307void IsBusy(Service::Interface* self) {
308 u32* cmd_buff = Kernel::GetCommandBuffer();
309
310 const PortSet port_select(cmd_buff[1]);
311
312 if (port_select.IsValid()) {
313 bool is_busy = true;
314 // Note: the behaviour on no or both ports selected are verified against real 3DS.
315 for (int i : port_select) {
316 is_busy &= ports[i].is_busy;
317 }
318 cmd_buff[1] = RESULT_SUCCESS.raw;
319 cmd_buff[2] = is_busy ? 1 : 0;
320 } else {
321 LOG_ERROR(Service_CAM, "invalid port_select=%u", port_select.m_val);
322 cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw;
323 }
324
325 cmd_buff[0] = IPC::MakeHeader(0x3, 2, 0);
326
327 LOG_DEBUG(Service_CAM, "called, port_select=%u", port_select.m_val);
328}
329
330void ClearBuffer(Service::Interface* self) {
331 u32* cmd_buff = Kernel::GetCommandBuffer();
332
333 const PortSet port_select(cmd_buff[1]);
334
335 cmd_buff[0] = IPC::MakeHeader(0x4, 1, 0);
41 cmd_buff[1] = RESULT_SUCCESS.raw; 336 cmd_buff[1] = RESULT_SUCCESS.raw;
42 337
43 LOG_WARNING(Service_CAM, "(STUBBED) called, port=%d", port); 338 LOG_WARNING(Service_CAM, "(STUBBED) called, port_select=%u", port_select.m_val);
44} 339}
45 340
46void GetVsyncInterruptEvent(Service::Interface* self) { 341void GetVsyncInterruptEvent(Service::Interface* self) {
47 u32* cmd_buff = Kernel::GetCommandBuffer(); 342 u32* cmd_buff = Kernel::GetCommandBuffer();
48 343
49 u8 port = cmd_buff[1] & 0xFF; 344 const PortSet port_select(cmd_buff[1]);
345
346 if (port_select.IsSingle()) {
347 int port = *port_select.begin();
348 cmd_buff[1] = RESULT_SUCCESS.raw;
349 cmd_buff[2] = IPC::CopyHandleDesc();
350 cmd_buff[3] = Kernel::g_handle_table.Create(ports[port].vsync_interrupt_event).MoveFrom();
351 } else {
352 LOG_ERROR(Service_CAM, "invalid port_select=%u", port_select.m_val);
353 cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw;
354 cmd_buff[2] = IPC::CopyHandleDesc();
355 cmd_buff[2] = 0;
356 }
50 357
51 cmd_buff[0] = IPC::MakeHeader(0x5, 1, 2); 358 cmd_buff[0] = IPC::MakeHeader(0x5, 1, 2);
52 cmd_buff[1] = RESULT_SUCCESS.raw;
53 cmd_buff[2] = IPC::CopyHandleDesc();
54 cmd_buff[3] = Kernel::g_handle_table.Create(vsync_interrupt_error_event).MoveFrom();
55 359
56 LOG_WARNING(Service_CAM, "(STUBBED) called, port=%d", port); 360 LOG_WARNING(Service_CAM, "(STUBBED) called, port_select=%u", port_select.m_val);
57} 361}
58 362
59void GetBufferErrorInterruptEvent(Service::Interface* self) { 363void GetBufferErrorInterruptEvent(Service::Interface* self) {
60 u32* cmd_buff = Kernel::GetCommandBuffer(); 364 u32* cmd_buff = Kernel::GetCommandBuffer();
61 365
62 u8 port = cmd_buff[1] & 0xFF; 366 const PortSet port_select(cmd_buff[1]);
63 367
64 cmd_buff[0] = IPC::MakeHeader(0x6, 1, 2); 368 if (port_select.IsSingle()) {
65 cmd_buff[1] = RESULT_SUCCESS.raw; 369 int port = *port_select.begin();
66 cmd_buff[2] = IPC::CopyHandleDesc(); 370 cmd_buff[1] = RESULT_SUCCESS.raw;
67 cmd_buff[3] = Kernel::g_handle_table.Create(interrupt_error_event).MoveFrom(); 371 cmd_buff[2] = IPC::CopyHandleDesc();
68 372 cmd_buff[3] =
69 LOG_WARNING(Service_CAM, "(STUBBED) called, port=%d", port); 373 Kernel::g_handle_table.Create(ports[port].buffer_error_interrupt_event).MoveFrom();
374 } else {
375 LOG_ERROR(Service_CAM, "invalid port_select=%u", port_select.m_val);
376 cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw;
377 cmd_buff[2] = IPC::CopyHandleDesc();
378 cmd_buff[2] = 0;
379 }
380
381 LOG_WARNING(Service_CAM, "(STUBBED) called, port_select=%u", port_select.m_val);
70} 382}
71 383
72void SetReceiving(Service::Interface* self) { 384void SetReceiving(Service::Interface* self) {
73 u32* cmd_buff = Kernel::GetCommandBuffer(); 385 u32* cmd_buff = Kernel::GetCommandBuffer();
74 386
75 VAddr dest = cmd_buff[1]; 387 const VAddr dest = cmd_buff[1];
76 u8 port = cmd_buff[2] & 0xFF; 388 const PortSet port_select(cmd_buff[2]);
77 u32 image_size = cmd_buff[3]; 389 const u32 image_size = cmd_buff[3];
78 u16 trans_unit = cmd_buff[4] & 0xFFFF; 390 const u32 trans_unit = cmd_buff[4] & 0xFFFF;
391
392 if (port_select.IsSingle()) {
393 int port_id = *port_select.begin();
394 PortConfig& port = ports[port_id];
395 CancelReceiving(port_id);
396 port.completion_event->Clear();
397 port.dest = dest;
398 port.dest_size = image_size;
399
400 if (port.is_busy) {
401 StartReceiving(port_id);
402 } else {
403 port.is_pending_receiving = true;
404 }
405
406 cmd_buff[1] = RESULT_SUCCESS.raw;
407 cmd_buff[2] = IPC::CopyHandleDesc();
408 cmd_buff[3] = Kernel::g_handle_table.Create(port.completion_event).MoveFrom();
409 } else {
410 LOG_ERROR(Service_CAM, "invalid port_select=%u", port_select.m_val);
411 cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw;
412 }
413
414 cmd_buff[0] = IPC::MakeHeader(0x7, 1, 2);
79 415
80 Kernel::Event* completion_event = 416 LOG_DEBUG(Service_CAM, "called, addr=0x%X, port_select=%u, image_size=%u, trans_unit=%u", dest,
81 (Port)port == Port::Cam2 ? completion_event_cam2.get() : completion_event_cam1.get(); 417 port_select.m_val, image_size, trans_unit);
418}
82 419
83 completion_event->Signal(); 420void IsFinishedReceiving(Service::Interface* self) {
421 u32* cmd_buff = Kernel::GetCommandBuffer();
84 422
85 cmd_buff[0] = IPC::MakeHeader(0x7, 1, 2); 423 const PortSet port_select(cmd_buff[1]);
86 cmd_buff[1] = RESULT_SUCCESS.raw; 424
87 cmd_buff[2] = IPC::CopyHandleDesc(); 425 if (port_select.IsSingle()) {
88 cmd_buff[3] = Kernel::g_handle_table.Create(completion_event).MoveFrom(); 426 int port = *port_select.begin();
427 cmd_buff[1] = RESULT_SUCCESS.raw;
428 cmd_buff[2] = (ports[port].is_receiving || ports[port].is_pending_receiving) ? 0 : 1;
429 } else {
430 LOG_ERROR(Service_CAM, "invalid port_select=%u", port_select.m_val);
431 cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw;
432 }
89 433
90 LOG_WARNING(Service_CAM, "(STUBBED) called, addr=0x%X, port=%d, image_size=%d, trans_unit=%d", 434 cmd_buff[0] = IPC::MakeHeader(0x8, 2, 0);
91 dest, port, image_size, trans_unit); 435
436 LOG_DEBUG(Service_CAM, "called, port_select=%u", port_select.m_val);
92} 437}
93 438
94void SetTransferLines(Service::Interface* self) { 439void SetTransferLines(Service::Interface* self) {
95 u32* cmd_buff = Kernel::GetCommandBuffer(); 440 u32* cmd_buff = Kernel::GetCommandBuffer();
96 441
97 u8 port = cmd_buff[1] & 0xFF; 442 const PortSet port_select(cmd_buff[1]);
98 u16 transfer_lines = cmd_buff[2] & 0xFFFF; 443 const u32 transfer_lines = cmd_buff[2] & 0xFFFF;
99 u16 width = cmd_buff[3] & 0xFFFF; 444 const u32 width = cmd_buff[3] & 0xFFFF;
100 u16 height = cmd_buff[4] & 0xFFFF; 445 const u32 height = cmd_buff[4] & 0xFFFF;
446
447 if (port_select.IsValid()) {
448 for (int i : port_select) {
449 ports[i].transfer_bytes = transfer_lines * width * 2;
450 }
451 cmd_buff[1] = RESULT_SUCCESS.raw;
452 } else {
453 LOG_ERROR(Service_CAM, "invalid port_select=%u", port_select.m_val);
454 cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw;
455 }
101 456
102 cmd_buff[0] = IPC::MakeHeader(0x9, 1, 0); 457 cmd_buff[0] = IPC::MakeHeader(0x9, 1, 0);
103 cmd_buff[1] = RESULT_SUCCESS.raw;
104 458
105 LOG_WARNING(Service_CAM, "(STUBBED) called, port=%d, lines=%d, width=%d, height=%d", port, 459 LOG_WARNING(Service_CAM, "(STUBBED) called, port_select=%u, lines=%u, width=%u, height=%u",
106 transfer_lines, width, height); 460 port_select.m_val, transfer_lines, width, height);
107} 461}
108 462
109void GetMaxLines(Service::Interface* self) { 463void GetMaxLines(Service::Interface* self) {
110 u32* cmd_buff = Kernel::GetCommandBuffer(); 464 u32* cmd_buff = Kernel::GetCommandBuffer();
111 465
112 u16 width = cmd_buff[1] & 0xFFFF; 466 const u32 width = cmd_buff[1] & 0xFFFF;
113 u16 height = cmd_buff[2] & 0xFFFF; 467 const u32 height = cmd_buff[2] & 0xFFFF;
468
469 // Note: the result of the algorithm below are hwtested with width < 640 and with height < 480
470 constexpr u32 MIN_TRANSFER_UNIT = 256;
471 constexpr u32 MAX_BUFFER_SIZE = 2560;
472 if (width * height * 2 % MIN_TRANSFER_UNIT != 0) {
473 cmd_buff[1] = ERROR_OUT_OF_RANGE.raw;
474 } else {
475 u32 lines = MAX_BUFFER_SIZE / width;
476 if (lines > height) {
477 lines = height;
478 }
479 cmd_buff[1] = RESULT_SUCCESS.raw;
480 while (height % lines != 0 || (lines * width * 2 % MIN_TRANSFER_UNIT != 0)) {
481 --lines;
482 if (lines == 0) {
483 cmd_buff[1] = ERROR_OUT_OF_RANGE.raw;
484 break;
485 }
486 }
487 cmd_buff[2] = lines;
488 }
114 489
115 cmd_buff[0] = IPC::MakeHeader(0xA, 2, 0); 490 cmd_buff[0] = IPC::MakeHeader(0xA, 2, 0);
116 cmd_buff[1] = RESULT_SUCCESS.raw;
117 cmd_buff[2] = TRANSFER_BYTES / (2 * width);
118 491
119 LOG_WARNING(Service_CAM, "(STUBBED) called, width=%d, height=%d, lines = %d", width, height, 492 LOG_DEBUG(Service_CAM, "called, width=%u, height=%u", width, height);
120 cmd_buff[2]); 493}
494
495void SetTransferBytes(Service::Interface* self) {
496 u32* cmd_buff = Kernel::GetCommandBuffer();
497
498 const PortSet port_select(cmd_buff[1]);
499 const u32 transfer_bytes = cmd_buff[2] & 0xFFFF;
500 const u32 width = cmd_buff[3] & 0xFFFF;
501 const u32 height = cmd_buff[4] & 0xFFFF;
502
503 if (port_select.IsValid()) {
504 for (int i : port_select) {
505 ports[i].transfer_bytes = transfer_bytes;
506 }
507 cmd_buff[1] = RESULT_SUCCESS.raw;
508 } else {
509 LOG_ERROR(Service_CAM, "invalid port_select=%u", port_select.m_val);
510 cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw;
511 }
512
513 cmd_buff[0] = IPC::MakeHeader(0xB, 1, 0);
514
515 LOG_WARNING(Service_CAM, "(STUBBED)called, port_select=%u, bytes=%u, width=%u, height=%u",
516 port_select.m_val, transfer_bytes, width, height);
121} 517}
122 518
123void GetTransferBytes(Service::Interface* self) { 519void GetTransferBytes(Service::Interface* self) {
124 u32* cmd_buff = Kernel::GetCommandBuffer(); 520 u32* cmd_buff = Kernel::GetCommandBuffer();
125 521
126 u8 port = cmd_buff[1] & 0xFF; 522 const PortSet port_select(cmd_buff[1]);
523
524 if (port_select.IsSingle()) {
525 int port = *port_select.begin();
526 cmd_buff[1] = RESULT_SUCCESS.raw;
527 cmd_buff[2] = ports[port].transfer_bytes;
528 } else {
529 LOG_ERROR(Service_CAM, "invalid port_select=%u", port_select.m_val);
530 cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw;
531 }
127 532
128 cmd_buff[0] = IPC::MakeHeader(0xC, 2, 0); 533 cmd_buff[0] = IPC::MakeHeader(0xC, 2, 0);
129 cmd_buff[1] = RESULT_SUCCESS.raw;
130 cmd_buff[2] = TRANSFER_BYTES;
131 534
132 LOG_WARNING(Service_CAM, "(STUBBED) called, port=%d", port); 535 LOG_WARNING(Service_CAM, "(STUBBED)called, port_select=%u", port_select.m_val);
536}
537
538void GetMaxBytes(Service::Interface* self) {
539 u32* cmd_buff = Kernel::GetCommandBuffer();
540
541 const u32 width = cmd_buff[1] & 0xFFFF;
542 const u32 height = cmd_buff[2] & 0xFFFF;
543
544 // Note: the result of the algorithm below are hwtested with width < 640 and with height < 480
545 constexpr u32 MIN_TRANSFER_UNIT = 256;
546 constexpr u32 MAX_BUFFER_SIZE = 2560;
547 if (width * height * 2 % MIN_TRANSFER_UNIT != 0) {
548 cmd_buff[1] = ERROR_OUT_OF_RANGE.raw;
549 } else {
550 u32 bytes = MAX_BUFFER_SIZE;
551
552 while (width * height * 2 % bytes != 0) {
553 bytes -= MIN_TRANSFER_UNIT;
554 }
555
556 cmd_buff[1] = RESULT_SUCCESS.raw;
557 cmd_buff[2] = bytes;
558 }
559 cmd_buff[0] = IPC::MakeHeader(0xD, 2, 0);
560
561 LOG_DEBUG(Service_CAM, "called, width=%u, height=%u", width, height);
133} 562}
134 563
135void SetTrimming(Service::Interface* self) { 564void SetTrimming(Service::Interface* self) {
136 u32* cmd_buff = Kernel::GetCommandBuffer(); 565 u32* cmd_buff = Kernel::GetCommandBuffer();
137 566
138 u8 port = cmd_buff[1] & 0xFF; 567 const PortSet port_select(cmd_buff[1]);
139 bool trim = (cmd_buff[2] & 0xFF) != 0; 568 const bool trim = (cmd_buff[2] & 0xFF) != 0;
569
570 if (port_select.IsValid()) {
571 for (int i : port_select) {
572 ports[i].is_trimming = trim;
573 }
574 cmd_buff[1] = RESULT_SUCCESS.raw;
575 } else {
576 LOG_ERROR(Service_CAM, "invalid port_select=%u", port_select.m_val);
577 cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw;
578 }
140 579
141 cmd_buff[0] = IPC::MakeHeader(0xE, 1, 0); 580 cmd_buff[0] = IPC::MakeHeader(0xE, 1, 0);
142 cmd_buff[1] = RESULT_SUCCESS.raw;
143 581
144 LOG_WARNING(Service_CAM, "(STUBBED) called, port=%d, trim=%d", port, trim); 582 LOG_DEBUG(Service_CAM, "called, port_select=%u, trim=%d", port_select.m_val, trim);
583}
584
585void IsTrimming(Service::Interface* self) {
586 u32* cmd_buff = Kernel::GetCommandBuffer();
587
588 const PortSet port_select(cmd_buff[1]);
589
590 if (port_select.IsSingle()) {
591 int port = *port_select.begin();
592 cmd_buff[1] = RESULT_SUCCESS.raw;
593 cmd_buff[2] = ports[port].is_trimming;
594 } else {
595 LOG_ERROR(Service_CAM, "invalid port_select=%u", port_select.m_val);
596 cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw;
597 }
598
599 cmd_buff[0] = IPC::MakeHeader(0xF, 2, 0);
600
601 LOG_DEBUG(Service_CAM, "called, port_select=%u", port_select.m_val);
602}
603
604void SetTrimmingParams(Service::Interface* self) {
605 u32* cmd_buff = Kernel::GetCommandBuffer();
606
607 const PortSet port_select(cmd_buff[1]);
608 const u16 x0 = static_cast<u16>(cmd_buff[2] & 0xFFFF);
609 const u16 y0 = static_cast<u16>(cmd_buff[3] & 0xFFFF);
610 const u16 x1 = static_cast<u16>(cmd_buff[4] & 0xFFFF);
611 const u16 y1 = static_cast<u16>(cmd_buff[5] & 0xFFFF);
612
613 if (port_select.IsValid()) {
614 for (int i : port_select) {
615 ports[i].x0 = x0;
616 ports[i].y0 = y0;
617 ports[i].x1 = x1;
618 ports[i].y1 = y1;
619 }
620 cmd_buff[1] = RESULT_SUCCESS.raw;
621 } else {
622 LOG_ERROR(Service_CAM, "invalid port_select=%u", port_select.m_val);
623 cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw;
624 }
625
626 cmd_buff[0] = IPC::MakeHeader(0x10, 1, 0);
627
628 LOG_DEBUG(Service_CAM, "called, port_select=%u, x0=%u, y0=%u, x1=%u, y1=%u", port_select.m_val,
629 x0, y0, x1, y1);
630}
631
632void GetTrimmingParams(Service::Interface* self) {
633 u32* cmd_buff = Kernel::GetCommandBuffer();
634
635 const PortSet port_select(cmd_buff[1]);
636
637 if (port_select.IsSingle()) {
638 int port = *port_select.begin();
639 cmd_buff[1] = RESULT_SUCCESS.raw;
640 cmd_buff[2] = ports[port].x0;
641 cmd_buff[3] = ports[port].y0;
642 cmd_buff[4] = ports[port].x1;
643 cmd_buff[5] = ports[port].y1;
644 } else {
645 LOG_ERROR(Service_CAM, "invalid port_select=%u", port_select.m_val);
646 cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw;
647 }
648
649 cmd_buff[0] = IPC::MakeHeader(0x11, 5, 0);
650
651 LOG_DEBUG(Service_CAM, "called, port_select=%u", port_select.m_val);
145} 652}
146 653
147void SetTrimmingParamsCenter(Service::Interface* self) { 654void SetTrimmingParamsCenter(Service::Interface* self) {
148 u32* cmd_buff = Kernel::GetCommandBuffer(); 655 u32* cmd_buff = Kernel::GetCommandBuffer();
149 656
150 u8 port = cmd_buff[1] & 0xFF; 657 const PortSet port_select(cmd_buff[1]);
151 s16 trimW = cmd_buff[2] & 0xFFFF; 658 const u16 trim_w = static_cast<u16>(cmd_buff[2] & 0xFFFF);
152 s16 trimH = cmd_buff[3] & 0xFFFF; 659 const u16 trim_h = static_cast<u16>(cmd_buff[3] & 0xFFFF);
153 s16 camW = cmd_buff[4] & 0xFFFF; 660 const u16 cam_w = static_cast<u16>(cmd_buff[4] & 0xFFFF);
154 s16 camH = cmd_buff[5] & 0xFFFF; 661 const u16 cam_h = static_cast<u16>(cmd_buff[5] & 0xFFFF);
662
663 if (port_select.IsValid()) {
664 for (int i : port_select) {
665 ports[i].x0 = (cam_w - trim_w) / 2;
666 ports[i].y0 = (cam_h - trim_h) / 2;
667 ports[i].x1 = ports[i].x0 + trim_w;
668 ports[i].y1 = ports[i].y0 + trim_h;
669 }
670 cmd_buff[1] = RESULT_SUCCESS.raw;
671 } else {
672 LOG_ERROR(Service_CAM, "invalid port_select=%u", port_select.m_val);
673 cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw;
674 }
155 675
156 cmd_buff[0] = IPC::MakeHeader(0x12, 1, 0); 676 cmd_buff[0] = IPC::MakeHeader(0x12, 1, 0);
157 cmd_buff[1] = RESULT_SUCCESS.raw;
158 677
159 LOG_WARNING(Service_CAM, "(STUBBED) called, port=%d, trimW=%d, trimH=%d, camW=%d, camH=%d", 678 LOG_DEBUG(Service_CAM, "called, port_select=%u, trim_w=%u, trim_h=%u, cam_w=%u, cam_h=%u",
160 port, trimW, trimH, camW, camH); 679 port_select.m_val, trim_w, trim_h, cam_w, cam_h);
161} 680}
162 681
163void Activate(Service::Interface* self) { 682void Activate(Service::Interface* self) {
164 u32* cmd_buff = Kernel::GetCommandBuffer(); 683 u32* cmd_buff = Kernel::GetCommandBuffer();
165 684
166 u8 cam_select = cmd_buff[1] & 0xFF; 685 const CameraSet camera_select(cmd_buff[1]);
686
687 if (camera_select.IsValid()) {
688 if (camera_select.m_val == 0) { // deactive all
689 for (int i = 0; i < 2; ++i) {
690 if (ports[i].is_busy) {
691 CancelReceiving(i);
692 cameras[ports[i].camera_id].impl->StopCapture();
693 ports[i].is_busy = false;
694 }
695 ports[i].is_active = false;
696 }
697 cmd_buff[1] = RESULT_SUCCESS.raw;
698 } else if (camera_select[0] && camera_select[1]) {
699 LOG_ERROR(Service_CAM, "camera 0 and 1 can't be both activated");
700 cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw;
701 } else {
702 if (camera_select[0]) {
703 ActivatePort(0, 0);
704 } else if (camera_select[1]) {
705 ActivatePort(0, 1);
706 }
707
708 if (camera_select[2]) {
709 ActivatePort(1, 2);
710 }
711 cmd_buff[1] = RESULT_SUCCESS.raw;
712 }
713 } else {
714 LOG_ERROR(Service_CAM, "invalid camera_select=%u", camera_select.m_val);
715 cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw;
716 }
167 717
168 cmd_buff[0] = IPC::MakeHeader(0x13, 1, 0); 718 cmd_buff[0] = IPC::MakeHeader(0x13, 1, 0);
169 cmd_buff[1] = RESULT_SUCCESS.raw;
170 719
171 LOG_WARNING(Service_CAM, "(STUBBED) called, cam_select=%d", cam_select); 720 LOG_DEBUG(Service_CAM, "called, camera_select=%u", camera_select.m_val);
721}
722
723void SwitchContext(Service::Interface* self) {
724 u32* cmd_buff = Kernel::GetCommandBuffer();
725
726 const CameraSet camera_select(cmd_buff[1]);
727 const ContextSet context_select(cmd_buff[2]);
728
729 if (camera_select.IsValid() && context_select.IsSingle()) {
730 int context = *context_select.begin();
731 for (int camera : camera_select) {
732 cameras[camera].current_context = context;
733 const ContextConfig& context_config = cameras[camera].contexts[context];
734 cameras[camera].impl->SetFlip(context_config.flip);
735 cameras[camera].impl->SetEffect(context_config.effect);
736 cameras[camera].impl->SetFormat(context_config.format);
737 cameras[camera].impl->SetResolution(context_config.resolution);
738 }
739 cmd_buff[1] = RESULT_SUCCESS.raw;
740 } else {
741 LOG_ERROR(Service_CAM, "invalid camera_select=%u, context_select=%u", camera_select.m_val,
742 context_select.m_val);
743 cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw;
744 }
745
746 cmd_buff[0] = IPC::MakeHeader(0x14, 1, 0);
747
748 LOG_DEBUG(Service_CAM, "called, camera_select=%u, context_select=%u", camera_select.m_val,
749 context_select.m_val);
172} 750}
173 751
174void FlipImage(Service::Interface* self) { 752void FlipImage(Service::Interface* self) {
175 u32* cmd_buff = Kernel::GetCommandBuffer(); 753 u32* cmd_buff = Kernel::GetCommandBuffer();
176 754
177 u8 cam_select = cmd_buff[1] & 0xFF; 755 const CameraSet camera_select(cmd_buff[1]);
178 u8 flip = cmd_buff[2] & 0xFF; 756 const Flip flip = static_cast<Flip>(cmd_buff[2] & 0xFF);
179 u8 context = cmd_buff[3] & 0xFF; 757 const ContextSet context_select(cmd_buff[3]);
758
759 if (camera_select.IsValid() && context_select.IsValid()) {
760 for (int camera : camera_select) {
761 for (int context : context_select) {
762 cameras[camera].contexts[context].flip = flip;
763 if (cameras[camera].current_context == context) {
764 cameras[camera].impl->SetFlip(flip);
765 }
766 }
767 }
768 cmd_buff[1] = RESULT_SUCCESS.raw;
769 } else {
770 LOG_ERROR(Service_CAM, "invalid camera_select=%u, context_select=%u", camera_select.m_val,
771 context_select.m_val);
772 cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw;
773 }
180 774
181 cmd_buff[0] = IPC::MakeHeader(0x1D, 1, 0); 775 cmd_buff[0] = IPC::MakeHeader(0x1D, 1, 0);
182 cmd_buff[1] = RESULT_SUCCESS.raw;
183 776
184 LOG_WARNING(Service_CAM, "(STUBBED) called, cam_select=%d, flip=%d, context=%d", cam_select, 777 LOG_DEBUG(Service_CAM, "called, camera_select=%u, flip=%d, context_select=%u",
185 flip, context); 778 camera_select.m_val, static_cast<int>(flip), context_select.m_val);
779}
780
781void SetDetailSize(Service::Interface* self) {
782 u32* cmd_buff = Kernel::GetCommandBuffer();
783
784 const CameraSet camera_select(cmd_buff[1]);
785 Resolution resolution;
786 resolution.width = static_cast<u16>(cmd_buff[2] & 0xFFFF);
787 resolution.height = static_cast<u16>(cmd_buff[3] & 0xFFFF);
788 resolution.crop_x0 = static_cast<u16>(cmd_buff[4] & 0xFFFF);
789 resolution.crop_y0 = static_cast<u16>(cmd_buff[5] & 0xFFFF);
790 resolution.crop_x1 = static_cast<u16>(cmd_buff[6] & 0xFFFF);
791 resolution.crop_y1 = static_cast<u16>(cmd_buff[7] & 0xFFFF);
792 const ContextSet context_select(cmd_buff[8]);
793
794 if (camera_select.IsValid() && context_select.IsValid()) {
795 for (int camera : camera_select) {
796 for (int context : context_select) {
797 cameras[camera].contexts[context].resolution = resolution;
798 if (cameras[camera].current_context == context) {
799 cameras[camera].impl->SetResolution(resolution);
800 }
801 }
802 }
803 cmd_buff[1] = RESULT_SUCCESS.raw;
804 } else {
805 LOG_ERROR(Service_CAM, "invalid camera_select=%u, context_select=%u", camera_select.m_val,
806 context_select.m_val);
807 cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw;
808 }
809
810 cmd_buff[0] = IPC::MakeHeader(0x1E, 1, 0);
811
812 LOG_DEBUG(Service_CAM, "called, camera_select=%u, width=%u, height=%u, crop_x0=%u, crop_y0=%u, "
813 "crop_x1=%u, crop_y1=%u, context_select=%u",
814 camera_select.m_val, resolution.width, resolution.height, resolution.crop_x0,
815 resolution.crop_y0, resolution.crop_x1, resolution.crop_y1, context_select.m_val);
186} 816}
187 817
188void SetSize(Service::Interface* self) { 818void SetSize(Service::Interface* self) {
189 u32* cmd_buff = Kernel::GetCommandBuffer(); 819 u32* cmd_buff = Kernel::GetCommandBuffer();
190 820
191 u8 cam_select = cmd_buff[1] & 0xFF; 821 const CameraSet camera_select(cmd_buff[1]);
192 u8 size = cmd_buff[2] & 0xFF; 822 const u32 size = cmd_buff[2] & 0xFF;
193 u8 context = cmd_buff[3] & 0xFF; 823 const ContextSet context_select(cmd_buff[3]);
824
825 if (camera_select.IsValid() && context_select.IsValid()) {
826 for (int camera : camera_select) {
827 for (int context : context_select) {
828 cameras[camera].contexts[context].resolution = PRESET_RESOLUTION[size];
829 if (cameras[camera].current_context == context) {
830 cameras[camera].impl->SetResolution(PRESET_RESOLUTION[size]);
831 }
832 }
833 }
834 cmd_buff[1] = RESULT_SUCCESS.raw;
835 } else {
836 LOG_ERROR(Service_CAM, "invalid camera_select=%u, context_select=%u", camera_select.m_val,
837 context_select.m_val);
838 cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw;
839 }
194 840
195 cmd_buff[0] = IPC::MakeHeader(0x1F, 1, 0); 841 cmd_buff[0] = IPC::MakeHeader(0x1F, 1, 0);
196 cmd_buff[1] = RESULT_SUCCESS.raw;
197 842
198 LOG_WARNING(Service_CAM, "(STUBBED) called, cam_select=%d, size=%d, context=%d", cam_select, 843 LOG_DEBUG(Service_CAM, "called, camera_select=%u, size=%u, context_select=%u",
199 size, context); 844 camera_select.m_val, size, context_select.m_val);
200} 845}
201 846
202void SetFrameRate(Service::Interface* self) { 847void SetFrameRate(Service::Interface* self) {
203 u32* cmd_buff = Kernel::GetCommandBuffer(); 848 u32* cmd_buff = Kernel::GetCommandBuffer();
204 849
205 u8 cam_select = cmd_buff[1] & 0xFF; 850 const CameraSet camera_select(cmd_buff[1]);
206 u8 frame_rate = cmd_buff[2] & 0xFF; 851 const FrameRate frame_rate = static_cast<FrameRate>(cmd_buff[2] & 0xFF);
852
853 if (camera_select.IsValid()) {
854 for (int camera : camera_select) {
855 cameras[camera].frame_rate = frame_rate;
856 // TODO(wwylele): consider hinting the actual camera with the expected frame rate
857 }
858 cmd_buff[1] = RESULT_SUCCESS.raw;
859 } else {
860 LOG_ERROR(Service_CAM, "invalid camera_select=%u", camera_select.m_val);
861 cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw;
862 }
207 863
208 cmd_buff[0] = IPC::MakeHeader(0x20, 1, 0); 864 cmd_buff[0] = IPC::MakeHeader(0x20, 1, 0);
865
866 LOG_WARNING(Service_CAM, "(STUBBED) called, camera_select=%u, frame_rate=%d",
867 camera_select.m_val, static_cast<int>(frame_rate));
868}
869
870void SetEffect(Service::Interface* self) {
871 u32* cmd_buff = Kernel::GetCommandBuffer();
872
873 const CameraSet camera_select(cmd_buff[1]);
874 const Effect effect = static_cast<Effect>(cmd_buff[2] & 0xFF);
875 const ContextSet context_select(cmd_buff[3]);
876
877 if (camera_select.IsValid() && context_select.IsValid()) {
878 for (int camera : camera_select) {
879 for (int context : context_select) {
880 cameras[camera].contexts[context].effect = effect;
881 if (cameras[camera].current_context == context) {
882 cameras[camera].impl->SetEffect(effect);
883 }
884 }
885 }
886 cmd_buff[1] = RESULT_SUCCESS.raw;
887 } else {
888 LOG_ERROR(Service_CAM, "invalid camera_select=%u, context_select=%u", camera_select.m_val,
889 context_select.m_val);
890 cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw;
891 }
892
893 cmd_buff[0] = IPC::MakeHeader(0x22, 1, 0);
894
895 LOG_DEBUG(Service_CAM, "called, camera_select=%u, effect=%d, context_select=%u",
896 camera_select.m_val, static_cast<int>(effect), context_select.m_val);
897}
898
899void SetOutputFormat(Service::Interface* self) {
900 u32* cmd_buff = Kernel::GetCommandBuffer();
901
902 const CameraSet camera_select(cmd_buff[1]);
903 const OutputFormat format = static_cast<OutputFormat>(cmd_buff[2] & 0xFF);
904 const ContextSet context_select(cmd_buff[3]);
905
906 if (camera_select.IsValid() && context_select.IsValid()) {
907 for (int camera : camera_select) {
908 for (int context : context_select) {
909 cameras[camera].contexts[context].format = format;
910 if (cameras[camera].current_context == context) {
911 cameras[camera].impl->SetFormat(format);
912 }
913 }
914 }
915 cmd_buff[1] = RESULT_SUCCESS.raw;
916 } else {
917 LOG_ERROR(Service_CAM, "invalid camera_select=%u, context_select=%u", camera_select.m_val,
918 context_select.m_val);
919 cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw;
920 }
921
922 cmd_buff[0] = IPC::MakeHeader(0x25, 1, 0);
923
924 LOG_DEBUG(Service_CAM, "called, camera_select=%u, format=%d, context_select=%u",
925 camera_select.m_val, static_cast<int>(format), context_select.m_val);
926}
927
928void SynchronizeVsyncTiming(Service::Interface* self) {
929 u32* cmd_buff = Kernel::GetCommandBuffer();
930
931 const u32 camera_select1 = cmd_buff[1] & 0xFF;
932 const u32 camera_select2 = cmd_buff[2] & 0xFF;
933
934 cmd_buff[0] = IPC::MakeHeader(0x29, 1, 0);
209 cmd_buff[1] = RESULT_SUCCESS.raw; 935 cmd_buff[1] = RESULT_SUCCESS.raw;
210 936
211 LOG_WARNING(Service_CAM, "(STUBBED) called, cam_select=%d, frame_rate=%d", cam_select, 937 LOG_WARNING(Service_CAM, "(STUBBED) called, camera_select1=%u, camera_select2=%u",
212 frame_rate); 938 camera_select1, camera_select2);
213} 939}
214 940
215void GetStereoCameraCalibrationData(Service::Interface* self) { 941void GetStereoCameraCalibrationData(Service::Interface* self) {
@@ -239,6 +965,67 @@ void GetStereoCameraCalibrationData(Service::Interface* self) {
239 LOG_TRACE(Service_CAM, "called"); 965 LOG_TRACE(Service_CAM, "called");
240} 966}
241 967
968void SetPackageParameterWithoutContext(Service::Interface* self) {
969 u32* cmd_buff = Kernel::GetCommandBuffer();
970
971 PackageParameterWithoutContext package;
972 std::memcpy(&package, cmd_buff + 1, sizeof(package));
973
974 cmd_buff[0] = IPC::MakeHeader(0x33, 1, 0);
975 cmd_buff[1] = RESULT_SUCCESS.raw;
976
977 LOG_WARNING(Service_CAM, "(STUBBED) called");
978}
979
980template <typename PackageParameterType, int command_id>
981static void SetPackageParameter() {
982 u32* cmd_buff = Kernel::GetCommandBuffer();
983
984 PackageParameterType package;
985 std::memcpy(&package, cmd_buff + 1, sizeof(package));
986
987 const CameraSet camera_select(static_cast<u32>(package.camera_select));
988 const ContextSet context_select(static_cast<u32>(package.context_select));
989
990 if (camera_select.IsValid() && context_select.IsValid()) {
991 for (int camera_id : camera_select) {
992 CameraConfig& camera = cameras[camera_id];
993 for (int context_id : context_select) {
994 ContextConfig& context = camera.contexts[context_id];
995 context.effect = package.effect;
996 context.flip = package.flip;
997 context.resolution = package.GetResolution();
998 if (context_id == camera.current_context) {
999 camera.impl->SetEffect(context.effect);
1000 camera.impl->SetFlip(context.flip);
1001 camera.impl->SetResolution(context.resolution);
1002 }
1003 }
1004 }
1005 cmd_buff[1] = RESULT_SUCCESS.raw;
1006 } else {
1007 LOG_ERROR(Service_CAM, "invalid camera_select=%u, context_select=%u", package.camera_select,
1008 package.context_select);
1009 cmd_buff[1] = ERROR_INVALID_ENUM_VALUE.raw;
1010 }
1011
1012 cmd_buff[0] = IPC::MakeHeader(command_id, 1, 0);
1013
1014 LOG_DEBUG(Service_CAM, "called");
1015}
1016
1017Resolution PackageParameterWithContext::GetResolution() {
1018 return PRESET_RESOLUTION[static_cast<int>(size)];
1019}
1020
1021void SetPackageParameterWithContext(Service::Interface* self) {
1022 SetPackageParameter<PackageParameterWithContext, 0x34>();
1023}
1024
1025void SetPackageParameterWithContextDetail(Service::Interface* self) {
1026 SetPackageParameter<PackageParameterWithContextDetail, 0x35>();
1027}
1028
242void GetSuitableY2rStandardCoefficient(Service::Interface* self) { 1029void GetSuitableY2rStandardCoefficient(Service::Interface* self) {
243 u32* cmd_buff = Kernel::GetCommandBuffer(); 1030 u32* cmd_buff = Kernel::GetCommandBuffer();
244 1031
@@ -263,24 +1050,50 @@ void PlayShutterSound(Service::Interface* self) {
263void DriverInitialize(Service::Interface* self) { 1050void DriverInitialize(Service::Interface* self) {
264 u32* cmd_buff = Kernel::GetCommandBuffer(); 1051 u32* cmd_buff = Kernel::GetCommandBuffer();
265 1052
266 completion_event_cam1->Clear(); 1053 for (int camera_id = 0; camera_id < NumCameras; ++camera_id) {
267 completion_event_cam2->Clear(); 1054 CameraConfig& camera = cameras[camera_id];
268 interrupt_error_event->Clear(); 1055 camera.current_context = 0;
269 vsync_interrupt_error_event->Clear(); 1056 for (int context_id = 0; context_id < 2; ++context_id) {
1057 // Note: the following default values are verified against real 3DS
1058 ContextConfig& context = camera.contexts[context_id];
1059 context.flip = camera_id == 1 ? Flip::Horizontal : Flip::None;
1060 context.effect = Effect::None;
1061 context.format = OutputFormat::YUV422;
1062 context.resolution =
1063 context_id == 0 ? PRESET_RESOLUTION[5 /*DS_LCD*/] : PRESET_RESOLUTION[0 /*VGA*/];
1064 }
1065 camera.impl = Camera::CreateCamera(Settings::values.camera_name[camera_id],
1066 Settings::values.camera_config[camera_id]);
1067 camera.impl->SetFlip(camera.contexts[0].flip);
1068 camera.impl->SetEffect(camera.contexts[0].effect);
1069 camera.impl->SetFormat(camera.contexts[0].format);
1070 camera.impl->SetResolution(camera.contexts[0].resolution);
1071 }
1072
1073 for (PortConfig& port : ports) {
1074 port.Clear();
1075 }
270 1076
271 cmd_buff[0] = IPC::MakeHeader(0x39, 1, 0); 1077 cmd_buff[0] = IPC::MakeHeader(0x39, 1, 0);
272 cmd_buff[1] = RESULT_SUCCESS.raw; 1078 cmd_buff[1] = RESULT_SUCCESS.raw;
273 1079
274 LOG_WARNING(Service_CAM, "(STUBBED) called"); 1080 LOG_DEBUG(Service_CAM, "called");
275} 1081}
276 1082
277void DriverFinalize(Service::Interface* self) { 1083void DriverFinalize(Service::Interface* self) {
278 u32* cmd_buff = Kernel::GetCommandBuffer(); 1084 u32* cmd_buff = Kernel::GetCommandBuffer();
279 1085
1086 CancelReceiving(0);
1087 CancelReceiving(1);
1088
1089 for (CameraConfig& camera : cameras) {
1090 camera.impl = nullptr;
1091 }
1092
280 cmd_buff[0] = IPC::MakeHeader(0x3A, 1, 0); 1093 cmd_buff[0] = IPC::MakeHeader(0x3A, 1, 0);
281 cmd_buff[1] = RESULT_SUCCESS.raw; 1094 cmd_buff[1] = RESULT_SUCCESS.raw;
282 1095
283 LOG_WARNING(Service_CAM, "(STUBBED) called"); 1096 LOG_DEBUG(Service_CAM, "called");
284} 1097}
285 1098
286void Init() { 1099void Init() {
@@ -291,21 +1104,28 @@ void Init() {
291 AddService(new CAM_S_Interface); 1104 AddService(new CAM_S_Interface);
292 AddService(new CAM_U_Interface); 1105 AddService(new CAM_U_Interface);
293 1106
294 completion_event_cam1 = 1107 for (PortConfig& port : ports) {
295 Kernel::Event::Create(ResetType::OneShot, "CAM_U::completion_event_cam1"); 1108 port.completion_event = Event::Create(ResetType::Sticky, "CAM_U::completion_event");
296 completion_event_cam2 = 1109 port.buffer_error_interrupt_event =
297 Kernel::Event::Create(ResetType::OneShot, "CAM_U::completion_event_cam2"); 1110 Event::Create(ResetType::OneShot, "CAM_U::buffer_error_interrupt_event");
298 interrupt_error_event = 1111 port.vsync_interrupt_event =
299 Kernel::Event::Create(ResetType::OneShot, "CAM_U::interrupt_error_event"); 1112 Event::Create(ResetType::OneShot, "CAM_U::vsync_interrupt_event");
300 vsync_interrupt_error_event = 1113 }
301 Kernel::Event::Create(ResetType::OneShot, "CAM_U::vsync_interrupt_error_event"); 1114 completion_event_callback =
1115 CoreTiming::RegisterEvent("CAM_U::CompletionEventCallBack", CompletionEventCallBack);
302} 1116}
303 1117
304void Shutdown() { 1118void Shutdown() {
305 completion_event_cam1 = nullptr; 1119 CancelReceiving(0);
306 completion_event_cam2 = nullptr; 1120 CancelReceiving(1);
307 interrupt_error_event = nullptr; 1121 for (PortConfig& port : ports) {
308 vsync_interrupt_error_event = nullptr; 1122 port.completion_event = nullptr;
1123 port.buffer_error_interrupt_event = nullptr;
1124 port.vsync_interrupt_event = nullptr;
1125 }
1126 for (CameraConfig& camera : cameras) {
1127 camera.impl = nullptr;
1128 }
309} 1129}
310 1130
311} // namespace CAM 1131} // namespace CAM
diff --git a/src/core/hle/service/cam/cam.h b/src/core/hle/service/cam/cam.h
index c9b6f8acf..34a9c8479 100644
--- a/src/core/hle/service/cam/cam.h
+++ b/src/core/hle/service/cam/cam.h
@@ -13,17 +13,12 @@
13namespace Service { 13namespace Service {
14namespace CAM { 14namespace CAM {
15 15
16enum class Port : u8 { None = 0, Cam1 = 1, Cam2 = 2, Both = Cam1 | Cam2 }; 16enum CameraIndex {
17 OuterRightCamera = 0,
18 InnerCamera = 1,
19 OuterLeftCamera = 2,
17 20
18enum class CameraSelect : u8 { 21 NumCameras = 3,
19 None = 0,
20 Out1 = 1,
21 In1 = 2,
22 Out2 = 4,
23 In1Out1 = Out1 | In1,
24 Out1Out2 = Out1 | Out2,
25 In1Out2 = In1 | Out2,
26 All = Out1 | In1 | Out2,
27}; 22};
28 23
29enum class Effect : u8 { 24enum class Effect : u8 {
@@ -35,13 +30,6 @@ enum class Effect : u8 {
35 Sepia01 = 5, 30 Sepia01 = 5,
36}; 31};
37 32
38enum class Context : u8 {
39 None = 0,
40 A = 1,
41 B = 2,
42 Both = A | B,
43};
44
45enum class Flip : u8 { 33enum class Flip : u8 {
46 None = 0, 34 None = 0,
47 Horizontal = 1, 35 Horizontal = 1,
@@ -160,8 +148,23 @@ struct StereoCameraCalibrationData {
160static_assert(sizeof(StereoCameraCalibrationData) == 64, 148static_assert(sizeof(StereoCameraCalibrationData) == 64,
161 "StereoCameraCalibrationData structure size is wrong"); 149 "StereoCameraCalibrationData structure size is wrong");
162 150
163struct PackageParameterCameraSelect { 151/**
164 CameraSelect camera; 152 * Resolution parameters for the camera.
153 * The native resolution of 3DS camera is 640 * 480. The captured image will be cropped in the
154 * region [crop_x0, crop_x1] * [crop_y0, crop_y1], and then scaled to size width * height as the
155 * output image. Note that all cropping coordinates are inclusive.
156 */
157struct Resolution {
158 u16 width;
159 u16 height;
160 u16 crop_x0;
161 u16 crop_y0;
162 u16 crop_x1;
163 u16 crop_y1;
164};
165
166struct PackageParameterWithoutContext {
167 u8 camera_select;
165 s8 exposure; 168 s8 exposure;
166 WhiteBalance white_balance; 169 WhiteBalance white_balance;
167 s8 sharpness; 170 s8 sharpness;
@@ -183,14 +186,43 @@ struct PackageParameterCameraSelect {
183 s16 auto_white_balance_window_height; 186 s16 auto_white_balance_window_height;
184}; 187};
185 188
186static_assert(sizeof(PackageParameterCameraSelect) == 28, 189static_assert(sizeof(PackageParameterWithoutContext) == 28,
187 "PackageParameterCameraSelect structure size is wrong"); 190 "PackageParameterCameraWithoutContext structure size is wrong");
191
192struct PackageParameterWithContext {
193 u8 camera_select;
194 u8 context_select;
195 Flip flip;
196 Effect effect;
197 Size size;
198 INSERT_PADDING_BYTES(3);
199
200 Resolution GetResolution();
201};
202
203static_assert(sizeof(PackageParameterWithContext) == 8,
204 "PackageParameterWithContext structure size is wrong");
205
206struct PackageParameterWithContextDetail {
207 u8 camera_select;
208 u8 context_select;
209 Flip flip;
210 Effect effect;
211 Resolution resolution;
212
213 Resolution GetResolution() {
214 return resolution;
215 }
216};
217
218static_assert(sizeof(PackageParameterWithContextDetail) == 16,
219 "PackageParameterWithContextDetail structure size is wrong");
188 220
189/** 221/**
190 * Unknown 222 * Starts capturing at the selected port.
191 * Inputs: 223 * Inputs:
192 * 0: 0x00010040 224 * 0: 0x00010040
193 * 1: u8 Camera port (`Port` enum) 225 * 1: u8 selected port
194 * Outputs: 226 * Outputs:
195 * 0: 0x00010040 227 * 0: 0x00010040
196 * 1: ResultCode 228 * 1: ResultCode
@@ -198,10 +230,10 @@ static_assert(sizeof(PackageParameterCameraSelect) == 28,
198void StartCapture(Service::Interface* self); 230void StartCapture(Service::Interface* self);
199 231
200/** 232/**
201 * Unknown 233 * Stops capturing from the selected port.
202 * Inputs: 234 * Inputs:
203 * 0: 0x00020040 235 * 0: 0x00020040
204 * 1: u8 Camera port (`Port` enum) 236 * 1: u8 selected port
205 * Outputs: 237 * Outputs:
206 * 0: 0x00020040 238 * 0: 0x00020040
207 * 1: ResultCode 239 * 1: ResultCode
@@ -209,10 +241,33 @@ void StartCapture(Service::Interface* self);
209void StopCapture(Service::Interface* self); 241void StopCapture(Service::Interface* self);
210 242
211/** 243/**
244 * Gets whether the selected port is currently capturing.
245 * Inputs:
246 * 0: 0x00030040
247 * 1: u8 selected port
248 * Outputs:
249 * 0: 0x00030080
250 * 1: ResultCode
251 * 2: 0 if not capturing, 1 if capturing
252 */
253void IsBusy(Service::Interface* self);
254
255/**
256 * Clears the buffer of selected ports.
257 * Inputs:
258 * 0: 0x00040040
259 * 1: u8 selected port
260 * Outputs:
261 * 0: 0x00040040
262 * 2: ResultCode
263 */
264void ClearBuffer(Service::Interface* self);
265
266/**
212 * Unknown 267 * Unknown
213 * Inputs: 268 * Inputs:
214 * 0: 0x00050040 269 * 0: 0x00050040
215 * 1: u8 Camera port (`Port` enum) 270 * 1: u8 selected port
216 * Outputs: 271 * Outputs:
217 * 0: 0x00050042 272 * 0: 0x00050042
218 * 1: ResultCode 273 * 1: ResultCode
@@ -225,7 +280,7 @@ void GetVsyncInterruptEvent(Service::Interface* self);
225 * Unknown 280 * Unknown
226 * Inputs: 281 * Inputs:
227 * 0: 0x00060040 282 * 0: 0x00060040
228 * 1: u8 Camera port (`Port` enum) 283 * 1: u8 selected port
229 * Outputs: 284 * Outputs:
230 * 0: 0x00060042 285 * 0: 0x00060042
231 * 1: ResultCode 286 * 1: ResultCode
@@ -241,9 +296,9 @@ void GetBufferErrorInterruptEvent(Service::Interface* self);
241 * Inputs: 296 * Inputs:
242 * 0: 0x00070102 297 * 0: 0x00070102
243 * 1: Destination address in calling process 298 * 1: Destination address in calling process
244 * 2: u8 Camera port (`Port` enum) 299 * 2: u8 selected port
245 * 3: Image size (in bytes?) 300 * 3: Image size (in bytes)
246 * 4: u16 Transfer unit size (in bytes?) 301 * 4: u16 Transfer unit size (in bytes)
247 * 5: Descriptor: Handle 302 * 5: Descriptor: Handle
248 * 6: Handle to destination process 303 * 6: Handle to destination process
249 * Outputs: 304 * Outputs:
@@ -255,21 +310,34 @@ void GetBufferErrorInterruptEvent(Service::Interface* self);
255void SetReceiving(Service::Interface* self); 310void SetReceiving(Service::Interface* self);
256 311
257/** 312/**
258 * Unknown 313 * Gets whether the selected port finished receiving a frame.
314 * Inputs:
315 * 0: 0x00080040
316 * 1: u8 selected port
317 * Outputs:
318 * 0: 0x00080080
319 * 1: ResultCode
320 * 2: 0 if not finished, 1 if finished
321 */
322void IsFinishedReceiving(Service::Interface* self);
323
324/**
325 * Sets the number of lines the buffer contains.
259 * Inputs: 326 * Inputs:
260 * 0: 0x00090100 327 * 0: 0x00090100
261 * 1: u8 Camera port (`Port` enum) 328 * 1: u8 selected port
262 * 2: u16 Number of lines to transfer 329 * 2: u16 Number of lines to transfer
263 * 3: u16 Width 330 * 3: u16 Width
264 * 4: u16 Height 331 * 4: u16 Height
265 * Outputs: 332 * Outputs:
266 * 0: 0x00090040 333 * 0: 0x00090040
267 * 1: ResultCode 334 * 1: ResultCode
335 * @todo figure out how the "buffer" actually works.
268 */ 336 */
269void SetTransferLines(Service::Interface* self); 337void SetTransferLines(Service::Interface* self);
270 338
271/** 339/**
272 * Unknown 340 * Gets the maximum number of lines that fit in the buffer
273 * Inputs: 341 * Inputs:
274 * 0: 0x000A0080 342 * 0: 0x000A0080
275 * 1: u16 Width 343 * 1: u16 Width
@@ -277,27 +345,58 @@ void SetTransferLines(Service::Interface* self);
277 * Outputs: 345 * Outputs:
278 * 0: 0x000A0080 346 * 0: 0x000A0080
279 * 1: ResultCode 347 * 1: ResultCode
280 * 2: Maximum number of lines that fit in the buffer(?) 348 * 2: Maximum number of lines that fit in the buffer
349 * @todo figure out how the "buffer" actually works.
281 */ 350 */
282void GetMaxLines(Service::Interface* self); 351void GetMaxLines(Service::Interface* self);
283 352
284/** 353/**
285 * Unknown 354 * Sets the number of bytes the buffer contains.
355 * Inputs:
356 * 0: 0x000B0100
357 * 1: u8 selected port
358 * 2: u16 Number of bytes to transfer
359 * 3: u16 Width
360 * 4: u16 Height
361 * Outputs:
362 * 0: 0x000B0040
363 * 1: ResultCode
364 * @todo figure out how the "buffer" actually works.
365 */
366void SetTransferBytes(Service::Interface* self);
367
368/**
369 * Gets the number of bytes the buffer contains.
286 * Inputs: 370 * Inputs:
287 * 0: 0x000C0040 371 * 0: 0x000C0040
288 * 1: u8 Camera port (`Port` enum) 372 * 1: u8 selected port
289 * Outputs: 373 * Outputs:
290 * 0: 0x000C0080 374 * 0: 0x000C0080
291 * 1: ResultCode 375 * 1: ResultCode
292 * 2: Total number of bytes for each frame with current settings(?) 376 * 2: The number of bytes the buffer contains
377 * @todo figure out how the "buffer" actually works.
293 */ 378 */
294void GetTransferBytes(Service::Interface* self); 379void GetTransferBytes(Service::Interface* self);
295 380
296/** 381/**
297 * Unknown 382 * Gets the maximum number of bytes that fit in the buffer.
383 * Inputs:
384 * 0: 0x000D0080
385 * 1: u16 Width
386 * 2: u16 Height
387 * Outputs:
388 * 0: 0x000D0080
389 * 1: ResultCode
390 * 2: Maximum number of bytes that fit in the buffer
391 * @todo figure out how the "buffer" actually works.
392 */
393void GetMaxBytes(Service::Interface* self);
394
395/**
396 * Enables or disables trimming.
298 * Inputs: 397 * Inputs:
299 * 0: 0x000E0080 398 * 0: 0x000E0080
300 * 1: u8 Camera port (`Port` enum) 399 * 1: u8 selected port
301 * 2: u8 bool Enable trimming if true 400 * 2: u8 bool Enable trimming if true
302 * Outputs: 401 * Outputs:
303 * 0: 0x000E0040 402 * 0: 0x000E0040
@@ -306,14 +405,58 @@ void GetTransferBytes(Service::Interface* self);
306void SetTrimming(Service::Interface* self); 405void SetTrimming(Service::Interface* self);
307 406
308/** 407/**
309 * Unknown 408 * Gets whether trimming is enabled.
409 * Inputs:
410 * 0: 0x000F0040
411 * 1: u8 selected port
412 * Outputs:
413 * 0: 0x000F0080
414 * 1: ResultCode
415 * 2: u8 bool True if trimming is enabled
416 */
417void IsTrimming(Service::Interface* self);
418
419/**
420 * Sets the position to trim.
421 * Inputs:
422 * 0: 0x00100140
423 * 1: u8 selected port
424 * 2: x start
425 * 3: y start
426 * 4: x end (exclusive)
427 * 5: y end (exclusive)
428 * Outputs:
429 * 0: 0x00100040
430 * 1: ResultCode
431 */
432void SetTrimmingParams(Service::Interface* self);
433
434/**
435 * Gets the position to trim.
436 * Inputs:
437 * 0: 0x00110040
438 * 1: u8 selected port
439 *
440 * Outputs:
441 * 0: 0x00110140
442 * 1: ResultCode
443 * 2: x start
444 * 3: y start
445 * 4: x end (exclusive)
446 * 5: y end (exclusive)
447 */
448void GetTrimmingParams(Service::Interface* self);
449
450/**
451 * Sets the position to trim by giving the width and height. The trimming window is always at the
452 * center.
310 * Inputs: 453 * Inputs:
311 * 0: 0x00120140 454 * 0: 0x00120140
312 * 1: u8 Camera port (`Port` enum) 455 * 1: u8 selected port
313 * 2: s16 Trim width(?) 456 * 2: s16 Trim width
314 * 3: s16 Trim height(?) 457 * 3: s16 Trim height
315 * 4: s16 Camera width(?) 458 * 4: s16 Camera width
316 * 5: s16 Camera height(?) 459 * 5: s16 Camera height
317 * Outputs: 460 * Outputs:
318 * 0: 0x00120040 461 * 0: 0x00120040
319 * 1: ResultCode 462 * 1: ResultCode
@@ -324,7 +467,7 @@ void SetTrimmingParamsCenter(Service::Interface* self);
324 * Selects up to two physical cameras to enable. 467 * Selects up to two physical cameras to enable.
325 * Inputs: 468 * Inputs:
326 * 0: 0x00130040 469 * 0: 0x00130040
327 * 1: u8 Cameras to activate (`CameraSelect` enum) 470 * 1: u8 selected camera
328 * Outputs: 471 * Outputs:
329 * 0: 0x00130040 472 * 0: 0x00130040
330 * 1: ResultCode 473 * 1: ResultCode
@@ -332,12 +475,24 @@ void SetTrimmingParamsCenter(Service::Interface* self);
332void Activate(Service::Interface* self); 475void Activate(Service::Interface* self);
333 476
334/** 477/**
335 * Unknown 478 * Switches the context of camera settings.
479 * Inputs:
480 * 0: 0x00140080
481 * 1: u8 selected camera
482 * 2: u8 selected context
483 * Outputs:
484 * 0: 0x00140040
485 * 1: ResultCode
486 */
487void SwitchContext(Service::Interface* self);
488
489/**
490 * Sets flipping of images
336 * Inputs: 491 * Inputs:
337 * 0: 0x001D00C0 492 * 0: 0x001D00C0
338 * 1: u8 Camera select (`CameraSelect` enum) 493 * 1: u8 selected camera
339 * 2: u8 Type of flipping to perform (`Flip` enum) 494 * 2: u8 Type of flipping to perform (`Flip` enum)
340 * 3: u8 Context (`Context` enum) 495 * 3: u8 selected context
341 * Outputs: 496 * Outputs:
342 * 0: 0x001D0040 497 * 0: 0x001D0040
343 * 1: ResultCode 498 * 1: ResultCode
@@ -345,12 +500,30 @@ void Activate(Service::Interface* self);
345void FlipImage(Service::Interface* self); 500void FlipImage(Service::Interface* self);
346 501
347/** 502/**
348 * Unknown 503 * Sets camera resolution from custom parameters. For more details see the Resolution struct.
504 * Inputs:
505 * 0: 0x001E0200
506 * 1: u8 selected camera
507 * 2: width
508 * 3: height
509 * 4: crop x0
510 * 5: crop y0
511 * 6: crop x1
512 * 7: crop y1
513 * 8: u8 selected context
514 * Outputs:
515 * 0: 0x001E0040
516 * 1: ResultCode
517 */
518void SetDetailSize(Service::Interface* self);
519
520/**
521 * Sets camera resolution from preset resolution parameters.
349 * Inputs: 522 * Inputs:
350 * 0: 0x001F00C0 523 * 0: 0x001F00C0
351 * 1: u8 Camera select (`CameraSelect` enum) 524 * 1: u8 selected camera
352 * 2: u8 Camera frame resolution (`Size` enum) 525 * 2: u8 Camera frame resolution (`Size` enum)
353 * 3: u8 Context id (`Context` enum) 526 * 3: u8 selected context
354 * Outputs: 527 * Outputs:
355 * 0: 0x001F0040 528 * 0: 0x001F0040
356 * 1: ResultCode 529 * 1: ResultCode
@@ -358,10 +531,10 @@ void FlipImage(Service::Interface* self);
358void SetSize(Service::Interface* self); 531void SetSize(Service::Interface* self);
359 532
360/** 533/**
361 * Unknown 534 * Sets camera framerate.
362 * Inputs: 535 * Inputs:
363 * 0: 0x00200080 536 * 0: 0x00200080
364 * 1: u8 Camera select (`CameraSelect` enum) 537 * 1: u8 selected camera
365 * 2: u8 Camera framerate (`FrameRate` enum) 538 * 2: u8 Camera framerate (`FrameRate` enum)
366 * Outputs: 539 * Outputs:
367 * 0: 0x00200040 540 * 0: 0x00200040
@@ -370,6 +543,44 @@ void SetSize(Service::Interface* self);
370void SetFrameRate(Service::Interface* self); 543void SetFrameRate(Service::Interface* self);
371 544
372/** 545/**
546 * Sets effect on the output image
547 * Inputs:
548 * 0: 0x002200C0
549 * 1: u8 selected camera
550 * 2: u8 image effect (`Effect` enum)
551 * 3: u8 selected context
552 * Outputs:
553 * 0: 0x00220040
554 * 1: ResultCode
555 */
556void SetEffect(Service::Interface* self);
557
558/**
559 * Sets format of the output image
560 * Inputs:
561 * 0: 0x002500C0
562 * 1: u8 selected camera
563 * 2: u8 image format (`OutputFormat` enum)
564 * 3: u8 selected context
565 * Outputs:
566 * 0: 0x00250040
567 * 1: ResultCode
568 */
569void SetOutputFormat(Service::Interface* self);
570
571/**
572 * Synchronizes the V-Sync timing of two cameras.
573 * Inputs:
574 * 0: 0x00290080
575 * 1: u8 selected camera 1
576 * 2: u8 selected camera 2
577 * Outputs:
578 * 0: 0x00290040
579 * 1: ResultCode
580 */
581void SynchronizeVsyncTiming(Service::Interface* self);
582
583/**
373 * Returns calibration data relating the outside cameras to each other, for use in AR applications. 584 * Returns calibration data relating the outside cameras to each other, for use in AR applications.
374 * 585 *
375 * Inputs: 586 * Inputs:
@@ -382,6 +593,45 @@ void SetFrameRate(Service::Interface* self);
382void GetStereoCameraCalibrationData(Service::Interface* self); 593void GetStereoCameraCalibrationData(Service::Interface* self);
383 594
384/** 595/**
596 * Batch-configures context-free settings.
597 *
598 * Inputs:
599 * 0: 0x003302C0
600 * 1-7: struct PackageParameterWithoutContext
601 * 8-11: unused
602 * Outputs:
603 * 0: 0x00330040
604 * 1: ResultCode
605 */
606void SetPackageParameterWithoutContext(Service::Interface* self);
607
608/**
609 * Batch-configures context-related settings with preset resolution parameters.
610 *
611 * Inputs:
612 * 0: 0x00340140
613 * 1-2: struct PackageParameterWithContext
614 * 3-5: unused
615 * Outputs:
616 * 0: 0x00340040
617 * 1: ResultCode
618 */
619void SetPackageParameterWithContext(Service::Interface* self);
620
621/**
622 * Batch-configures context-related settings with custom resolution parameters
623 *
624 * Inputs:
625 * 0: 0x003501C0
626 * 1-4: struct PackageParameterWithContextDetail
627 * 5-7: unused
628 * Outputs:
629 * 0: 0x00350040
630 * 1: ResultCode
631 */
632void SetPackageParameterWithContextDetail(Service::Interface* self);
633
634/**
385 * Unknown 635 * Unknown
386 * Inputs: 636 * Inputs:
387 * 0: 0x00360000 637 * 0: 0x00360000
diff --git a/src/core/hle/service/cam/cam_u.cpp b/src/core/hle/service/cam/cam_u.cpp
index af2123e5b..251c1e6d4 100644
--- a/src/core/hle/service/cam/cam_u.cpp
+++ b/src/core/hle/service/cam/cam_u.cpp
@@ -11,24 +11,24 @@ namespace CAM {
11const Interface::FunctionInfo FunctionTable[] = { 11const Interface::FunctionInfo FunctionTable[] = {
12 {0x00010040, StartCapture, "StartCapture"}, 12 {0x00010040, StartCapture, "StartCapture"},
13 {0x00020040, StopCapture, "StopCapture"}, 13 {0x00020040, StopCapture, "StopCapture"},
14 {0x00030040, nullptr, "IsBusy"}, 14 {0x00030040, IsBusy, "IsBusy"},
15 {0x00040040, nullptr, "ClearBuffer"}, 15 {0x00040040, ClearBuffer, "ClearBuffer"},
16 {0x00050040, GetVsyncInterruptEvent, "GetVsyncInterruptEvent"}, 16 {0x00050040, GetVsyncInterruptEvent, "GetVsyncInterruptEvent"},
17 {0x00060040, GetBufferErrorInterruptEvent, "GetBufferErrorInterruptEvent"}, 17 {0x00060040, GetBufferErrorInterruptEvent, "GetBufferErrorInterruptEvent"},
18 {0x00070102, SetReceiving, "SetReceiving"}, 18 {0x00070102, SetReceiving, "SetReceiving"},
19 {0x00080040, nullptr, "IsFinishedReceiving"}, 19 {0x00080040, IsFinishedReceiving, "IsFinishedReceiving"},
20 {0x00090100, SetTransferLines, "SetTransferLines"}, 20 {0x00090100, SetTransferLines, "SetTransferLines"},
21 {0x000A0080, GetMaxLines, "GetMaxLines"}, 21 {0x000A0080, GetMaxLines, "GetMaxLines"},
22 {0x000B0100, nullptr, "SetTransferBytes"}, 22 {0x000B0100, SetTransferBytes, "SetTransferBytes"},
23 {0x000C0040, GetTransferBytes, "GetTransferBytes"}, 23 {0x000C0040, GetTransferBytes, "GetTransferBytes"},
24 {0x000D0080, nullptr, "GetMaxBytes"}, 24 {0x000D0080, GetMaxBytes, "GetMaxBytes"},
25 {0x000E0080, SetTrimming, "SetTrimming"}, 25 {0x000E0080, SetTrimming, "SetTrimming"},
26 {0x000F0040, nullptr, "IsTrimming"}, 26 {0x000F0040, IsTrimming, "IsTrimming"},
27 {0x00100140, nullptr, "SetTrimmingParams"}, 27 {0x00100140, SetTrimmingParams, "SetTrimmingParams"},
28 {0x00110040, nullptr, "GetTrimmingParams"}, 28 {0x00110040, GetTrimmingParams, "GetTrimmingParams"},
29 {0x00120140, SetTrimmingParamsCenter, "SetTrimmingParamsCenter"}, 29 {0x00120140, SetTrimmingParamsCenter, "SetTrimmingParamsCenter"},
30 {0x00130040, Activate, "Activate"}, 30 {0x00130040, Activate, "Activate"},
31 {0x00140080, nullptr, "SwitchContext"}, 31 {0x00140080, SwitchContext, "SwitchContext"},
32 {0x00150080, nullptr, "SetExposure"}, 32 {0x00150080, nullptr, "SetExposure"},
33 {0x00160080, nullptr, "SetWhiteBalance"}, 33 {0x00160080, nullptr, "SetWhiteBalance"},
34 {0x00170080, nullptr, "SetWhiteBalanceWithoutBaseUp"}, 34 {0x00170080, nullptr, "SetWhiteBalanceWithoutBaseUp"},
@@ -38,18 +38,18 @@ const Interface::FunctionInfo FunctionTable[] = {
38 {0x001B0080, nullptr, "SetAutoWhiteBalance"}, 38 {0x001B0080, nullptr, "SetAutoWhiteBalance"},
39 {0x001C0040, nullptr, "IsAutoWhiteBalance"}, 39 {0x001C0040, nullptr, "IsAutoWhiteBalance"},
40 {0x001D00C0, FlipImage, "FlipImage"}, 40 {0x001D00C0, FlipImage, "FlipImage"},
41 {0x001E0200, nullptr, "SetDetailSize"}, 41 {0x001E0200, SetDetailSize, "SetDetailSize"},
42 {0x001F00C0, SetSize, "SetSize"}, 42 {0x001F00C0, SetSize, "SetSize"},
43 {0x00200080, SetFrameRate, "SetFrameRate"}, 43 {0x00200080, SetFrameRate, "SetFrameRate"},
44 {0x00210080, nullptr, "SetPhotoMode"}, 44 {0x00210080, nullptr, "SetPhotoMode"},
45 {0x002200C0, nullptr, "SetEffect"}, 45 {0x002200C0, SetEffect, "SetEffect"},
46 {0x00230080, nullptr, "SetContrast"}, 46 {0x00230080, nullptr, "SetContrast"},
47 {0x00240080, nullptr, "SetLensCorrection"}, 47 {0x00240080, nullptr, "SetLensCorrection"},
48 {0x002500C0, nullptr, "SetOutputFormat"}, 48 {0x002500C0, SetOutputFormat, "SetOutputFormat"},
49 {0x00260140, nullptr, "SetAutoExposureWindow"}, 49 {0x00260140, nullptr, "SetAutoExposureWindow"},
50 {0x00270140, nullptr, "SetAutoWhiteBalanceWindow"}, 50 {0x00270140, nullptr, "SetAutoWhiteBalanceWindow"},
51 {0x00280080, nullptr, "SetNoiseFilter"}, 51 {0x00280080, nullptr, "SetNoiseFilter"},
52 {0x00290080, nullptr, "SynchronizeVsyncTiming"}, 52 {0x00290080, SynchronizeVsyncTiming, "SynchronizeVsyncTiming"},
53 {0x002A0080, nullptr, "GetLatestVsyncTiming"}, 53 {0x002A0080, nullptr, "GetLatestVsyncTiming"},
54 {0x002B0000, GetStereoCameraCalibrationData, "GetStereoCameraCalibrationData"}, 54 {0x002B0000, GetStereoCameraCalibrationData, "GetStereoCameraCalibrationData"},
55 {0x002C0400, nullptr, "SetStereoCameraCalibrationData"}, 55 {0x002C0400, nullptr, "SetStereoCameraCalibrationData"},
@@ -59,9 +59,9 @@ const Interface::FunctionInfo FunctionTable[] = {
59 {0x00300080, nullptr, "ReadMcuVariableI2cExclusive"}, 59 {0x00300080, nullptr, "ReadMcuVariableI2cExclusive"},
60 {0x00310180, nullptr, "SetImageQualityCalibrationData"}, 60 {0x00310180, nullptr, "SetImageQualityCalibrationData"},
61 {0x00320000, nullptr, "GetImageQualityCalibrationData"}, 61 {0x00320000, nullptr, "GetImageQualityCalibrationData"},
62 {0x003302C0, nullptr, "SetPackageParameterWithoutContext"}, 62 {0x003302C0, SetPackageParameterWithoutContext, "SetPackageParameterWithoutContext"},
63 {0x00340140, nullptr, "SetPackageParameterWithContext"}, 63 {0x00340140, SetPackageParameterWithContext, "SetPackageParameterWithContext"},
64 {0x003501C0, nullptr, "SetPackageParameterWithContextDetail"}, 64 {0x003501C0, SetPackageParameterWithContextDetail, "SetPackageParameterWithContextDetail"},
65 {0x00360000, GetSuitableY2rStandardCoefficient, "GetSuitableY2rStandardCoefficient"}, 65 {0x00360000, GetSuitableY2rStandardCoefficient, "GetSuitableY2rStandardCoefficient"},
66 {0x00370202, nullptr, "PlayShutterSoundWithWave"}, 66 {0x00370202, nullptr, "PlayShutterSoundWithWave"},
67 {0x00380040, PlayShutterSound, "PlayShutterSound"}, 67 {0x00380040, PlayShutterSound, "PlayShutterSound"},
diff --git a/src/core/hle/service/cfg/cfg.cpp b/src/core/hle/service/cfg/cfg.cpp
index 59dd6d1cd..4ddb1bc90 100644
--- a/src/core/hle/service/cfg/cfg.cpp
+++ b/src/core/hle/service/cfg/cfg.cpp
@@ -3,6 +3,8 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm> 5#include <algorithm>
6#include <array>
7#include <cryptopp/sha.h>
6#include "common/file_util.h" 8#include "common/file_util.h"
7#include "common/logging/log.h" 9#include "common/logging/log.h"
8#include "common/string_util.h" 10#include "common/string_util.h"
@@ -176,14 +178,29 @@ void SecureInfoGetRegion(Service::Interface* self) {
176} 178}
177 179
178void GenHashConsoleUnique(Service::Interface* self) { 180void GenHashConsoleUnique(Service::Interface* self) {
179 u32* cmd_buff = Kernel::GetCommandBuffer(); 181 IPC::RequestParser rp(Kernel::GetCommandBuffer(), 0x03, 1, 0);
180 u32 app_id_salt = cmd_buff[1]; 182 const u32 app_id_salt = rp.Pop<u32>() & 0x000FFFFF;
181 183
182 cmd_buff[1] = RESULT_SUCCESS.raw; 184 IPC::RequestBuilder rb = rp.MakeBuilder(3, 0);
183 cmd_buff[2] = 0x33646D6F ^ (app_id_salt & 0xFFFFF); // 3dmoo hash 185
184 cmd_buff[3] = 0x6F534841 ^ (app_id_salt & 0xFFFFF); 186 std::array<u8, 12> buffer;
187 const ResultCode result = GetConfigInfoBlock(ConsoleUniqueID2BlockID, 8, 2, buffer.data());
188 rb.Push(result);
189 if (result.IsSuccess()) {
190 std::memcpy(&buffer[8], &app_id_salt, sizeof(u32));
191 std::array<u8, CryptoPP::SHA256::DIGESTSIZE> hash;
192 CryptoPP::SHA256().CalculateDigest(hash.data(), buffer.data(), sizeof(buffer));
193 u32 low, high;
194 memcpy(&low, &hash[hash.size() - 8], sizeof(u32));
195 memcpy(&high, &hash[hash.size() - 4], sizeof(u32));
196 rb.Push(low);
197 rb.Push(high);
198 } else {
199 rb.Push<u32>(0);
200 rb.Push<u32>(0);
201 }
185 202
186 LOG_WARNING(Service_CFG, "(STUBBED) called app_id_salt=0x%X", app_id_salt); 203 LOG_DEBUG(Service_CFG, "called app_id_salt=0x%X", app_id_salt);
187} 204}
188 205
189void GetRegionCanadaUSA(Service::Interface* self) { 206void GetRegionCanadaUSA(Service::Interface* self) {
@@ -322,47 +339,11 @@ static ResultVal<void*> GetConfigInfoBlockPointer(u32 block_id, u32 size, u32 fl
322 return MakeResult<void*>(pointer); 339 return MakeResult<void*>(pointer);
323} 340}
324 341
325/// Checks if the language is available in the chosen region, and returns a proper one
326static u8 AdjustLanguageInfoBlock(u32 region, u8 language) {
327 static const std::array<std::vector<u8>, 7> region_languages{{
328 // JPN
329 {LANGUAGE_JP},
330 // USA
331 {LANGUAGE_EN, LANGUAGE_FR, LANGUAGE_ES, LANGUAGE_PT},
332 // EUR
333 {LANGUAGE_EN, LANGUAGE_FR, LANGUAGE_DE, LANGUAGE_IT, LANGUAGE_ES, LANGUAGE_NL, LANGUAGE_PT,
334 LANGUAGE_RU},
335 // AUS
336 {LANGUAGE_EN, LANGUAGE_FR, LANGUAGE_DE, LANGUAGE_IT, LANGUAGE_ES, LANGUAGE_NL, LANGUAGE_PT,
337 LANGUAGE_RU},
338 // CHN
339 {LANGUAGE_ZH},
340 // KOR
341 {LANGUAGE_KO},
342 // TWN
343 {LANGUAGE_TW},
344 }};
345 const auto& available = region_languages[region];
346 if (std::find(available.begin(), available.end(), language) == available.end()) {
347 return available[0];
348 }
349 return language;
350}
351
352ResultCode GetConfigInfoBlock(u32 block_id, u32 size, u32 flag, void* output) { 342ResultCode GetConfigInfoBlock(u32 block_id, u32 size, u32 flag, void* output) {
353 void* pointer; 343 void* pointer;
354 CASCADE_RESULT(pointer, GetConfigInfoBlockPointer(block_id, size, flag)); 344 CASCADE_RESULT(pointer, GetConfigInfoBlockPointer(block_id, size, flag));
355 memcpy(output, pointer, size); 345 memcpy(output, pointer, size);
356 346
357 // override the language setting if the region setting is auto
358 if (block_id == LanguageBlockID &&
359 Settings::values.region_value == Settings::REGION_VALUE_AUTO_SELECT) {
360 u8 language;
361 memcpy(&language, output, sizeof(u8));
362 language = AdjustLanguageInfoBlock(preferred_region_code, language);
363 memcpy(output, &language, sizeof(u8));
364 }
365
366 return RESULT_SUCCESS; 347 return RESULT_SUCCESS;
367} 348}
368 349
@@ -586,9 +567,47 @@ void Init() {
586 567
587void Shutdown() {} 568void Shutdown() {}
588 569
570/// Checks if the language is available in the chosen region, and returns a proper one
571static SystemLanguage AdjustLanguageInfoBlock(u32 region, SystemLanguage language) {
572 static const std::array<std::vector<SystemLanguage>, 7> region_languages{{
573 // JPN
574 {LANGUAGE_JP},
575 // USA
576 {LANGUAGE_EN, LANGUAGE_FR, LANGUAGE_ES, LANGUAGE_PT},
577 // EUR
578 {LANGUAGE_EN, LANGUAGE_FR, LANGUAGE_DE, LANGUAGE_IT, LANGUAGE_ES, LANGUAGE_NL, LANGUAGE_PT,
579 LANGUAGE_RU},
580 // AUS
581 {LANGUAGE_EN, LANGUAGE_FR, LANGUAGE_DE, LANGUAGE_IT, LANGUAGE_ES, LANGUAGE_NL, LANGUAGE_PT,
582 LANGUAGE_RU},
583 // CHN
584 {LANGUAGE_ZH},
585 // KOR
586 {LANGUAGE_KO},
587 // TWN
588 {LANGUAGE_TW},
589 }};
590 const auto& available = region_languages[region];
591 if (std::find(available.begin(), available.end(), language) == available.end()) {
592 return available[0];
593 }
594 return language;
595}
596
589void SetPreferredRegionCode(u32 region_code) { 597void SetPreferredRegionCode(u32 region_code) {
590 preferred_region_code = region_code; 598 preferred_region_code = region_code;
591 LOG_INFO(Service_CFG, "Preferred region code set to %u", preferred_region_code); 599 LOG_INFO(Service_CFG, "Preferred region code set to %u", preferred_region_code);
600
601 if (Settings::values.region_value == Settings::REGION_VALUE_AUTO_SELECT) {
602 const SystemLanguage current_language = GetSystemLanguage();
603 const SystemLanguage adjusted_language =
604 AdjustLanguageInfoBlock(region_code, current_language);
605 if (current_language != adjusted_language) {
606 LOG_WARNING(Service_CFG, "System language %d does not fit the region. Adjusted to %d",
607 static_cast<int>(current_language), static_cast<int>(adjusted_language));
608 SetSystemLanguage(adjusted_language);
609 }
610 }
592} 611}
593 612
594void SetUsername(const std::u16string& name) { 613void SetUsername(const std::u16string& name) {
diff --git a/src/core/hle/service/err_f.cpp b/src/core/hle/service/err_f.cpp
index cd0a1a598..9da55f328 100644
--- a/src/core/hle/service/err_f.cpp
+++ b/src/core/hle/service/err_f.cpp
@@ -227,6 +227,8 @@ static void ThrowFatalError(Interface* self) {
227 LOG_CRITICAL(Service_ERR, "FINST2: 0x%08X", 227 LOG_CRITICAL(Service_ERR, "FINST2: 0x%08X",
228 errtype.exception_data.exception_info.fpinst2); 228 errtype.exception_data.exception_info.fpinst2);
229 break; 229 break;
230 case ExceptionType::Undefined:
231 break; // Not logging exception_info for this case
230 } 232 }
231 LOG_CRITICAL(Service_ERR, "Datetime: %s", GetCurrentSystemTime().c_str()); 233 LOG_CRITICAL(Service_ERR, "Datetime: %s", GetCurrentSystemTime().c_str());
232 break; 234 break;
diff --git a/src/core/hle/service/fs/archive.h b/src/core/hle/service/fs/archive.h
index 519c1f3a9..2ea956e0b 100644
--- a/src/core/hle/service/fs/archive.h
+++ b/src/core/hle/service/fs/archive.h
@@ -26,7 +26,7 @@ namespace FS {
26 26
27/// Supported archive types 27/// Supported archive types
28enum class ArchiveIdCode : u32 { 28enum class ArchiveIdCode : u32 {
29 RomFS = 0x00000003, 29 SelfNCCH = 0x00000003,
30 SaveData = 0x00000004, 30 SaveData = 0x00000004,
31 ExtSaveData = 0x00000006, 31 ExtSaveData = 0x00000006,
32 SharedExtSaveData = 0x00000007, 32 SharedExtSaveData = 0x00000007,
diff --git a/src/core/hle/service/fs/fs_user.cpp b/src/core/hle/service/fs/fs_user.cpp
index 337da1387..33b290699 100644
--- a/src/core/hle/service/fs/fs_user.cpp
+++ b/src/core/hle/service/fs/fs_user.cpp
@@ -54,15 +54,17 @@ static void Initialize(Service::Interface* self) {
54 * 3 : File handle 54 * 3 : File handle
55 */ 55 */
56static void OpenFile(Service::Interface* self) { 56static void OpenFile(Service::Interface* self) {
57 u32* cmd_buff = Kernel::GetCommandBuffer(); 57 // The helper should be passed by argument to the function
58 IPC::RequestParser rp(Kernel::GetCommandBuffer(), {0x080201C2});
59 rp.Pop<u32>(); // Always 0 ?
58 60
59 ArchiveHandle archive_handle = MakeArchiveHandle(cmd_buff[2], cmd_buff[3]); 61 ArchiveHandle archive_handle = rp.Pop<u64>();
60 auto filename_type = static_cast<FileSys::LowPathType>(cmd_buff[4]); 62 auto filename_type = static_cast<FileSys::LowPathType>(rp.Pop<u32>());
61 u32 filename_size = cmd_buff[5]; 63 u32 filename_size = rp.Pop<u32>();
62 FileSys::Mode mode; 64 FileSys::Mode mode;
63 mode.hex = cmd_buff[6]; 65 mode.hex = rp.Pop<u32>();
64 u32 attributes = cmd_buff[7]; // TODO(Link Mauve): do something with those attributes. 66 u32 attributes = rp.Pop<u32>(); // TODO(Link Mauve): do something with those attributes.
65 u32 filename_ptr = cmd_buff[9]; 67 VAddr filename_ptr = rp.PopStaticBuffer();
66 FileSys::Path file_path(filename_type, filename_size, filename_ptr); 68 FileSys::Path file_path(filename_type, filename_size, filename_ptr);
67 69
68 LOG_DEBUG(Service_FS, "path=%s, mode=%u attrs=%u", file_path.DebugStr().c_str(), mode.hex, 70 LOG_DEBUG(Service_FS, "path=%s, mode=%u attrs=%u", file_path.DebugStr().c_str(), mode.hex,
@@ -70,16 +72,17 @@ static void OpenFile(Service::Interface* self) {
70 72
71 ResultVal<std::shared_ptr<File>> file_res = 73 ResultVal<std::shared_ptr<File>> file_res =
72 OpenFileFromArchive(archive_handle, file_path, mode); 74 OpenFileFromArchive(archive_handle, file_path, mode);
73 cmd_buff[1] = file_res.Code().raw; 75 IPC::RequestBuilder rb = rp.MakeBuilder(1, 2);
76 rb.Push(file_res.Code());
74 if (file_res.Succeeded()) { 77 if (file_res.Succeeded()) {
75 std::shared_ptr<File> file = *file_res; 78 std::shared_ptr<File> file = *file_res;
76 auto sessions = ServerSession::CreateSessionPair(file->GetName(), file); 79 auto sessions = ServerSession::CreateSessionPair(file->GetName(), file);
77 file->ClientConnected(std::get<Kernel::SharedPtr<Kernel::ServerSession>>(sessions)); 80 file->ClientConnected(std::get<Kernel::SharedPtr<Kernel::ServerSession>>(sessions));
78 cmd_buff[3] = Kernel::g_handle_table 81 rb.PushMoveHandles(Kernel::g_handle_table
79 .Create(std::get<Kernel::SharedPtr<Kernel::ClientSession>>(sessions)) 82 .Create(std::get<Kernel::SharedPtr<Kernel::ClientSession>>(sessions))
80 .MoveFrom(); 83 .MoveFrom());
81 } else { 84 } else {
82 cmd_buff[3] = 0; 85 rb.PushMoveHandles(0);
83 LOG_ERROR(Service_FS, "failed to get a handle for file %s", file_path.DebugStr().c_str()); 86 LOG_ERROR(Service_FS, "failed to get a handle for file %s", file_path.DebugStr().c_str());
84 } 87 }
85} 88}
diff --git a/src/core/hle/service/gsp_gpu.cpp b/src/core/hle/service/gsp_gpu.cpp
index a8c1331ed..a960778a7 100644
--- a/src/core/hle/service/gsp_gpu.cpp
+++ b/src/core/hle/service/gsp_gpu.cpp
@@ -4,6 +4,7 @@
4 4
5#include "common/bit_field.h" 5#include "common/bit_field.h"
6#include "common/microprofile.h" 6#include "common/microprofile.h"
7#include "core/core.h"
7#include "core/hle/kernel/event.h" 8#include "core/hle/kernel/event.h"
8#include "core/hle/kernel/shared_memory.h" 9#include "core/hle/kernel/shared_memory.h"
9#include "core/hle/result.h" 10#include "core/hle/result.h"
@@ -118,10 +119,10 @@ static ResultCode WriteHWRegs(u32 base_address, u32 size_in_bytes, VAddr data_va
118 * Updates sequential GSP GPU hardware registers using parallel arrays of source data and masks. 119 * Updates sequential GSP GPU hardware registers using parallel arrays of source data and masks.
119 * For each register, the value is updated only where the mask is high 120 * For each register, the value is updated only where the mask is high
120 * 121 *
121 * @param base_address The address of the first register in the sequence 122 * @param base_address The address of the first register in the sequence
122 * @param size_in_bytes The number of registers to update (size of data) 123 * @param size_in_bytes The number of registers to update (size of data)
123 * @param data A pointer to the source data to use for updates 124 * @param data_vaddr A virtual address to the source data to use for updates
124 * @param masks A pointer to the masks 125 * @param masks_vaddr A virtual address to the masks
125 * @return RESULT_SUCCESS if the parameters are valid, error code otherwise 126 * @return RESULT_SUCCESS if the parameters are valid, error code otherwise
126 */ 127 */
127static ResultCode WriteHWRegsWithMask(u32 base_address, u32 size_in_bytes, VAddr data_vaddr, 128static ResultCode WriteHWRegsWithMask(u32 base_address, u32 size_in_bytes, VAddr data_vaddr,
@@ -280,6 +281,7 @@ ResultCode SetBufferSwap(u32 screen_id, const FrameBufferInfo& info) {
280 281
281 if (screen_id == 0) { 282 if (screen_id == 0) {
282 MicroProfileFlip(); 283 MicroProfileFlip();
284 Core::System::GetInstance().perf_stats.EndGameFrame();
283 } 285 }
284 286
285 return RESULT_SUCCESS; 287 return RESULT_SUCCESS;
@@ -705,6 +707,33 @@ static void ReleaseRight(Interface* self) {
705 LOG_WARNING(Service_GSP, "called"); 707 LOG_WARNING(Service_GSP, "called");
706} 708}
707 709
710/**
711 * GSP_GPU::StoreDataCache service function
712 *
713 * This function is a no-op; we aren't emulating the CPU cache any time soon.
714 *
715 * Inputs:
716 * 0 : Header code [0x001F0082]
717 * 1 : Address
718 * 2 : Size
719 * 3 : Value 0, some descriptor for the KProcess Handle
720 * 4 : KProcess handle
721 * Outputs:
722 * 1 : Result of function, 0 on success, otherwise error code
723 */
724static void StoreDataCache(Interface* self) {
725 u32* cmd_buff = Kernel::GetCommandBuffer();
726 u32 address = cmd_buff[1];
727 u32 size = cmd_buff[2];
728 u32 process = cmd_buff[4];
729
730 cmd_buff[0] = IPC::MakeHeader(0x1F, 0x1, 0);
731 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
732
733 LOG_DEBUG(Service_GSP, "(STUBBED) called address=0x%08X, size=0x%08X, process=0x%08X", address,
734 size, process);
735}
736
708const Interface::FunctionInfo FunctionTable[] = { 737const Interface::FunctionInfo FunctionTable[] = {
709 {0x00010082, WriteHWRegs, "WriteHWRegs"}, 738 {0x00010082, WriteHWRegs, "WriteHWRegs"},
710 {0x00020084, WriteHWRegsWithMask, "WriteHWRegsWithMask"}, 739 {0x00020084, WriteHWRegsWithMask, "WriteHWRegsWithMask"},
@@ -736,7 +765,7 @@ const Interface::FunctionInfo FunctionTable[] = {
736 {0x001C0040, nullptr, "SetLedForceOff"}, 765 {0x001C0040, nullptr, "SetLedForceOff"},
737 {0x001D0040, nullptr, "SetTestCommand"}, 766 {0x001D0040, nullptr, "SetTestCommand"},
738 {0x001E0080, nullptr, "SetInternalPriorities"}, 767 {0x001E0080, nullptr, "SetInternalPriorities"},
739 {0x001F0082, nullptr, "StoreDataCache"}, 768 {0x001F0082, StoreDataCache, "StoreDataCache"},
740}; 769};
741 770
742GSP_GPU::GSP_GPU() { 771GSP_GPU::GSP_GPU() {
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index f14ab3811..fb3acb507 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -32,8 +32,8 @@ static u32 next_touch_index;
32static u32 next_accelerometer_index; 32static u32 next_accelerometer_index;
33static u32 next_gyroscope_index; 33static u32 next_gyroscope_index;
34 34
35static int enable_accelerometer_count = 0; // positive means enabled 35static int enable_accelerometer_count; // positive means enabled
36static int enable_gyroscope_count = 0; // positive means enabled 36static int enable_gyroscope_count; // positive means enabled
37 37
38static int pad_update_event; 38static int pad_update_event;
39static int accelerometer_update_event; 39static int accelerometer_update_event;
@@ -323,6 +323,9 @@ void Init() {
323 next_accelerometer_index = 0; 323 next_accelerometer_index = 0;
324 next_gyroscope_index = 0; 324 next_gyroscope_index = 0;
325 325
326 enable_accelerometer_count = 0;
327 enable_gyroscope_count = 0;
328
326 // Create event handles 329 // Create event handles
327 event_pad_or_touch_1 = Event::Create(ResetType::OneShot, "HID:EventPadOrTouch1"); 330 event_pad_or_touch_1 = Event::Create(ResetType::OneShot, "HID:EventPadOrTouch1");
328 event_pad_or_touch_2 = Event::Create(ResetType::OneShot, "HID:EventPadOrTouch2"); 331 event_pad_or_touch_2 = Event::Create(ResetType::OneShot, "HID:EventPadOrTouch2");
diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h
index 21e66dfe0..c7f4ee138 100644
--- a/src/core/hle/service/hid/hid.h
+++ b/src/core/hle/service/hid/hid.h
@@ -42,8 +42,6 @@ struct PadState {
42 BitField<14, 1, u32> zl; 42 BitField<14, 1, u32> zl;
43 BitField<15, 1, u32> zr; 43 BitField<15, 1, u32> zr;
44 44
45 BitField<20, 1, u32> touch;
46
47 BitField<24, 1, u32> c_right; 45 BitField<24, 1, u32> c_right;
48 BitField<25, 1, u32> c_left; 46 BitField<25, 1, u32> c_left;
49 BitField<26, 1, u32> c_up; 47 BitField<26, 1, u32> c_up;
@@ -203,8 +201,6 @@ const PadState PAD_Y = {{1u << 11}};
203const PadState PAD_ZL = {{1u << 14}}; 201const PadState PAD_ZL = {{1u << 14}};
204const PadState PAD_ZR = {{1u << 15}}; 202const PadState PAD_ZR = {{1u << 15}};
205 203
206const PadState PAD_TOUCH = {{1u << 20}};
207
208const PadState PAD_C_RIGHT = {{1u << 24}}; 204const PadState PAD_C_RIGHT = {{1u << 24}};
209const PadState PAD_C_LEFT = {{1u << 25}}; 205const PadState PAD_C_LEFT = {{1u << 25}};
210const PadState PAD_C_UP = {{1u << 26}}; 206const PadState PAD_C_UP = {{1u << 26}};
diff --git a/src/core/hle/service/ir/ir.cpp b/src/core/hle/service/ir/ir.cpp
index 7f1731a50..7ac34a990 100644
--- a/src/core/hle/service/ir/ir.cpp
+++ b/src/core/hle/service/ir/ir.cpp
@@ -2,9 +2,6 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "core/hle/kernel/event.h"
6#include "core/hle/kernel/kernel.h"
7#include "core/hle/kernel/shared_memory.h"
8#include "core/hle/service/ir/ir.h" 5#include "core/hle/service/ir/ir.h"
9#include "core/hle/service/ir/ir_rst.h" 6#include "core/hle/service/ir/ir_rst.h"
10#include "core/hle/service/ir/ir_u.h" 7#include "core/hle/service/ir/ir_u.h"
@@ -14,101 +11,18 @@
14namespace Service { 11namespace Service {
15namespace IR { 12namespace IR {
16 13
17static Kernel::SharedPtr<Kernel::Event> handle_event;
18static Kernel::SharedPtr<Kernel::Event> conn_status_event;
19static Kernel::SharedPtr<Kernel::SharedMemory> shared_memory;
20static Kernel::SharedPtr<Kernel::SharedMemory> transfer_shared_memory;
21
22void GetHandles(Service::Interface* self) {
23 u32* cmd_buff = Kernel::GetCommandBuffer();
24
25 cmd_buff[1] = RESULT_SUCCESS.raw;
26 cmd_buff[2] = 0x4000000;
27 cmd_buff[3] = Kernel::g_handle_table.Create(Service::IR::shared_memory).MoveFrom();
28 cmd_buff[4] = Kernel::g_handle_table.Create(Service::IR::handle_event).MoveFrom();
29}
30
31void InitializeIrNopShared(Interface* self) {
32 u32* cmd_buff = Kernel::GetCommandBuffer();
33
34 u32 transfer_buff_size = cmd_buff[1];
35 u32 recv_buff_size = cmd_buff[2];
36 u32 unk1 = cmd_buff[3];
37 u32 send_buff_size = cmd_buff[4];
38 u32 unk2 = cmd_buff[5];
39 u8 baud_rate = cmd_buff[6] & 0xFF;
40 Kernel::Handle handle = cmd_buff[8];
41
42 if (Kernel::g_handle_table.IsValid(handle)) {
43 transfer_shared_memory = Kernel::g_handle_table.Get<Kernel::SharedMemory>(handle);
44 transfer_shared_memory->name = "IR:TransferSharedMemory";
45 }
46
47 cmd_buff[1] = RESULT_SUCCESS.raw;
48
49 LOG_WARNING(Service_IR, "(STUBBED) called, transfer_buff_size=%d, recv_buff_size=%d, "
50 "unk1=%d, send_buff_size=%d, unk2=%d, baud_rate=%u, handle=0x%08X",
51 transfer_buff_size, recv_buff_size, unk1, send_buff_size, unk2, baud_rate, handle);
52}
53
54void RequireConnection(Interface* self) {
55 u32* cmd_buff = Kernel::GetCommandBuffer();
56
57 conn_status_event->Signal();
58
59 cmd_buff[1] = RESULT_SUCCESS.raw;
60
61 LOG_WARNING(Service_IR, "(STUBBED) called");
62}
63
64void Disconnect(Interface* self) {
65 u32* cmd_buff = Kernel::GetCommandBuffer();
66
67 cmd_buff[1] = RESULT_SUCCESS.raw;
68
69 LOG_WARNING(Service_IR, "(STUBBED) called");
70}
71
72void GetConnectionStatusEvent(Interface* self) {
73 u32* cmd_buff = Kernel::GetCommandBuffer();
74
75 cmd_buff[1] = RESULT_SUCCESS.raw;
76 cmd_buff[3] = Kernel::g_handle_table.Create(Service::IR::conn_status_event).MoveFrom();
77
78 LOG_WARNING(Service_IR, "(STUBBED) called");
79}
80
81void FinalizeIrNop(Interface* self) {
82 u32* cmd_buff = Kernel::GetCommandBuffer();
83
84 cmd_buff[1] = RESULT_SUCCESS.raw;
85
86 LOG_WARNING(Service_IR, "(STUBBED) called");
87}
88
89void Init() { 14void Init() {
90 using namespace Kernel;
91
92 AddService(new IR_RST_Interface); 15 AddService(new IR_RST_Interface);
93 AddService(new IR_U_Interface); 16 AddService(new IR_U_Interface);
94 AddService(new IR_User_Interface); 17 AddService(new IR_User_Interface);
95 18
96 using Kernel::MemoryPermission; 19 InitUser();
97 shared_memory = SharedMemory::Create(nullptr, 0x1000, Kernel::MemoryPermission::ReadWrite, 20 InitRST();
98 Kernel::MemoryPermission::ReadWrite, 0,
99 Kernel::MemoryRegion::BASE, "IR:SharedMemory");
100 transfer_shared_memory = nullptr;
101
102 // Create event handle(s)
103 handle_event = Event::Create(ResetType::OneShot, "IR:HandleEvent");
104 conn_status_event = Event::Create(ResetType::OneShot, "IR:ConnectionStatusEvent");
105} 21}
106 22
107void Shutdown() { 23void Shutdown() {
108 transfer_shared_memory = nullptr; 24 ShutdownUser();
109 shared_memory = nullptr; 25 ShutdownRST();
110 handle_event = nullptr;
111 conn_status_event = nullptr;
112} 26}
113 27
114} // namespace IR 28} // namespace IR
diff --git a/src/core/hle/service/ir/ir.h b/src/core/hle/service/ir/ir.h
index 72d44ce60..c741498e2 100644
--- a/src/core/hle/service/ir/ir.h
+++ b/src/core/hle/service/ir/ir.h
@@ -10,63 +10,6 @@ class Interface;
10 10
11namespace IR { 11namespace IR {
12 12
13/**
14 * IR::GetHandles service function
15 * Outputs:
16 * 1 : Result of function, 0 on success, otherwise error code
17 * 2 : Translate header, used by the ARM11-kernel
18 * 3 : Shared memory handle
19 * 4 : Event handle
20 */
21void GetHandles(Interface* self);
22
23/**
24 * IR::InitializeIrNopShared service function
25 * Inputs:
26 * 1 : Size of transfer buffer
27 * 2 : Recv buffer size
28 * 3 : unknown
29 * 4 : Send buffer size
30 * 5 : unknown
31 * 6 : BaudRate (u8)
32 * 7 : 0
33 * 8 : Handle of transfer shared memory
34 * Outputs:
35 * 1 : Result of function, 0 on success, otherwise error code
36 */
37void InitializeIrNopShared(Interface* self);
38
39/**
40 * IR::FinalizeIrNop service function
41 * Outputs:
42 * 1 : Result of function, 0 on success, otherwise error code
43 */
44void FinalizeIrNop(Interface* self);
45
46/**
47 * IR::GetConnectionStatusEvent service function
48 * Outputs:
49 * 1 : Result of function, 0 on success, otherwise error code
50 * 2 : Connection Status Event handle
51 */
52void GetConnectionStatusEvent(Interface* self);
53
54/**
55 * IR::Disconnect service function
56 * Outputs:
57 * 1 : Result of function, 0 on success, otherwise error code
58 */
59void Disconnect(Interface* self);
60
61/**
62 * IR::RequireConnection service function
63 * Inputs:
64 * 1 : unknown (u8), looks like always 1
65 * Outputs:
66 * 1 : Result of function, 0 on success, otherwise error code
67 */
68void RequireConnection(Interface* self);
69
70/// Initialize IR service 13/// Initialize IR service
71void Init(); 14void Init();
72 15
diff --git a/src/core/hle/service/ir/ir_rst.cpp b/src/core/hle/service/ir/ir_rst.cpp
index 1f10ebd3d..3f1275c53 100644
--- a/src/core/hle/service/ir/ir_rst.cpp
+++ b/src/core/hle/service/ir/ir_rst.cpp
@@ -2,12 +2,34 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "core/hle/kernel/event.h"
6#include "core/hle/kernel/shared_memory.h"
5#include "core/hle/service/ir/ir.h" 7#include "core/hle/service/ir/ir.h"
6#include "core/hle/service/ir/ir_rst.h" 8#include "core/hle/service/ir/ir_rst.h"
7 9
8namespace Service { 10namespace Service {
9namespace IR { 11namespace IR {
10 12
13static Kernel::SharedPtr<Kernel::Event> handle_event;
14static Kernel::SharedPtr<Kernel::SharedMemory> shared_memory;
15
16/**
17 * IR::GetHandles service function
18 * Outputs:
19 * 1 : Result of function, 0 on success, otherwise error code
20 * 2 : Translate header, used by the ARM11-kernel
21 * 3 : Shared memory handle
22 * 4 : Event handle
23 */
24static void GetHandles(Interface* self) {
25 u32* cmd_buff = Kernel::GetCommandBuffer();
26
27 cmd_buff[1] = RESULT_SUCCESS.raw;
28 cmd_buff[2] = 0x4000000;
29 cmd_buff[3] = Kernel::g_handle_table.Create(Service::IR::shared_memory).MoveFrom();
30 cmd_buff[4] = Kernel::g_handle_table.Create(Service::IR::handle_event).MoveFrom();
31}
32
11const Interface::FunctionInfo FunctionTable[] = { 33const Interface::FunctionInfo FunctionTable[] = {
12 {0x00010000, GetHandles, "GetHandles"}, 34 {0x00010000, GetHandles, "GetHandles"},
13 {0x00020080, nullptr, "Initialize"}, 35 {0x00020080, nullptr, "Initialize"},
@@ -19,5 +41,20 @@ IR_RST_Interface::IR_RST_Interface() {
19 Register(FunctionTable); 41 Register(FunctionTable);
20} 42}
21 43
44void InitRST() {
45 using namespace Kernel;
46
47 shared_memory =
48 SharedMemory::Create(nullptr, 0x1000, MemoryPermission::ReadWrite,
49 MemoryPermission::ReadWrite, 0, MemoryRegion::BASE, "IR:SharedMemory");
50
51 handle_event = Event::Create(ResetType::OneShot, "IR:HandleEvent");
52}
53
54void ShutdownRST() {
55 shared_memory = nullptr;
56 handle_event = nullptr;
57}
58
22} // namespace IR 59} // namespace IR
23} // namespace Service 60} // namespace Service
diff --git a/src/core/hle/service/ir/ir_rst.h b/src/core/hle/service/ir/ir_rst.h
index a492e15c9..75b732627 100644
--- a/src/core/hle/service/ir/ir_rst.h
+++ b/src/core/hle/service/ir/ir_rst.h
@@ -18,5 +18,8 @@ public:
18 } 18 }
19}; 19};
20 20
21void InitRST();
22void ShutdownRST();
23
21} // namespace IR 24} // namespace IR
22} // namespace Service 25} // namespace Service
diff --git a/src/core/hle/service/ir/ir_u.cpp b/src/core/hle/service/ir/ir_u.cpp
index 429615f31..ce00d5732 100644
--- a/src/core/hle/service/ir/ir_u.cpp
+++ b/src/core/hle/service/ir/ir_u.cpp
@@ -27,7 +27,7 @@ const Interface::FunctionInfo FunctionTable[] = {
27 {0x00100000, nullptr, "GetErrorStatus"}, 27 {0x00100000, nullptr, "GetErrorStatus"},
28 {0x00110040, nullptr, "SetSleepModeActive"}, 28 {0x00110040, nullptr, "SetSleepModeActive"},
29 {0x00120040, nullptr, "SetSleepModeState"}, 29 {0x00120040, nullptr, "SetSleepModeState"},
30 // clang-format off 30 // clang-format on
31}; 31};
32 32
33IR_U_Interface::IR_U_Interface() { 33IR_U_Interface::IR_U_Interface() {
diff --git a/src/core/hle/service/ir/ir_user.cpp b/src/core/hle/service/ir/ir_user.cpp
index 6cff1d544..b326d7fc7 100644
--- a/src/core/hle/service/ir/ir_user.cpp
+++ b/src/core/hle/service/ir/ir_user.cpp
@@ -2,12 +2,112 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "core/hle/kernel/event.h"
6#include "core/hle/kernel/shared_memory.h"
5#include "core/hle/service/ir/ir.h" 7#include "core/hle/service/ir/ir.h"
6#include "core/hle/service/ir/ir_user.h" 8#include "core/hle/service/ir/ir_user.h"
7 9
8namespace Service { 10namespace Service {
9namespace IR { 11namespace IR {
10 12
13static Kernel::SharedPtr<Kernel::Event> conn_status_event;
14static Kernel::SharedPtr<Kernel::SharedMemory> transfer_shared_memory;
15
16/**
17 * IR::InitializeIrNopShared service function
18 * Inputs:
19 * 1 : Size of transfer buffer
20 * 2 : Recv buffer size
21 * 3 : unknown
22 * 4 : Send buffer size
23 * 5 : unknown
24 * 6 : BaudRate (u8)
25 * 7 : 0
26 * 8 : Handle of transfer shared memory
27 * Outputs:
28 * 1 : Result of function, 0 on success, otherwise error code
29 */
30static void InitializeIrNopShared(Interface* self) {
31 u32* cmd_buff = Kernel::GetCommandBuffer();
32
33 u32 transfer_buff_size = cmd_buff[1];
34 u32 recv_buff_size = cmd_buff[2];
35 u32 unk1 = cmd_buff[3];
36 u32 send_buff_size = cmd_buff[4];
37 u32 unk2 = cmd_buff[5];
38 u8 baud_rate = cmd_buff[6] & 0xFF;
39 Kernel::Handle handle = cmd_buff[8];
40
41 if (Kernel::g_handle_table.IsValid(handle)) {
42 transfer_shared_memory = Kernel::g_handle_table.Get<Kernel::SharedMemory>(handle);
43 transfer_shared_memory->name = "IR:TransferSharedMemory";
44 }
45
46 cmd_buff[1] = RESULT_SUCCESS.raw;
47
48 LOG_WARNING(Service_IR, "(STUBBED) called, transfer_buff_size=%d, recv_buff_size=%d, "
49 "unk1=%d, send_buff_size=%d, unk2=%d, baud_rate=%u, handle=0x%08X",
50 transfer_buff_size, recv_buff_size, unk1, send_buff_size, unk2, baud_rate, handle);
51}
52
53/**
54 * IR::RequireConnection service function
55 * Inputs:
56 * 1 : unknown (u8), looks like always 1
57 * Outputs:
58 * 1 : Result of function, 0 on success, otherwise error code
59 */
60static void RequireConnection(Interface* self) {
61 u32* cmd_buff = Kernel::GetCommandBuffer();
62
63 conn_status_event->Signal();
64
65 cmd_buff[1] = RESULT_SUCCESS.raw;
66
67 LOG_WARNING(Service_IR, "(STUBBED) called");
68}
69
70/**
71 * IR::Disconnect service function
72 * Outputs:
73 * 1 : Result of function, 0 on success, otherwise error code
74 */
75static void Disconnect(Interface* self) {
76 u32* cmd_buff = Kernel::GetCommandBuffer();
77
78 cmd_buff[1] = RESULT_SUCCESS.raw;
79
80 LOG_WARNING(Service_IR, "(STUBBED) called");
81}
82
83/**
84 * IR::GetConnectionStatusEvent service function
85 * Outputs:
86 * 1 : Result of function, 0 on success, otherwise error code
87 * 2 : Connection Status Event handle
88 */
89static void GetConnectionStatusEvent(Interface* self) {
90 u32* cmd_buff = Kernel::GetCommandBuffer();
91
92 cmd_buff[1] = RESULT_SUCCESS.raw;
93 cmd_buff[3] = Kernel::g_handle_table.Create(Service::IR::conn_status_event).MoveFrom();
94
95 LOG_WARNING(Service_IR, "(STUBBED) called");
96}
97
98/**
99 * IR::FinalizeIrNop service function
100 * Outputs:
101 * 1 : Result of function, 0 on success, otherwise error code
102 */
103static void FinalizeIrNop(Interface* self) {
104 u32* cmd_buff = Kernel::GetCommandBuffer();
105
106 cmd_buff[1] = RESULT_SUCCESS.raw;
107
108 LOG_WARNING(Service_IR, "(STUBBED) called");
109}
110
11const Interface::FunctionInfo FunctionTable[] = { 111const Interface::FunctionInfo FunctionTable[] = {
12 {0x00010182, nullptr, "InitializeIrNop"}, 112 {0x00010182, nullptr, "InitializeIrNop"},
13 {0x00020000, FinalizeIrNop, "FinalizeIrNop"}, 113 {0x00020000, FinalizeIrNop, "FinalizeIrNop"},
@@ -41,5 +141,17 @@ IR_User_Interface::IR_User_Interface() {
41 Register(FunctionTable); 141 Register(FunctionTable);
42} 142}
43 143
144void InitUser() {
145 using namespace Kernel;
146
147 transfer_shared_memory = nullptr;
148 conn_status_event = Event::Create(ResetType::OneShot, "IR:ConnectionStatusEvent");
149}
150
151void ShutdownUser() {
152 transfer_shared_memory = nullptr;
153 conn_status_event = nullptr;
154}
155
44} // namespace IR 156} // namespace IR
45} // namespace Service 157} // namespace Service
diff --git a/src/core/hle/service/ir/ir_user.h b/src/core/hle/service/ir/ir_user.h
index 71c932ffa..3849bd923 100644
--- a/src/core/hle/service/ir/ir_user.h
+++ b/src/core/hle/service/ir/ir_user.h
@@ -18,5 +18,8 @@ public:
18 } 18 }
19}; 19};
20 20
21void InitUser();
22void ShutdownUser();
23
21} // namespace IR 24} // namespace IR
22} // namespace Service 25} // namespace Service
diff --git a/src/core/hle/service/ldr_ro/cro_helper.h b/src/core/hle/service/ldr_ro/cro_helper.h
index 060d5a55f..3bc10dbdc 100644
--- a/src/core/hle/service/ldr_ro/cro_helper.h
+++ b/src/core/hle/service/ldr_ro/cro_helper.h
@@ -57,7 +57,7 @@ public:
57 * @param is_crs true if the module itself is the static module 57 * @param is_crs true if the module itself is the static module
58 * @returns ResultCode RESULT_SUCCESS on success, otherwise error code. 58 * @returns ResultCode RESULT_SUCCESS on success, otherwise error code.
59 */ 59 */
60 ResultCode Rebase(VAddr crs_address, u32 cro_size, VAddr data_segment_addresss, 60 ResultCode Rebase(VAddr crs_address, u32 cro_size, VAddr data_segment_address,
61 u32 data_segment_size, VAddr bss_segment_address, u32 bss_segment_size, 61 u32 data_segment_size, VAddr bss_segment_address, u32 bss_segment_size,
62 bool is_crs); 62 bool is_crs);
63 63
@@ -102,7 +102,7 @@ public:
102 /** 102 /**
103 * Registers this module and adds it to the module list. 103 * Registers this module and adds it to the module list.
104 * @param crs_address the virtual address of the static module 104 * @param crs_address the virtual address of the static module
105 * @auto_link whether to register as an auto link module 105 * @param auto_link whether to register as an auto link module
106 */ 106 */
107 void Register(VAddr crs_address, bool auto_link); 107 void Register(VAddr crs_address, bool auto_link);
108 108
diff --git a/src/core/hle/service/ldr_ro/ldr_ro.cpp b/src/core/hle/service/ldr_ro/ldr_ro.cpp
index 8d00a7577..7af76676b 100644
--- a/src/core/hle/service/ldr_ro/ldr_ro.cpp
+++ b/src/core/hle/service/ldr_ro/ldr_ro.cpp
@@ -6,6 +6,7 @@
6#include "common/common_types.h" 6#include "common/common_types.h"
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8#include "core/arm/arm_interface.h" 8#include "core/arm/arm_interface.h"
9#include "core/core.h"
9#include "core/hle/kernel/process.h" 10#include "core/hle/kernel/process.h"
10#include "core/hle/kernel/vm_manager.h" 11#include "core/hle/kernel/vm_manager.h"
11#include "core/hle/service/ldr_ro/cro_helper.h" 12#include "core/hle/service/ldr_ro/cro_helper.h"
diff --git a/src/core/hle/service/mic_u.cpp b/src/core/hle/service/mic_u.cpp
index c62f8afc6..e98388560 100644
--- a/src/core/hle/service/mic_u.cpp
+++ b/src/core/hle/service/mic_u.cpp
@@ -93,7 +93,7 @@ static void StartSampling(Interface* self) {
93 sample_rate = static_cast<SampleRate>(cmd_buff[2] & 0xFF); 93 sample_rate = static_cast<SampleRate>(cmd_buff[2] & 0xFF);
94 audio_buffer_offset = cmd_buff[3]; 94 audio_buffer_offset = cmd_buff[3];
95 audio_buffer_size = cmd_buff[4]; 95 audio_buffer_size = cmd_buff[4];
96 audio_buffer_loop = static_cast<bool>(cmd_buff[5] & 0xFF); 96 audio_buffer_loop = (cmd_buff[5] & 0xFF) != 0;
97 97
98 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 98 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
99 is_sampling = true; 99 is_sampling = true;
@@ -202,7 +202,7 @@ static void GetGain(Interface* self) {
202 */ 202 */
203static void SetPower(Interface* self) { 203static void SetPower(Interface* self) {
204 u32* cmd_buff = Kernel::GetCommandBuffer(); 204 u32* cmd_buff = Kernel::GetCommandBuffer();
205 mic_power = static_cast<bool>(cmd_buff[1] & 0xFF); 205 mic_power = (cmd_buff[1] & 0xFF) != 0;
206 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 206 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
207 LOG_WARNING(Service_MIC, "(STUBBED) called, mic_power=%u", mic_power); 207 LOG_WARNING(Service_MIC, "(STUBBED) called, mic_power=%u", mic_power);
208} 208}
@@ -252,7 +252,7 @@ static void SetIirFilterMic(Interface* self) {
252 */ 252 */
253static void SetClamp(Interface* self) { 253static void SetClamp(Interface* self) {
254 u32* cmd_buff = Kernel::GetCommandBuffer(); 254 u32* cmd_buff = Kernel::GetCommandBuffer();
255 clamp = static_cast<bool>(cmd_buff[1] & 0xFF); 255 clamp = (cmd_buff[1] & 0xFF) != 0;
256 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 256 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
257 LOG_WARNING(Service_MIC, "(STUBBED) called, clamp=%u", clamp); 257 LOG_WARNING(Service_MIC, "(STUBBED) called, clamp=%u", clamp);
258} 258}
@@ -282,7 +282,7 @@ static void GetClamp(Interface* self) {
282 */ 282 */
283static void SetAllowShellClosed(Interface* self) { 283static void SetAllowShellClosed(Interface* self) {
284 u32* cmd_buff = Kernel::GetCommandBuffer(); 284 u32* cmd_buff = Kernel::GetCommandBuffer();
285 allow_shell_closed = static_cast<bool>(cmd_buff[1] & 0xFF); 285 allow_shell_closed = (cmd_buff[1] & 0xFF) != 0;
286 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 286 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
287 LOG_WARNING(Service_MIC, "(STUBBED) called, allow_shell_closed=%u", allow_shell_closed); 287 LOG_WARNING(Service_MIC, "(STUBBED) called, allow_shell_closed=%u", allow_shell_closed);
288} 288}
diff --git a/src/core/hle/service/nfc/nfc.cpp b/src/core/hle/service/nfc/nfc.cpp
index e248285f9..fd3c7d9c2 100644
--- a/src/core/hle/service/nfc/nfc.cpp
+++ b/src/core/hle/service/nfc/nfc.cpp
@@ -11,6 +11,81 @@ namespace Service {
11namespace NFC { 11namespace NFC {
12 12
13static Kernel::SharedPtr<Kernel::Event> tag_in_range_event; 13static Kernel::SharedPtr<Kernel::Event> tag_in_range_event;
14static Kernel::SharedPtr<Kernel::Event> tag_out_of_range_event;
15static TagState nfc_tag_state = TagState::NotInitialized;
16static CommunicationStatus nfc_status = CommunicationStatus::NfcInitialized;
17
18void Initialize(Interface* self) {
19 u32* cmd_buff = Kernel::GetCommandBuffer();
20
21 u8 param = static_cast<u8>(cmd_buff[1] & 0xFF);
22
23 nfc_tag_state = TagState::NotScanning;
24
25 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
26 LOG_WARNING(Service_NFC, "(STUBBED) called, param=%u", param);
27}
28
29void Shutdown(Interface* self) {
30 u32* cmd_buff = Kernel::GetCommandBuffer();
31
32 u8 param = static_cast<u8>(cmd_buff[1] & 0xFF);
33 nfc_tag_state = TagState::NotInitialized;
34
35 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
36 LOG_WARNING(Service_NFC, "(STUBBED) called, param=%u", param);
37}
38
39void StartCommunication(Interface* self) {
40 u32* cmd_buff = Kernel::GetCommandBuffer();
41
42 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
43 LOG_WARNING(Service_NFC, "(STUBBED) called");
44}
45
46void StopCommunication(Interface* self) {
47 u32* cmd_buff = Kernel::GetCommandBuffer();
48
49 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
50 LOG_WARNING(Service_NFC, "(STUBBED) called");
51}
52
53void StartTagScanning(Interface* self) {
54 u32* cmd_buff = Kernel::GetCommandBuffer();
55
56 nfc_tag_state = TagState::TagInRange;
57 tag_in_range_event->Signal();
58
59 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
60 LOG_WARNING(Service_NFC, "(STUBBED) called");
61}
62
63void StopTagScanning(Interface* self) {
64 u32* cmd_buff = Kernel::GetCommandBuffer();
65
66 nfc_tag_state = TagState::NotScanning;
67
68 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
69 LOG_WARNING(Service_NFC, "(STUBBED) called");
70}
71
72void LoadAmiiboData(Interface* self) {
73 u32* cmd_buff = Kernel::GetCommandBuffer();
74
75 nfc_tag_state = TagState::TagDataLoaded;
76
77 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
78 LOG_WARNING(Service_NFC, "(STUBBED) called");
79}
80
81void ResetTagScanState(Interface* self) {
82 u32* cmd_buff = Kernel::GetCommandBuffer();
83
84 nfc_tag_state = TagState::NotScanning;
85
86 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
87 LOG_WARNING(Service_NFC, "(STUBBED) called");
88}
14 89
15void GetTagInRangeEvent(Interface* self) { 90void GetTagInRangeEvent(Interface* self) {
16 u32* cmd_buff = Kernel::GetCommandBuffer(); 91 u32* cmd_buff = Kernel::GetCommandBuffer();
@@ -22,16 +97,46 @@ void GetTagInRangeEvent(Interface* self) {
22 LOG_WARNING(Service_NFC, "(STUBBED) called"); 97 LOG_WARNING(Service_NFC, "(STUBBED) called");
23} 98}
24 99
100void GetTagOutOfRangeEvent(Interface* self) {
101 u32* cmd_buff = Kernel::GetCommandBuffer();
102
103 cmd_buff[0] = IPC::MakeHeader(0xC, 1, 2);
104 cmd_buff[1] = RESULT_SUCCESS.raw;
105 cmd_buff[2] = IPC::CopyHandleDesc();
106 cmd_buff[3] = Kernel::g_handle_table.Create(tag_out_of_range_event).MoveFrom();
107 LOG_WARNING(Service_NFC, "(STUBBED) called");
108}
109
110void GetTagState(Interface* self) {
111 u32* cmd_buff = Kernel::GetCommandBuffer();
112
113 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
114 cmd_buff[2] = static_cast<u8>(nfc_tag_state);
115 LOG_DEBUG(Service_NFC, "(STUBBED) called");
116}
117
118void CommunicationGetStatus(Interface* self) {
119 u32* cmd_buff = Kernel::GetCommandBuffer();
120
121 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
122 cmd_buff[2] = static_cast<u8>(nfc_status);
123 LOG_DEBUG(Service_NFC, "(STUBBED) called");
124}
125
25void Init() { 126void Init() {
26 AddService(new NFC_M()); 127 AddService(new NFC_M());
27 AddService(new NFC_U()); 128 AddService(new NFC_U());
28 129
29 tag_in_range_event = 130 tag_in_range_event =
30 Kernel::Event::Create(Kernel::ResetType::OneShot, "NFC::tag_in_range_event"); 131 Kernel::Event::Create(Kernel::ResetType::OneShot, "NFC::tag_in_range_event");
132 tag_out_of_range_event =
133 Kernel::Event::Create(Kernel::ResetType::OneShot, "NFC::tag_out_range_event");
134 nfc_tag_state = TagState::NotInitialized;
31} 135}
32 136
33void Shutdown() { 137void Shutdown() {
34 tag_in_range_event = nullptr; 138 tag_in_range_event = nullptr;
139 tag_out_of_range_event = nullptr;
35} 140}
36 141
37} // namespace NFC 142} // namespace NFC
diff --git a/src/core/hle/service/nfc/nfc.h b/src/core/hle/service/nfc/nfc.h
index b02354201..a013bdae7 100644
--- a/src/core/hle/service/nfc/nfc.h
+++ b/src/core/hle/service/nfc/nfc.h
@@ -4,12 +4,103 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "common/common_types.h"
8
7namespace Service { 9namespace Service {
8 10
9class Interface; 11class Interface;
10 12
11namespace NFC { 13namespace NFC {
12 14
15enum class TagState : u8 {
16 NotInitialized = 0,
17 NotScanning = 1,
18 Scanning = 2,
19 TagInRange = 3,
20 TagOutOfRange = 4,
21 TagDataLoaded = 5,
22};
23
24enum class CommunicationStatus : u8 {
25 AttemptInitialize = 1,
26 NfcInitialized = 2,
27};
28
29/**
30 * NFC::Initialize service function
31 * Inputs:
32 * 0 : Header code [0x00010040]
33 * 1 : (u8) unknown parameter. Can be either value 0x1 or 0x2
34 * Outputs:
35 * 1 : Result of function, 0 on success, otherwise error code
36 */
37void Initialize(Interface* self);
38
39/**
40 * NFC::Shutdown service function
41 * Inputs:
42 * 0 : Header code [0x00020040]
43 * 1 : (u8) unknown parameter
44 * Outputs:
45 * 1 : Result of function, 0 on success, otherwise error code
46 */
47void Shutdown(Interface* self);
48
49/**
50 * NFC::StartCommunication service function
51 * Inputs:
52 * 0 : Header code [0x00030000]
53 * Outputs:
54 * 1 : Result of function, 0 on success, otherwise error code
55 */
56void StartCommunication(Interface* self);
57
58/**
59 * NFC::StopCommunication service function
60 * Inputs:
61 * 0 : Header code [0x00040000]
62 * Outputs:
63 * 1 : Result of function, 0 on success, otherwise error code
64 */
65void StopCommunication(Interface* self);
66
67/**
68 * NFC::StartTagScanning service function
69 * Inputs:
70 * 0 : Header code [0x00050040]
71 * 1 : (u16) unknown. This is normally 0x0
72 * Outputs:
73 * 1 : Result of function, 0 on success, otherwise error code
74 */
75void StartTagScanning(Interface* self);
76
77/**
78 * NFC::StopTagScanning service function
79 * Inputs:
80 * 0 : Header code [0x00060000]
81 * Outputs:
82 * 1 : Result of function, 0 on success, otherwise error code
83 */
84void StopTagScanning(Interface* self);
85
86/**
87 * NFC::LoadAmiiboData service function
88 * Inputs:
89 * 0 : Header code [0x00070000]
90 * Outputs:
91 * 1 : Result of function, 0 on success, otherwise error code
92 */
93void LoadAmiiboData(Interface* self);
94
95/**
96 * NFC::ResetTagScanState service function
97 * Inputs:
98 * 0 : Header code [0x00080000]
99 * Outputs:
100 * 1 : Result of function, 0 on success, otherwise error code
101 */
102void ResetTagScanState(Interface* self);
103
13/** 104/**
14 * NFC::GetTagInRangeEvent service function 105 * NFC::GetTagInRangeEvent service function
15 * Inputs: 106 * Inputs:
@@ -21,6 +112,37 @@ namespace NFC {
21 */ 112 */
22void GetTagInRangeEvent(Interface* self); 113void GetTagInRangeEvent(Interface* self);
23 114
115/**
116 * NFC::GetTagOutOfRangeEvent service function
117 * Inputs:
118 * 0 : Header code [0x000C0000]
119 * Outputs:
120 * 1 : Result of function, 0 on success, otherwise error code
121 * 2 : Copy handle descriptor
122 * 3 : Event Handle
123 */
124void GetTagOutOfRangeEvent(Interface* self);
125
126/**
127 * NFC::GetTagState service function
128 * Inputs:
129 * 0 : Header code [0x000D0000]
130 * Outputs:
131 * 1 : Result of function, 0 on success, otherwise error code
132 * 2 : (u8) Tag state
133 */
134void GetTagState(Interface* self);
135
136/**
137 * NFC::CommunicationGetStatus service function
138 * Inputs:
139 * 0 : Header code [0x000F0000]
140 * Outputs:
141 * 1 : Result of function, 0 on success, otherwise error code
142 * 2 : (u8) Communication state
143 */
144void CommunicationGetStatus(Interface* self);
145
24/// Initialize all NFC services. 146/// Initialize all NFC services.
25void Init(); 147void Init();
26 148
diff --git a/src/core/hle/service/nfc/nfc_m.cpp b/src/core/hle/service/nfc/nfc_m.cpp
index f43b4029a..ebe637650 100644
--- a/src/core/hle/service/nfc/nfc_m.cpp
+++ b/src/core/hle/service/nfc/nfc_m.cpp
@@ -11,18 +11,19 @@ namespace NFC {
11const Interface::FunctionInfo FunctionTable[] = { 11const Interface::FunctionInfo FunctionTable[] = {
12 // clang-format off 12 // clang-format off
13 // nfc:u shared commands 13 // nfc:u shared commands
14 {0x00010040, nullptr, "Initialize"}, 14 {0x00010040, Initialize, "Initialize"},
15 {0x00020040, nullptr, "Shutdown"}, 15 {0x00020040, Shutdown, "Shutdown"},
16 {0x00030000, nullptr, "StartCommunication"}, 16 {0x00030000, StartCommunication, "StartCommunication"},
17 {0x00040000, nullptr, "StopCommunication"}, 17 {0x00040000, StopCommunication, "StopCommunication"},
18 {0x00050040, nullptr, "StartTagScanning"}, 18 {0x00050040, StartTagScanning, "StartTagScanning"},
19 {0x00060000, nullptr, "StopTagScanning"}, 19 {0x00060000, StopTagScanning, "StopTagScanning"},
20 {0x00070000, nullptr, "LoadAmiiboData"}, 20 {0x00070000, LoadAmiiboData, "LoadAmiiboData"},
21 {0x00080000, nullptr, "ResetTagScanState"}, 21 {0x00080000, ResetTagScanState, "ResetTagScanState"},
22 {0x00090002, nullptr, "UpdateStoredAmiiboData"}, 22 {0x00090002, nullptr, "UpdateStoredAmiiboData"},
23 {0x000B0000, GetTagInRangeEvent, "GetTagInRangeEvent"}, 23 {0x000B0000, GetTagInRangeEvent, "GetTagInRangeEvent"},
24 {0x000D0000, nullptr, "GetTagState"}, 24 {0x000C0000, GetTagOutOfRangeEvent, "GetTagOutOfRangeEvent"},
25 {0x000F0000, nullptr, "CommunicationGetStatus"}, 25 {0x000D0000, GetTagState, "GetTagState"},
26 {0x000F0000, CommunicationGetStatus, "CommunicationGetStatus"},
26 {0x00100000, nullptr, "GetTagInfo2"}, 27 {0x00100000, nullptr, "GetTagInfo2"},
27 {0x00110000, nullptr, "GetTagInfo"}, 28 {0x00110000, nullptr, "GetTagInfo"},
28 {0x00120000, nullptr, "CommunicationGetResult"}, 29 {0x00120000, nullptr, "CommunicationGetResult"},
diff --git a/src/core/hle/service/nfc/nfc_u.cpp b/src/core/hle/service/nfc/nfc_u.cpp
index 4b5200ae8..5a40c7874 100644
--- a/src/core/hle/service/nfc/nfc_u.cpp
+++ b/src/core/hle/service/nfc/nfc_u.cpp
@@ -10,18 +10,19 @@ namespace NFC {
10 10
11const Interface::FunctionInfo FunctionTable[] = { 11const Interface::FunctionInfo FunctionTable[] = {
12 // clang-format off 12 // clang-format off
13 {0x00010040, nullptr, "Initialize"}, 13 {0x00010040, Initialize, "Initialize"},
14 {0x00020040, nullptr, "Shutdown"}, 14 {0x00020040, Shutdown, "Shutdown"},
15 {0x00030000, nullptr, "StartCommunication"}, 15 {0x00030000, StartCommunication, "StartCommunication"},
16 {0x00040000, nullptr, "StopCommunication"}, 16 {0x00040000, StopCommunication, "StopCommunication"},
17 {0x00050040, nullptr, "StartTagScanning"}, 17 {0x00050040, StartTagScanning, "StartTagScanning"},
18 {0x00060000, nullptr, "StopTagScanning"}, 18 {0x00060000, StopTagScanning, "StopTagScanning"},
19 {0x00070000, nullptr, "LoadAmiiboData"}, 19 {0x00070000, LoadAmiiboData, "LoadAmiiboData"},
20 {0x00080000, nullptr, "ResetTagScanState"}, 20 {0x00080000, ResetTagScanState, "ResetTagScanState"},
21 {0x00090002, nullptr, "UpdateStoredAmiiboData"}, 21 {0x00090002, nullptr, "UpdateStoredAmiiboData"},
22 {0x000B0000, GetTagInRangeEvent, "GetTagInRangeEvent"}, 22 {0x000B0000, GetTagInRangeEvent, "GetTagInRangeEvent"},
23 {0x000D0000, nullptr, "GetTagState"}, 23 {0x000C0000, GetTagOutOfRangeEvent, "GetTagOutOfRangeEvent"},
24 {0x000F0000, nullptr, "CommunicationGetStatus"}, 24 {0x000D0000, GetTagState, "GetTagState"},
25 {0x000F0000, CommunicationGetStatus, "CommunicationGetStatus"},
25 {0x00100000, nullptr, "GetTagInfo2"}, 26 {0x00100000, nullptr, "GetTagInfo2"},
26 {0x00110000, nullptr, "GetTagInfo"}, 27 {0x00110000, nullptr, "GetTagInfo"},
27 {0x00120000, nullptr, "CommunicationGetResult"}, 28 {0x00120000, nullptr, "CommunicationGetResult"},
diff --git a/src/core/hle/service/nim/nim.cpp b/src/core/hle/service/nim/nim.cpp
index 0be94322c..63c334cb2 100644
--- a/src/core/hle/service/nim/nim.cpp
+++ b/src/core/hle/service/nim/nim.cpp
@@ -19,7 +19,7 @@ void CheckSysUpdateAvailable(Service::Interface* self) {
19 cmd_buff[1] = RESULT_SUCCESS.raw; 19 cmd_buff[1] = RESULT_SUCCESS.raw;
20 cmd_buff[2] = 0; // No update available 20 cmd_buff[2] = 0; // No update available
21 21
22 LOG_WARNING(Service_NWM, "(STUBBED) called"); 22 LOG_WARNING(Service_NIM, "(STUBBED) called");
23} 23}
24 24
25void Init() { 25void Init() {
diff --git a/src/core/hle/service/service.h b/src/core/hle/service/service.h
index a7ba7688f..e6a5f1417 100644
--- a/src/core/hle/service/service.h
+++ b/src/core/hle/service/service.h
@@ -10,6 +10,7 @@
10#include <boost/container/flat_map.hpp> 10#include <boost/container/flat_map.hpp>
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "core/hle/ipc.h" 12#include "core/hle/ipc.h"
13#include "core/hle/ipc_helpers.h"
13#include "core/hle/kernel/client_port.h" 14#include "core/hle/kernel/client_port.h"
14#include "core/hle/kernel/thread.h" 15#include "core/hle/kernel/thread.h"
15#include "core/hle/result.h" 16#include "core/hle/result.h"
diff --git a/src/core/hle/service/y2r_u.cpp b/src/core/hle/service/y2r_u.cpp
index 31bb466fc..907d9c8fa 100644
--- a/src/core/hle/service/y2r_u.cpp
+++ b/src/core/hle/service/y2r_u.cpp
@@ -281,37 +281,39 @@ static void GetTransferEndEvent(Interface* self) {
281} 281}
282 282
283static void SetSendingY(Interface* self) { 283static void SetSendingY(Interface* self) {
284 u32* cmd_buff = Kernel::GetCommandBuffer(); 284 // The helper should be passed by argument to the function
285 285 IPC::RequestParser rp(Kernel::GetCommandBuffer(), 0x00100102);
286 conversion.src_Y.address = cmd_buff[1]; 286 conversion.src_Y.address = rp.Pop<u32>();
287 conversion.src_Y.image_size = cmd_buff[2]; 287 conversion.src_Y.image_size = rp.Pop<u32>();
288 conversion.src_Y.transfer_unit = cmd_buff[3]; 288 conversion.src_Y.transfer_unit = rp.Pop<u32>();
289 conversion.src_Y.gap = cmd_buff[4]; 289 conversion.src_Y.gap = rp.Pop<u32>();
290 Kernel::Handle src_process_handle = rp.PopHandle();
290 291
291 cmd_buff[0] = IPC::MakeHeader(0x10, 1, 0); 292 IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
292 cmd_buff[1] = RESULT_SUCCESS.raw; 293 rb.Push(RESULT_SUCCESS);
293 294
294 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, " 295 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
295 "src_process_handle=0x%08X", 296 "src_process_handle=0x%08X",
296 conversion.src_Y.image_size, conversion.src_Y.transfer_unit, conversion.src_Y.gap, 297 conversion.src_Y.image_size, conversion.src_Y.transfer_unit, conversion.src_Y.gap,
297 cmd_buff[6]); 298 src_process_handle);
298} 299}
299 300
300static void SetSendingU(Interface* self) { 301static void SetSendingU(Interface* self) {
301 u32* cmd_buff = Kernel::GetCommandBuffer(); 302 // The helper should be passed by argument to the function
303 IPC::RequestParser rp(Kernel::GetCommandBuffer(), 0x00110102);
304 conversion.src_U.address = rp.Pop<u32>();
305 conversion.src_U.image_size = rp.Pop<u32>();
306 conversion.src_U.transfer_unit = rp.Pop<u32>();
307 conversion.src_U.gap = rp.Pop<u32>();
308 Kernel::Handle src_process_handle = rp.PopHandle();
302 309
303 conversion.src_U.address = cmd_buff[1]; 310 IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
304 conversion.src_U.image_size = cmd_buff[2]; 311 rb.Push(RESULT_SUCCESS);
305 conversion.src_U.transfer_unit = cmd_buff[3];
306 conversion.src_U.gap = cmd_buff[4];
307
308 cmd_buff[0] = IPC::MakeHeader(0x11, 1, 0);
309 cmd_buff[1] = RESULT_SUCCESS.raw;
310 312
311 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, " 313 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
312 "src_process_handle=0x%08X", 314 "src_process_handle=0x%08X",
313 conversion.src_U.image_size, conversion.src_U.transfer_unit, conversion.src_U.gap, 315 conversion.src_U.image_size, conversion.src_U.transfer_unit, conversion.src_U.gap,
314 cmd_buff[6]); 316 src_process_handle);
315} 317}
316 318
317static void SetSendingV(Interface* self) { 319static void SetSendingV(Interface* self) {
@@ -561,11 +563,10 @@ static void GetAlpha(Interface* self) {
561} 563}
562 564
563static void SetDitheringWeightParams(Interface* self) { 565static void SetDitheringWeightParams(Interface* self) {
564 u32* cmd_buff = Kernel::GetCommandBuffer(); 566 IPC::RequestParser rp(Kernel::GetCommandBuffer(), 0x24, 8, 0); // 0x240200
565 std::memcpy(&dithering_weight_params, &cmd_buff[1], sizeof(DitheringWeightParams)); 567 rp.PopRaw(dithering_weight_params);
566 568 IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
567 cmd_buff[0] = IPC::MakeHeader(0x24, 1, 0); 569 rb.Push(RESULT_SUCCESS);
568 cmd_buff[1] = RESULT_SUCCESS.raw;
569 570
570 LOG_DEBUG(Service_Y2R, "called"); 571 LOG_DEBUG(Service_Y2R, "called");
571} 572}
diff --git a/src/core/hle/svc.cpp b/src/core/hle/svc.cpp
index 96db39ad9..4e0c3fb8b 100644
--- a/src/core/hle/svc.cpp
+++ b/src/core/hle/svc.cpp
@@ -556,11 +556,21 @@ static ResultCode CreateThread(Kernel::Handle* out_handle, s32 priority, u32 ent
556 break; 556 break;
557 } 557 }
558 558
559 if (processor_id == THREADPROCESSORID_1 || processor_id == THREADPROCESSORID_ALL || 559 if (processor_id == THREADPROCESSORID_ALL) {
560 (processor_id == THREADPROCESSORID_DEFAULT && 560 LOG_INFO(Kernel_SVC,
561 Kernel::g_current_process->ideal_processor == THREADPROCESSORID_1)) { 561 "Newly created thread is allowed to be run in any Core, unimplemented.");
562 LOG_WARNING(Kernel_SVC, 562 }
563 "Newly created thread is allowed to be run in the SysCore, unimplemented."); 563
564 if (processor_id == THREADPROCESSORID_DEFAULT &&
565 Kernel::g_current_process->ideal_processor == THREADPROCESSORID_1) {
566 LOG_WARNING(
567 Kernel_SVC,
568 "Newly created thread is allowed to be run in the SysCore (Core1), unimplemented.");
569 }
570
571 if (processor_id == THREADPROCESSORID_1) {
572 LOG_ERROR(Kernel_SVC,
573 "Newly created thread must run in the SysCore (Core1), unimplemented.");
564 } 574 }
565 575
566 CASCADE_RESULT(SharedPtr<Thread> thread, Kernel::Thread::Create(name, entry_point, priority, 576 CASCADE_RESULT(SharedPtr<Thread> thread, Kernel::Thread::Create(name, entry_point, priority,
@@ -837,6 +847,11 @@ static ResultCode SetTimer(Kernel::Handle handle, s64 initial, s64 interval) {
837 847
838 LOG_TRACE(Kernel_SVC, "called timer=0x%08X", handle); 848 LOG_TRACE(Kernel_SVC, "called timer=0x%08X", handle);
839 849
850 if (initial < 0 || interval < 0) {
851 return ResultCode(ErrorDescription::OutOfRange, ErrorModule::Kernel,
852 ErrorSummary::InvalidArgument, ErrorLevel::Permanent);
853 }
854
840 SharedPtr<Timer> timer = Kernel::g_handle_table.Get<Timer>(handle); 855 SharedPtr<Timer> timer = Kernel::g_handle_table.Get<Timer>(handle);
841 if (timer == nullptr) 856 if (timer == nullptr)
842 return ERR_INVALID_HANDLE; 857 return ERR_INVALID_HANDLE;
diff --git a/src/core/hw/aes/arithmetic128.cpp b/src/core/hw/aes/arithmetic128.cpp
new file mode 100644
index 000000000..55b954a52
--- /dev/null
+++ b/src/core/hw/aes/arithmetic128.cpp
@@ -0,0 +1,47 @@
1// Copyright 2017 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <functional>
7#include "core/hw/aes/arithmetic128.h"
8
9namespace HW {
10namespace AES {
11
12AESKey Lrot128(const AESKey& in, u32 rot) {
13 AESKey out;
14 rot %= 128;
15 const u32 byte_shift = rot / 8;
16 const u32 bit_shift = rot % 8;
17
18 for (u32 i = 0; i < 16; i++) {
19 const u32 wrap_index_a = (i + byte_shift) % 16;
20 const u32 wrap_index_b = (i + byte_shift + 1) % 16;
21 out[i] = ((in[wrap_index_a] << bit_shift) | (in[wrap_index_b] >> (8 - bit_shift))) & 0xFF;
22 }
23 return out;
24}
25
26AESKey Add128(const AESKey& a, const AESKey& b) {
27 AESKey out;
28 u32 carry = 0;
29 u32 sum = 0;
30
31 for (int i = 15; i >= 0; i--) {
32 sum = a[i] + b[i] + carry;
33 carry = sum >> 8;
34 out[i] = static_cast<u8>(sum & 0xff);
35 }
36
37 return out;
38}
39
40AESKey Xor128(const AESKey& a, const AESKey& b) {
41 AESKey out;
42 std::transform(a.cbegin(), a.cend(), b.cbegin(), out.begin(), std::bit_xor<>());
43 return out;
44}
45
46} // namespace AES
47} // namespace HW
diff --git a/src/core/hw/aes/arithmetic128.h b/src/core/hw/aes/arithmetic128.h
new file mode 100644
index 000000000..d670e2ce2
--- /dev/null
+++ b/src/core/hw/aes/arithmetic128.h
@@ -0,0 +1,17 @@
1// Copyright 2017 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "core/hw/aes/key.h"
9
10namespace HW {
11namespace AES {
12AESKey Lrot128(const AESKey& in, u32 rot);
13AESKey Add128(const AESKey& a, const AESKey& b);
14AESKey Xor128(const AESKey& a, const AESKey& b);
15
16} // namespace AES
17} // namespace HW
diff --git a/src/core/hw/aes/ccm.cpp b/src/core/hw/aes/ccm.cpp
new file mode 100644
index 000000000..dc7035ab6
--- /dev/null
+++ b/src/core/hw/aes/ccm.cpp
@@ -0,0 +1,95 @@
1// Copyright 2017 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <cryptopp/aes.h>
7#include <cryptopp/ccm.h>
8#include <cryptopp/cryptlib.h>
9#include <cryptopp/filters.h>
10#include "common/alignment.h"
11#include "common/logging/log.h"
12#include "core/hw/aes/ccm.h"
13#include "core/hw/aes/key.h"
14
15namespace HW {
16namespace AES {
17
18namespace {
19
20// 3DS uses a non-standard AES-CCM algorithm, so we need to derive a sub class from the standard one
21// and override with the non-standard part.
22using CryptoPP::lword;
23using CryptoPP::AES;
24using CryptoPP::CCM_Final;
25using CryptoPP::CCM_Base;
26template <bool T_IsEncryption>
27class CCM_3DSVariant_Final : public CCM_Final<AES, CCM_MAC_SIZE, T_IsEncryption> {
28public:
29 void UncheckedSpecifyDataLengths(lword header_length, lword message_length,
30 lword footer_length) override {
31 // 3DS uses the aligned size to generate B0 for authentication, instead of the original size
32 lword aligned_message_length = Common::AlignUp(message_length, AES_BLOCK_SIZE);
33 CCM_Base::UncheckedSpecifyDataLengths(header_length, aligned_message_length, footer_length);
34 CCM_Base::m_messageLength = message_length; // restore the actual message size
35 }
36};
37
38class CCM_3DSVariant {
39public:
40 using Encryption = CCM_3DSVariant_Final<true>;
41 using Decryption = CCM_3DSVariant_Final<false>;
42};
43
44} // namespace
45
46std::vector<u8> EncryptSignCCM(const std::vector<u8>& pdata, const CCMNonce& nonce,
47 size_t slot_id) {
48 if (!IsNormalKeyAvailable(slot_id)) {
49 LOG_ERROR(HW_AES, "Key slot %d not available. Will use zero key.", slot_id);
50 }
51 const AESKey normal = GetNormalKey(slot_id);
52 std::vector<u8> cipher(pdata.size() + CCM_MAC_SIZE);
53
54 try {
55 CCM_3DSVariant::Encryption e;
56 e.SetKeyWithIV(normal.data(), AES_BLOCK_SIZE, nonce.data(), CCM_NONCE_SIZE);
57 e.SpecifyDataLengths(0, pdata.size(), 0);
58 CryptoPP::ArraySource as(pdata.data(), pdata.size(), true,
59 new CryptoPP::AuthenticatedEncryptionFilter(
60 e, new CryptoPP::ArraySink(cipher.data(), cipher.size())));
61 } catch (const CryptoPP::Exception& e) {
62 LOG_ERROR(HW_AES, "FAILED with: %s", e.what());
63 }
64 return cipher;
65}
66
67std::vector<u8> DecryptVerifyCCM(const std::vector<u8>& cipher, const CCMNonce& nonce,
68 size_t slot_id) {
69 if (!IsNormalKeyAvailable(slot_id)) {
70 LOG_ERROR(HW_AES, "Key slot %d not available. Will use zero key.", slot_id);
71 }
72 const AESKey normal = GetNormalKey(slot_id);
73 const std::size_t pdata_size = cipher.size() - CCM_MAC_SIZE;
74 std::vector<u8> pdata(pdata_size);
75
76 try {
77 CCM_3DSVariant::Decryption d;
78 d.SetKeyWithIV(normal.data(), AES_BLOCK_SIZE, nonce.data(), CCM_NONCE_SIZE);
79 d.SpecifyDataLengths(0, pdata_size, 0);
80 CryptoPP::AuthenticatedDecryptionFilter df(
81 d, new CryptoPP::ArraySink(pdata.data(), pdata_size));
82 CryptoPP::ArraySource as(cipher.data(), cipher.size(), true, new CryptoPP::Redirector(df));
83 if (!df.GetLastResult()) {
84 LOG_ERROR(HW_AES, "FAILED");
85 return {};
86 }
87 } catch (const CryptoPP::Exception& e) {
88 LOG_ERROR(HW_AES, "FAILED with: %s", e.what());
89 return {};
90 }
91 return pdata;
92}
93
94} // namespace AES
95} // namespace HW
diff --git a/src/core/hw/aes/ccm.h b/src/core/hw/aes/ccm.h
new file mode 100644
index 000000000..bf4146e80
--- /dev/null
+++ b/src/core/hw/aes/ccm.h
@@ -0,0 +1,40 @@
1// Copyright 2017 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <cstddef>
9#include <vector>
10#include "common/common_types.h"
11
12namespace HW {
13namespace AES {
14
15constexpr size_t CCM_NONCE_SIZE = 12;
16constexpr size_t CCM_MAC_SIZE = 16;
17
18using CCMNonce = std::array<u8, CCM_NONCE_SIZE>;
19
20/**
21 * Encrypts and adds a MAC to the given data using AES-CCM algorithm.
22 * @param pdata The plain text data to encrypt
23 * @param nonce The nonce data to use for encryption
24 * @param slot_id The slot ID of the key to use for encryption
25 * @returns a vector of u8 containing the encrypted data with MAC at the end
26 */
27std::vector<u8> EncryptSignCCM(const std::vector<u8>& pdata, const CCMNonce& nonce, size_t slot_id);
28
29/**
30 * Decrypts and verifies the MAC of the given data using AES-CCM algorithm.
31 * @param cipher The cipher text data to decrypt, with MAC at the end to verify
32 * @param nonce The nonce data to use for decryption
33 * @param slot_id The slot ID of the key to use for decryption
34 * @returns a vector of u8 containing the decrypted data; an empty vector if the verification fails
35 */
36std::vector<u8> DecryptVerifyCCM(const std::vector<u8>& cipher, const CCMNonce& nonce,
37 size_t slot_id);
38
39} // namespace AES
40} // namespace HW
diff --git a/src/core/hw/aes/key.cpp b/src/core/hw/aes/key.cpp
new file mode 100644
index 000000000..4e8a8a59a
--- /dev/null
+++ b/src/core/hw/aes/key.cpp
@@ -0,0 +1,173 @@
1// Copyright 2017 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <exception>
7#include <sstream>
8#include <boost/optional.hpp>
9#include "common/common_paths.h"
10#include "common/file_util.h"
11#include "common/logging/log.h"
12#include "common/string_util.h"
13#include "core/hw/aes/arithmetic128.h"
14#include "core/hw/aes/key.h"
15
16namespace HW {
17namespace AES {
18
19namespace {
20
21boost::optional<AESKey> generator_constant;
22
23struct KeySlot {
24 boost::optional<AESKey> x;
25 boost::optional<AESKey> y;
26 boost::optional<AESKey> normal;
27
28 void SetKeyX(const AESKey& key) {
29 x = key;
30 if (y && generator_constant) {
31 GenerateNormalKey();
32 }
33 }
34
35 void SetKeyY(const AESKey& key) {
36 y = key;
37 if (x && generator_constant) {
38 GenerateNormalKey();
39 }
40 }
41
42 void SetNormalKey(const AESKey& key) {
43 normal = key;
44 }
45
46 void GenerateNormalKey() {
47 normal = Lrot128(Add128(Xor128(Lrot128(*x, 2), *y), *generator_constant), 87);
48 }
49
50 void Clear() {
51 x.reset();
52 y.reset();
53 normal.reset();
54 }
55};
56
57std::array<KeySlot, KeySlotID::MaxKeySlotID> key_slots;
58
59void ClearAllKeys() {
60 for (KeySlot& slot : key_slots) {
61 slot.Clear();
62 }
63 generator_constant.reset();
64}
65
66AESKey HexToKey(const std::string& hex) {
67 if (hex.size() < 32) {
68 throw std::invalid_argument("hex string is too short");
69 }
70
71 AESKey key;
72 for (size_t i = 0; i < key.size(); ++i) {
73 key[i] = static_cast<u8>(std::stoi(hex.substr(i * 2, 2), 0, 16));
74 }
75
76 return key;
77}
78
79void LoadPresetKeys() {
80 const std::string filepath = FileUtil::GetUserPath(D_SYSDATA_IDX) + AES_KEYS;
81 FileUtil::CreateFullPath(filepath); // Create path if not already created
82 std::ifstream file;
83 OpenFStream(file, filepath, std::ios_base::in);
84 if (!file) {
85 return;
86 }
87
88 while (!file.eof()) {
89 std::string line;
90 std::getline(file, line);
91 std::vector<std::string> parts;
92 Common::SplitString(line, '=', parts);
93 if (parts.size() != 2) {
94 LOG_ERROR(HW_AES, "Failed to parse %s", line.c_str());
95 continue;
96 }
97
98 const std::string& name = parts[0];
99 AESKey key;
100 try {
101 key = HexToKey(parts[1]);
102 } catch (const std::logic_error& e) {
103 LOG_ERROR(HW_AES, "Invalid key %s: %s", parts[1].c_str(), e.what());
104 continue;
105 }
106
107 if (name == "generator") {
108 generator_constant = key;
109 continue;
110 }
111
112 size_t slot_id;
113 char key_type;
114 if (std::sscanf(name.c_str(), "slot0x%zXKey%c", &slot_id, &key_type) != 2) {
115 LOG_ERROR(HW_AES, "Invalid key name %s", name.c_str());
116 continue;
117 }
118
119 if (slot_id >= MaxKeySlotID) {
120 LOG_ERROR(HW_AES, "Out of range slot ID 0x%zX", slot_id);
121 continue;
122 }
123
124 switch (key_type) {
125 case 'X':
126 key_slots.at(slot_id).SetKeyX(key);
127 break;
128 case 'Y':
129 key_slots.at(slot_id).SetKeyY(key);
130 break;
131 case 'N':
132 key_slots.at(slot_id).SetNormalKey(key);
133 break;
134 default:
135 LOG_ERROR(HW_AES, "Invalid key type %c", key_type);
136 break;
137 }
138 }
139}
140
141} // namespace
142
143void InitKeys() {
144 ClearAllKeys();
145 LoadPresetKeys();
146}
147
148void SetGeneratorConstant(const AESKey& key) {
149 generator_constant = key;
150}
151
152void SetKeyX(size_t slot_id, const AESKey& key) {
153 key_slots.at(slot_id).SetKeyX(key);
154}
155
156void SetKeyY(size_t slot_id, const AESKey& key) {
157 key_slots.at(slot_id).SetKeyY(key);
158}
159
160void SetNormalKey(size_t slot_id, const AESKey& key) {
161 key_slots.at(slot_id).SetNormalKey(key);
162}
163
164bool IsNormalKeyAvailable(size_t slot_id) {
165 return key_slots.at(slot_id).normal.is_initialized();
166}
167
168AESKey GetNormalKey(size_t slot_id) {
169 return key_slots.at(slot_id).normal.value_or(AESKey{});
170}
171
172} // namespace AES
173} // namespace HW
diff --git a/src/core/hw/aes/key.h b/src/core/hw/aes/key.h
new file mode 100644
index 000000000..b01d04f13
--- /dev/null
+++ b/src/core/hw/aes/key.h
@@ -0,0 +1,35 @@
1// Copyright 2017 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <cstddef>
9#include "common/common_types.h"
10
11namespace HW {
12namespace AES {
13
14enum KeySlotID : size_t {
15 APTWrap = 0x31,
16
17 MaxKeySlotID = 0x40,
18};
19
20constexpr size_t AES_BLOCK_SIZE = 16;
21
22using AESKey = std::array<u8, AES_BLOCK_SIZE>;
23
24void InitKeys();
25
26void SetGeneratorConstant(const AESKey& key);
27void SetKeyX(size_t slot_id, const AESKey& key);
28void SetKeyY(size_t slot_id, const AESKey& key);
29void SetNormalKey(size_t slot_id, const AESKey& key);
30
31bool IsNormalKeyAvailable(size_t slot_id);
32AESKey GetNormalKey(size_t slot_id);
33
34} // namespace AES
35} // namespace HW
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index fa8c13d36..42809c731 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -8,17 +8,13 @@
8#include "common/color.h" 8#include "common/color.h"
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "common/logging/log.h" 10#include "common/logging/log.h"
11#include "common/math_util.h"
12#include "common/microprofile.h" 11#include "common/microprofile.h"
13#include "common/thread.h"
14#include "common/timer.h"
15#include "common/vector_math.h" 12#include "common/vector_math.h"
16#include "core/core_timing.h" 13#include "core/core_timing.h"
17#include "core/hle/service/gsp_gpu.h" 14#include "core/hle/service/gsp_gpu.h"
18#include "core/hw/gpu.h" 15#include "core/hw/gpu.h"
19#include "core/hw/hw.h" 16#include "core/hw/hw.h"
20#include "core/memory.h" 17#include "core/memory.h"
21#include "core/settings.h"
22#include "core/tracer/recorder.h" 18#include "core/tracer/recorder.h"
23#include "video_core/command_processor.h" 19#include "video_core/command_processor.h"
24#include "video_core/debug_utils/debug_utils.h" 20#include "video_core/debug_utils/debug_utils.h"
@@ -32,19 +28,9 @@ namespace GPU {
32Regs g_regs; 28Regs g_regs;
33 29
34/// 268MHz CPU clocks / 60Hz frames per second 30/// 268MHz CPU clocks / 60Hz frames per second
35const u64 frame_ticks = BASE_CLOCK_RATE_ARM11 / 60; 31const u64 frame_ticks = BASE_CLOCK_RATE_ARM11 / SCREEN_REFRESH_RATE;
36/// Event id for CoreTiming 32/// Event id for CoreTiming
37static int vblank_event; 33static int vblank_event;
38/// Total number of frames drawn
39static u64 frame_count;
40/// Start clock for frame limiter
41static u32 time_point;
42/// Total delay caused by slow frames
43static float time_delay;
44constexpr float FIXED_FRAME_TIME = 1000.0f / 60;
45// Max lag caused by slow frames. Can be adjusted to compensate for too many slow frames. Higher
46// values increases time needed to limit frame rate after spikes
47constexpr float MAX_LAG_TIME = 18;
48 34
49template <typename T> 35template <typename T>
50inline void Read(T& var, const u32 raw_addr) { 36inline void Read(T& var, const u32 raw_addr) {
@@ -522,24 +508,8 @@ template void Write<u32>(u32 addr, const u32 data);
522template void Write<u16>(u32 addr, const u16 data); 508template void Write<u16>(u32 addr, const u16 data);
523template void Write<u8>(u32 addr, const u8 data); 509template void Write<u8>(u32 addr, const u8 data);
524 510
525static void FrameLimiter() {
526 time_delay += FIXED_FRAME_TIME;
527 time_delay = MathUtil::Clamp(time_delay, -MAX_LAG_TIME, MAX_LAG_TIME);
528 s32 desired_time = static_cast<s32>(time_delay);
529 s32 elapsed_time = static_cast<s32>(Common::Timer::GetTimeMs() - time_point);
530
531 if (elapsed_time < desired_time) {
532 Common::SleepCurrentThread(desired_time - elapsed_time);
533 }
534
535 u32 frame_time = Common::Timer::GetTimeMs() - time_point;
536
537 time_delay -= frame_time;
538}
539
540/// Update hardware 511/// Update hardware
541static void VBlankCallback(u64 userdata, int cycles_late) { 512static void VBlankCallback(u64 userdata, int cycles_late) {
542 frame_count++;
543 VideoCore::g_renderer->SwapBuffers(); 513 VideoCore::g_renderer->SwapBuffers();
544 514
545 // Signal to GSP that GPU interrupt has occurred 515 // Signal to GSP that GPU interrupt has occurred
@@ -550,12 +520,6 @@ static void VBlankCallback(u64 userdata, int cycles_late) {
550 Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PDC0); 520 Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PDC0);
551 Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PDC1); 521 Service::GSP::SignalInterrupt(Service::GSP::InterruptId::PDC1);
552 522
553 if (!Settings::values.use_vsync && Settings::values.toggle_framelimit) {
554 FrameLimiter();
555 }
556
557 time_point = Common::Timer::GetTimeMs();
558
559 // Reschedule recurrent event 523 // Reschedule recurrent event
560 CoreTiming::ScheduleEvent(frame_ticks - cycles_late, vblank_event); 524 CoreTiming::ScheduleEvent(frame_ticks - cycles_late, vblank_event);
561} 525}
@@ -590,9 +554,6 @@ void Init() {
590 framebuffer_sub.color_format.Assign(Regs::PixelFormat::RGB8); 554 framebuffer_sub.color_format.Assign(Regs::PixelFormat::RGB8);
591 framebuffer_sub.active_fb = 0; 555 framebuffer_sub.active_fb = 0;
592 556
593 frame_count = 0;
594 time_point = Common::Timer::GetTimeMs();
595
596 vblank_event = CoreTiming::RegisterEvent("GPU::VBlankCallback", VBlankCallback); 557 vblank_event = CoreTiming::RegisterEvent("GPU::VBlankCallback", VBlankCallback);
597 CoreTiming::ScheduleEvent(frame_ticks, vblank_event); 558 CoreTiming::ScheduleEvent(frame_ticks, vblank_event);
598 559
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h
index d53381216..bdd997b2a 100644
--- a/src/core/hw/gpu.h
+++ b/src/core/hw/gpu.h
@@ -13,6 +13,8 @@
13 13
14namespace GPU { 14namespace GPU {
15 15
16constexpr float SCREEN_REFRESH_RATE = 60;
17
16// Returns index corresponding to the Regs member labeled by field_name 18// Returns index corresponding to the Regs member labeled by field_name
17// TODO: Due to Visual studio bug 209229, offsetof does not return constant expressions 19// TODO: Due to Visual studio bug 209229, offsetof does not return constant expressions
18// when used with array elements (e.g. GPU_REG_INDEX(memory_fill_config[0])). 20// when used with array elements (e.g. GPU_REG_INDEX(memory_fill_config[0])).
diff --git a/src/core/hw/hw.cpp b/src/core/hw/hw.cpp
index 9ff8825b2..8499f2ce6 100644
--- a/src/core/hw/hw.cpp
+++ b/src/core/hw/hw.cpp
@@ -4,6 +4,7 @@
4 4
5#include "common/common_types.h" 5#include "common/common_types.h"
6#include "common/logging/log.h" 6#include "common/logging/log.h"
7#include "core/hw/aes/key.h"
7#include "core/hw/gpu.h" 8#include "core/hw/gpu.h"
8#include "core/hw/hw.h" 9#include "core/hw/hw.h"
9#include "core/hw/lcd.h" 10#include "core/hw/lcd.h"
@@ -85,6 +86,7 @@ void Update() {}
85 86
86/// Initialize hardware 87/// Initialize hardware
87void Init() { 88void Init() {
89 AES::InitKeys();
88 GPU::Init(); 90 GPU::Init();
89 LCD::Init(); 91 LCD::Init();
90 LOG_DEBUG(HW, "initialized OK"); 92 LOG_DEBUG(HW, "initialized OK");
diff --git a/src/core/loader/3dsx.cpp b/src/core/loader/3dsx.cpp
index 09266e8b0..74e336487 100644
--- a/src/core/loader/3dsx.cpp
+++ b/src/core/loader/3dsx.cpp
@@ -5,7 +5,7 @@
5#include <algorithm> 5#include <algorithm>
6#include <vector> 6#include <vector>
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8#include "core/file_sys/archive_romfs.h" 8#include "core/file_sys/archive_selfncch.h"
9#include "core/hle/kernel/process.h" 9#include "core/hle/kernel/process.h"
10#include "core/hle/kernel/resource_limit.h" 10#include "core/hle/kernel/resource_limit.h"
11#include "core/hle/service/fs/archive.h" 11#include "core/hle/service/fs/archive.h"
@@ -277,8 +277,8 @@ ResultStatus AppLoader_THREEDSX::Load() {
277 277
278 Kernel::g_current_process->Run(48, Kernel::DEFAULT_STACK_SIZE); 278 Kernel::g_current_process->Run(48, Kernel::DEFAULT_STACK_SIZE);
279 279
280 Service::FS::RegisterArchiveType(std::make_unique<FileSys::ArchiveFactory_RomFS>(*this), 280 Service::FS::RegisterArchiveType(std::make_unique<FileSys::ArchiveFactory_SelfNCCH>(*this),
281 Service::FS::ArchiveIdCode::RomFS); 281 Service::FS::ArchiveIdCode::SelfNCCH);
282 282
283 is_loaded = true; 283 is_loaded = true;
284 return ResultStatus::Success; 284 return ResultStatus::Success;
diff --git a/src/core/loader/loader.h b/src/core/loader/loader.h
index a6c2a745f..1d80766ae 100644
--- a/src/core/loader/loader.h
+++ b/src/core/loader/loader.h
@@ -54,7 +54,7 @@ FileType IdentifyFile(const std::string& file_name);
54 * @return FileType of file. Note: this will return FileType::Unknown if it is unable to determine 54 * @return FileType of file. Note: this will return FileType::Unknown if it is unable to determine
55 * a filetype, and will never return FileType::Error. 55 * a filetype, and will never return FileType::Error.
56 */ 56 */
57FileType GuessFromExtension(const std::string& extension_); 57FileType GuessFromExtension(const std::string& extension);
58 58
59/** 59/**
60 * Convert a FileType into a string which can be displayed to the user. 60 * Convert a FileType into a string which can be displayed to the user.
diff --git a/src/core/loader/ncch.cpp b/src/core/loader/ncch.cpp
index 5df33f6d2..98b8259d9 100644
--- a/src/core/loader/ncch.cpp
+++ b/src/core/loader/ncch.cpp
@@ -8,7 +8,7 @@
8#include "common/logging/log.h" 8#include "common/logging/log.h"
9#include "common/string_util.h" 9#include "common/string_util.h"
10#include "common/swap.h" 10#include "common/swap.h"
11#include "core/file_sys/archive_romfs.h" 11#include "core/file_sys/archive_selfncch.h"
12#include "core/hle/kernel/process.h" 12#include "core/hle/kernel/process.h"
13#include "core/hle/kernel/resource_limit.h" 13#include "core/hle/kernel/resource_limit.h"
14#include "core/hle/service/cfg/cfg.h" 14#include "core/hle/service/cfg/cfg.h"
@@ -342,8 +342,8 @@ ResultStatus AppLoader_NCCH::Load() {
342 if (ResultStatus::Success != result) 342 if (ResultStatus::Success != result)
343 return result; 343 return result;
344 344
345 Service::FS::RegisterArchiveType(std::make_unique<FileSys::ArchiveFactory_RomFS>(*this), 345 Service::FS::RegisterArchiveType(std::make_unique<FileSys::ArchiveFactory_SelfNCCH>(*this),
346 Service::FS::ArchiveIdCode::RomFS); 346 Service::FS::ArchiveIdCode::SelfNCCH);
347 347
348 ParseRegionLockoutInfo(); 348 ParseRegionLockoutInfo();
349 349
diff --git a/src/core/perf_stats.cpp b/src/core/perf_stats.cpp
new file mode 100644
index 000000000..2cdfb9ded
--- /dev/null
+++ b/src/core/perf_stats.cpp
@@ -0,0 +1,105 @@
1// Copyright 2017 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <chrono>
6#include <mutex>
7#include <thread>
8#include "common/math_util.h"
9#include "core/hw/gpu.h"
10#include "core/perf_stats.h"
11#include "core/settings.h"
12
13using namespace std::chrono_literals;
14using DoubleSecs = std::chrono::duration<double, std::chrono::seconds::period>;
15using std::chrono::duration_cast;
16using std::chrono::microseconds;
17
18namespace Core {
19
20void PerfStats::BeginSystemFrame() {
21 std::lock_guard<std::mutex> lock(object_mutex);
22
23 frame_begin = Clock::now();
24}
25
26void PerfStats::EndSystemFrame() {
27 std::lock_guard<std::mutex> lock(object_mutex);
28
29 auto frame_end = Clock::now();
30 accumulated_frametime += frame_end - frame_begin;
31 system_frames += 1;
32
33 previous_frame_length = frame_end - previous_frame_end;
34 previous_frame_end = frame_end;
35}
36
37void PerfStats::EndGameFrame() {
38 std::lock_guard<std::mutex> lock(object_mutex);
39
40 game_frames += 1;
41}
42
43PerfStats::Results PerfStats::GetAndResetStats(u64 current_system_time_us) {
44 std::lock_guard<std::mutex> lock(object_mutex);
45
46 auto now = Clock::now();
47 // Walltime elapsed since stats were reset
48 auto interval = duration_cast<DoubleSecs>(now - reset_point).count();
49
50 auto system_us_per_second =
51 static_cast<double>(current_system_time_us - reset_point_system_us) / interval;
52
53 Results results{};
54 results.system_fps = static_cast<double>(system_frames) / interval;
55 results.game_fps = static_cast<double>(game_frames) / interval;
56 results.frametime = duration_cast<DoubleSecs>(accumulated_frametime).count() /
57 static_cast<double>(system_frames);
58 results.emulation_speed = system_us_per_second / 1'000'000.0;
59
60 // Reset counters
61 reset_point = now;
62 reset_point_system_us = current_system_time_us;
63 accumulated_frametime = Clock::duration::zero();
64 system_frames = 0;
65 game_frames = 0;
66
67 return results;
68}
69
70double PerfStats::GetLastFrameTimeScale() {
71 std::lock_guard<std::mutex> lock(object_mutex);
72
73 constexpr double FRAME_LENGTH = 1.0 / GPU::SCREEN_REFRESH_RATE;
74 return duration_cast<DoubleSecs>(previous_frame_length).count() / FRAME_LENGTH;
75}
76
77void FrameLimiter::DoFrameLimiting(u64 current_system_time_us) {
78 // Max lag caused by slow frames. Can be adjusted to compensate for too many slow frames. Higher
79 // values increase the time needed to recover and limit framerate again after spikes.
80 constexpr microseconds MAX_LAG_TIME_US = 25ms;
81
82 if (!Settings::values.toggle_framelimit) {
83 return;
84 }
85
86 auto now = Clock::now();
87
88 frame_limiting_delta_err += microseconds(current_system_time_us - previous_system_time_us);
89 frame_limiting_delta_err -= duration_cast<microseconds>(now - previous_walltime);
90 frame_limiting_delta_err =
91 MathUtil::Clamp(frame_limiting_delta_err, -MAX_LAG_TIME_US, MAX_LAG_TIME_US);
92
93 if (frame_limiting_delta_err > microseconds::zero()) {
94 std::this_thread::sleep_for(frame_limiting_delta_err);
95
96 auto now_after_sleep = Clock::now();
97 frame_limiting_delta_err -= duration_cast<microseconds>(now_after_sleep - now);
98 now = now_after_sleep;
99 }
100
101 previous_system_time_us = current_system_time_us;
102 previous_walltime = now;
103}
104
105} // namespace Core
diff --git a/src/core/perf_stats.h b/src/core/perf_stats.h
new file mode 100644
index 000000000..362b205c8
--- /dev/null
+++ b/src/core/perf_stats.h
@@ -0,0 +1,83 @@
1// Copyright 2017 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <chrono>
8#include <mutex>
9#include "common/common_types.h"
10
11namespace Core {
12
13/**
14 * Class to manage and query performance/timing statistics. All public functions of this class are
15 * thread-safe unless stated otherwise.
16 */
17class PerfStats {
18public:
19 using Clock = std::chrono::high_resolution_clock;
20
21 struct Results {
22 /// System FPS (LCD VBlanks) in Hz
23 double system_fps;
24 /// Game FPS (GSP frame submissions) in Hz
25 double game_fps;
26 /// Walltime per system frame, in seconds, excluding any waits
27 double frametime;
28 /// Ratio of walltime / emulated time elapsed
29 double emulation_speed;
30 };
31
32 void BeginSystemFrame();
33 void EndSystemFrame();
34 void EndGameFrame();
35
36 Results GetAndResetStats(u64 current_system_time_us);
37
38 /**
39 * Gets the ratio between walltime and the emulated time of the previous system frame. This is
40 * useful for scaling inputs or outputs moving between the two time domains.
41 */
42 double GetLastFrameTimeScale();
43
44private:
45 std::mutex object_mutex;
46
47 /// Point when the cumulative counters were reset
48 Clock::time_point reset_point = Clock::now();
49 /// System time when the cumulative counters were reset
50 u64 reset_point_system_us = 0;
51
52 /// Cumulative duration (excluding v-sync/frame-limiting) of frames since last reset
53 Clock::duration accumulated_frametime = Clock::duration::zero();
54 /// Cumulative number of system frames (LCD VBlanks) presented since last reset
55 u32 system_frames = 0;
56 /// Cumulative number of game frames (GSP frame submissions) since last reset
57 u32 game_frames = 0;
58
59 /// Point when the previous system frame ended
60 Clock::time_point previous_frame_end = reset_point;
61 /// Point when the current system frame began
62 Clock::time_point frame_begin = reset_point;
63 /// Total visible duration (including frame-limiting, etc.) of the previous system frame
64 Clock::duration previous_frame_length = Clock::duration::zero();
65};
66
67class FrameLimiter {
68public:
69 using Clock = std::chrono::high_resolution_clock;
70
71 void DoFrameLimiting(u64 current_system_time_us);
72
73private:
74 /// Emulated system time (in microseconds) at the last limiter invocation
75 u64 previous_system_time_us = 0;
76 /// Walltime at the last limiter invocation
77 Clock::time_point previous_walltime = Clock::now();
78
79 /// Accumulated difference between walltime and emulated time
80 std::chrono::microseconds frame_limiting_delta_err{0};
81};
82
83} // namespace Core
diff --git a/src/core/settings.cpp b/src/core/settings.cpp
index 9afaf79ec..3a32b70aa 100644
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -15,7 +15,7 @@ Values values = {};
15 15
16void Apply() { 16void Apply() {
17 17
18 GDBStub::SetServerPort(static_cast<u32>(values.gdbstub_port)); 18 GDBStub::SetServerPort(values.gdbstub_port);
19 GDBStub::ToggleServer(values.use_gdbstub); 19 GDBStub::ToggleServer(values.use_gdbstub);
20 20
21 VideoCore::g_hw_renderer_enabled = values.use_hw_renderer; 21 VideoCore::g_hw_renderer_enabled = values.use_hw_renderer;
diff --git a/src/core/settings.h b/src/core/settings.h
index 8dbda653a..b6c75531f 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -7,6 +7,7 @@
7#include <array> 7#include <array>
8#include <string> 8#include <string>
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "core/hle/service/cam/cam.h"
10 11
11namespace Settings { 12namespace Settings {
12 13
@@ -104,6 +105,11 @@ struct Values {
104 // Audio 105 // Audio
105 std::string sink_id; 106 std::string sink_id;
106 bool enable_audio_stretching; 107 bool enable_audio_stretching;
108 std::string audio_device_id;
109
110 // Camera
111 std::array<std::string, Service::CAM::NumCameras> camera_name;
112 std::array<std::string, Service::CAM::NumCameras> camera_config;
107 113
108 // Debugging 114 // Debugging
109 bool use_gdbstub; 115 bool use_gdbstub;
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 6ca319b59..5317719e8 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -1,26 +1,46 @@
1set(SRCS 1set(SRCS
2 command_processor.cpp
3 debug_utils/debug_utils.cpp
4 pica.cpp
5 primitive_assembly.cpp
6 regs.cpp
7 renderer_base.cpp
2 renderer_opengl/gl_rasterizer.cpp 8 renderer_opengl/gl_rasterizer.cpp
3 renderer_opengl/gl_rasterizer_cache.cpp 9 renderer_opengl/gl_rasterizer_cache.cpp
4 renderer_opengl/gl_shader_gen.cpp 10 renderer_opengl/gl_shader_gen.cpp
5 renderer_opengl/gl_shader_util.cpp 11 renderer_opengl/gl_shader_util.cpp
6 renderer_opengl/gl_state.cpp 12 renderer_opengl/gl_state.cpp
7 renderer_opengl/renderer_opengl.cpp 13 renderer_opengl/renderer_opengl.cpp
8 debug_utils/debug_utils.cpp
9 clipper.cpp
10 command_processor.cpp
11 pica.cpp
12 primitive_assembly.cpp
13 rasterizer.cpp
14 renderer_base.cpp
15 shader/shader.cpp 14 shader/shader.cpp
16 shader/shader_interpreter.cpp 15 shader/shader_interpreter.cpp
17 swrasterizer.cpp 16 swrasterizer/clipper.cpp
17 swrasterizer/framebuffer.cpp
18 swrasterizer/rasterizer.cpp
19 swrasterizer/swrasterizer.cpp
20 swrasterizer/texturing.cpp
21 texture/etc1.cpp
22 texture/texture_decode.cpp
18 vertex_loader.cpp 23 vertex_loader.cpp
19 video_core.cpp 24 video_core.cpp
20 ) 25 )
21 26
22set(HEADERS 27set(HEADERS
28 command_processor.h
23 debug_utils/debug_utils.h 29 debug_utils/debug_utils.h
30 gpu_debugger.h
31 pica.h
32 pica_state.h
33 pica_types.h
34 primitive_assembly.h
35 rasterizer_interface.h
36 regs.h
37 regs_framebuffer.h
38 regs_lighting.h
39 regs_pipeline.h
40 regs_rasterizer.h
41 regs_shader.h
42 regs_texturing.h
43 renderer_base.h
24 renderer_opengl/gl_rasterizer.h 44 renderer_opengl/gl_rasterizer.h
25 renderer_opengl/gl_rasterizer_cache.h 45 renderer_opengl/gl_rasterizer_cache.h
26 renderer_opengl/gl_resource_manager.h 46 renderer_opengl/gl_resource_manager.h
@@ -29,20 +49,16 @@ set(HEADERS
29 renderer_opengl/gl_state.h 49 renderer_opengl/gl_state.h
30 renderer_opengl/pica_to_gl.h 50 renderer_opengl/pica_to_gl.h
31 renderer_opengl/renderer_opengl.h 51 renderer_opengl/renderer_opengl.h
32 clipper.h
33 command_processor.h
34 gpu_debugger.h
35 pica.h
36 pica_state.h
37 pica_types.h
38 primitive_assembly.h
39 rasterizer.h
40 rasterizer_interface.h
41 renderer_base.h
42 shader/debug_data.h 52 shader/debug_data.h
43 shader/shader.h 53 shader/shader.h
44 shader/shader_interpreter.h 54 shader/shader_interpreter.h
45 swrasterizer.h 55 swrasterizer/clipper.h
56 swrasterizer/framebuffer.h
57 swrasterizer/rasterizer.h
58 swrasterizer/swrasterizer.h
59 swrasterizer/texturing.h
60 texture/etc1.h
61 texture/texture_decode.h
46 utils.h 62 utils.h
47 vertex_loader.h 63 vertex_loader.h
48 video_core.h 64 video_core.h
@@ -50,10 +66,12 @@ set(HEADERS
50 66
51if(ARCHITECTURE_x86_64) 67if(ARCHITECTURE_x86_64)
52 set(SRCS ${SRCS} 68 set(SRCS ${SRCS}
53 shader/shader_jit_x64.cpp) 69 shader/shader_jit_x64.cpp
70 shader/shader_jit_x64_compiler.cpp)
54 71
55 set(HEADERS ${HEADERS} 72 set(HEADERS ${HEADERS}
56 shader/shader_jit_x64.h) 73 shader/shader_jit_x64.h
74 shader/shader_jit_x64_compiler.h)
57endif() 75endif()
58 76
59create_directory_groups(${SRCS} ${HEADERS}) 77create_directory_groups(${SRCS} ${HEADERS})
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index ea58e9f54..2e32ff905 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -16,11 +16,13 @@
16#include "core/tracer/recorder.h" 16#include "core/tracer/recorder.h"
17#include "video_core/command_processor.h" 17#include "video_core/command_processor.h"
18#include "video_core/debug_utils/debug_utils.h" 18#include "video_core/debug_utils/debug_utils.h"
19#include "video_core/pica.h"
20#include "video_core/pica_state.h" 19#include "video_core/pica_state.h"
21#include "video_core/pica_types.h" 20#include "video_core/pica_types.h"
22#include "video_core/primitive_assembly.h" 21#include "video_core/primitive_assembly.h"
23#include "video_core/rasterizer_interface.h" 22#include "video_core/rasterizer_interface.h"
23#include "video_core/regs.h"
24#include "video_core/regs_pipeline.h"
25#include "video_core/regs_texturing.h"
24#include "video_core/renderer_base.h" 26#include "video_core/renderer_base.h"
25#include "video_core/shader/shader.h" 27#include "video_core/shader/shader.h"
26#include "video_core/vertex_loader.h" 28#include "video_core/vertex_loader.h"
@@ -49,19 +51,23 @@ MICROPROFILE_DEFINE(GPU_Drawing, "GPU", "Drawing", MP_RGB(50, 50, 240));
49static void WritePicaReg(u32 id, u32 value, u32 mask) { 51static void WritePicaReg(u32 id, u32 value, u32 mask) {
50 auto& regs = g_state.regs; 52 auto& regs = g_state.regs;
51 53
52 if (id >= regs.NumIds()) 54 if (id >= Regs::NUM_REGS) {
55 LOG_ERROR(HW_GPU,
56 "Commandlist tried to write to invalid register 0x%03X (value: %08X, mask: %X)",
57 id, value, mask);
53 return; 58 return;
59 }
54 60
55 // TODO: Figure out how register masking acts on e.g. vs.uniform_setup.set_value 61 // TODO: Figure out how register masking acts on e.g. vs.uniform_setup.set_value
56 u32 old_value = regs[id]; 62 u32 old_value = regs.reg_array[id];
57 63
58 const u32 write_mask = expand_bits_to_bytes[mask]; 64 const u32 write_mask = expand_bits_to_bytes[mask];
59 65
60 regs[id] = (old_value & ~write_mask) | (value & write_mask); 66 regs.reg_array[id] = (old_value & ~write_mask) | (value & write_mask);
61 67
62 // Double check for is_pica_tracing to avoid call overhead 68 // Double check for is_pica_tracing to avoid call overhead
63 if (DebugUtils::IsPicaTracing()) { 69 if (DebugUtils::IsPicaTracing()) {
64 DebugUtils::OnPicaRegWrite({(u16)id, (u16)mask, regs[id]}); 70 DebugUtils::OnPicaRegWrite({(u16)id, (u16)mask, regs.reg_array[id]});
65 } 71 }
66 72
67 if (g_debug_context) 73 if (g_debug_context)
@@ -74,23 +80,23 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
74 Service::GSP::SignalInterrupt(Service::GSP::InterruptId::P3D); 80 Service::GSP::SignalInterrupt(Service::GSP::InterruptId::P3D);
75 break; 81 break;
76 82
77 case PICA_REG_INDEX_WORKAROUND(triangle_topology, 0x25E): 83 case PICA_REG_INDEX(pipeline.triangle_topology):
78 g_state.primitive_assembler.Reconfigure(regs.triangle_topology); 84 g_state.primitive_assembler.Reconfigure(regs.pipeline.triangle_topology);
79 break; 85 break;
80 86
81 case PICA_REG_INDEX_WORKAROUND(restart_primitive, 0x25F): 87 case PICA_REG_INDEX(pipeline.restart_primitive):
82 g_state.primitive_assembler.Reset(); 88 g_state.primitive_assembler.Reset();
83 break; 89 break;
84 90
85 case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.index, 0x232): 91 case PICA_REG_INDEX(pipeline.vs_default_attributes_setup.index):
86 g_state.immediate.current_attribute = 0; 92 g_state.immediate.current_attribute = 0;
87 default_attr_counter = 0; 93 default_attr_counter = 0;
88 break; 94 break;
89 95
90 // Load default vertex input attributes 96 // Load default vertex input attributes
91 case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[0], 0x233): 97 case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[0], 0x233):
92 case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[1], 0x234): 98 case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[1], 0x234):
93 case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[2], 0x235): { 99 case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[2], 0x235): {
94 // TODO: Does actual hardware indeed keep an intermediate buffer or does 100 // TODO: Does actual hardware indeed keep an intermediate buffer or does
95 // it directly write the values? 101 // it directly write the values?
96 default_attr_write_buffer[default_attr_counter++] = value; 102 default_attr_write_buffer[default_attr_counter++] = value;
@@ -102,7 +108,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
102 if (default_attr_counter >= 3) { 108 if (default_attr_counter >= 3) {
103 default_attr_counter = 0; 109 default_attr_counter = 0;
104 110
105 auto& setup = regs.vs_default_attributes_setup; 111 auto& setup = regs.pipeline.vs_default_attributes_setup;
106 112
107 if (setup.index >= 16) { 113 if (setup.index >= 16) {
108 LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index); 114 LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index);
@@ -125,33 +131,37 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
125 131
126 // TODO: Verify that this actually modifies the register! 132 // TODO: Verify that this actually modifies the register!
127 if (setup.index < 15) { 133 if (setup.index < 15) {
128 g_state.vs_default_attributes[setup.index] = attribute; 134 g_state.input_default_attributes.attr[setup.index] = attribute;
129 setup.index++; 135 setup.index++;
130 } else { 136 } else {
131 // Put each attribute into an immediate input buffer. 137 // Put each attribute into an immediate input buffer. When all specified immediate
132 // When all specified immediate attributes are present, the Vertex Shader is invoked 138 // attributes are present, the Vertex Shader is invoked and everything is sent to
133 // and everything is 139 // the primitive assembler.
134 // sent to the primitive assembler.
135 140
136 auto& immediate_input = g_state.immediate.input_vertex; 141 auto& immediate_input = g_state.immediate.input_vertex;
137 auto& immediate_attribute_id = g_state.immediate.current_attribute; 142 auto& immediate_attribute_id = g_state.immediate.current_attribute;
138 143
139 immediate_input.attr[immediate_attribute_id++] = attribute; 144 immediate_input.attr[immediate_attribute_id] = attribute;
140 145
141 if (immediate_attribute_id >= regs.vs.num_input_attributes + 1) { 146 if (immediate_attribute_id < regs.pipeline.max_input_attrib_index) {
147 immediate_attribute_id += 1;
148 } else {
142 MICROPROFILE_SCOPE(GPU_Drawing); 149 MICROPROFILE_SCOPE(GPU_Drawing);
143 immediate_attribute_id = 0; 150 immediate_attribute_id = 0;
144 151
145 Shader::UnitState shader_unit; 152 auto* shader_engine = Shader::GetEngine();
146 g_state.vs.Setup(); 153 shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset);
147 154
148 // Send to vertex shader 155 // Send to vertex shader
149 if (g_debug_context) 156 if (g_debug_context)
150 g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, 157 g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation,
151 static_cast<void*>(&immediate_input)); 158 static_cast<void*>(&immediate_input));
152 g_state.vs.Run(shader_unit, immediate_input, regs.vs.num_input_attributes + 1); 159 Shader::UnitState shader_unit;
153 Shader::OutputVertex output_vertex = 160 Shader::AttributeBuffer output{};
154 shader_unit.output_registers.ToVertex(regs.vs); 161
162 shader_unit.LoadInput(regs.vs, immediate_input);
163 shader_engine->Run(g_state.vs, shader_unit);
164 shader_unit.WriteOutput(regs.vs, output);
155 165
156 // Send to renderer 166 // Send to renderer
157 using Pica::Shader::OutputVertex; 167 using Pica::Shader::OutputVertex;
@@ -160,15 +170,17 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
160 VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); 170 VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2);
161 }; 171 };
162 172
163 g_state.primitive_assembler.SubmitVertex(output_vertex, AddTriangle); 173 g_state.primitive_assembler.SubmitVertex(
174 Shader::OutputVertex::FromAttributeBuffer(regs.rasterizer, output),
175 AddTriangle);
164 } 176 }
165 } 177 }
166 } 178 }
167 break; 179 break;
168 } 180 }
169 181
170 case PICA_REG_INDEX(gpu_mode): 182 case PICA_REG_INDEX(pipeline.gpu_mode):
171 if (regs.gpu_mode == Regs::GPUMode::Configuring) { 183 if (regs.pipeline.gpu_mode == PipelineRegs::GPUMode::Configuring) {
172 MICROPROFILE_SCOPE(GPU_Drawing); 184 MICROPROFILE_SCOPE(GPU_Drawing);
173 185
174 // Draw immediate mode triangles when GPU Mode is set to GPUMode::Configuring 186 // Draw immediate mode triangles when GPU Mode is set to GPUMode::Configuring
@@ -180,19 +192,20 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
180 } 192 }
181 break; 193 break;
182 194
183 case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[0], 0x23c): 195 case PICA_REG_INDEX_WORKAROUND(pipeline.command_buffer.trigger[0], 0x23c):
184 case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[1], 0x23d): { 196 case PICA_REG_INDEX_WORKAROUND(pipeline.command_buffer.trigger[1], 0x23d): {
185 unsigned index = static_cast<unsigned>(id - PICA_REG_INDEX(command_buffer.trigger[0])); 197 unsigned index =
186 u32* head_ptr = 198 static_cast<unsigned>(id - PICA_REG_INDEX(pipeline.command_buffer.trigger[0]));
187 (u32*)Memory::GetPhysicalPointer(regs.command_buffer.GetPhysicalAddress(index)); 199 u32* head_ptr = (u32*)Memory::GetPhysicalPointer(
200 regs.pipeline.command_buffer.GetPhysicalAddress(index));
188 g_state.cmd_list.head_ptr = g_state.cmd_list.current_ptr = head_ptr; 201 g_state.cmd_list.head_ptr = g_state.cmd_list.current_ptr = head_ptr;
189 g_state.cmd_list.length = regs.command_buffer.GetSize(index) / sizeof(u32); 202 g_state.cmd_list.length = regs.pipeline.command_buffer.GetSize(index) / sizeof(u32);
190 break; 203 break;
191 } 204 }
192 205
193 // It seems like these trigger vertex rendering 206 // It seems like these trigger vertex rendering
194 case PICA_REG_INDEX(trigger_draw): 207 case PICA_REG_INDEX(pipeline.trigger_draw):
195 case PICA_REG_INDEX(trigger_draw_indexed): { 208 case PICA_REG_INDEX(pipeline.trigger_draw_indexed): {
196 MICROPROFILE_SCOPE(GPU_Drawing); 209 MICROPROFILE_SCOPE(GPU_Drawing);
197 210
198#if PICA_LOG_TEV 211#if PICA_LOG_TEV
@@ -204,13 +217,13 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
204 // Processes information about internal vertex attributes to figure out how a vertex is 217 // Processes information about internal vertex attributes to figure out how a vertex is
205 // loaded. 218 // loaded.
206 // Later, these can be compiled and cached. 219 // Later, these can be compiled and cached.
207 const u32 base_address = regs.vertex_attributes.GetPhysicalBaseAddress(); 220 const u32 base_address = regs.pipeline.vertex_attributes.GetPhysicalBaseAddress();
208 VertexLoader loader(regs); 221 VertexLoader loader(regs.pipeline);
209 222
210 // Load vertices 223 // Load vertices
211 bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed)); 224 bool is_indexed = (id == PICA_REG_INDEX(pipeline.trigger_draw_indexed));
212 225
213 const auto& index_info = regs.index_array; 226 const auto& index_info = regs.pipeline.index_array;
214 const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset); 227 const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset);
215 const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8); 228 const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8);
216 bool index_u16 = index_info.format != 0; 229 bool index_u16 = index_info.format != 0;
@@ -219,13 +232,13 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
219 232
220 if (g_debug_context && g_debug_context->recorder) { 233 if (g_debug_context && g_debug_context->recorder) {
221 for (int i = 0; i < 3; ++i) { 234 for (int i = 0; i < 3; ++i) {
222 const auto texture = regs.GetTextures()[i]; 235 const auto texture = regs.texturing.GetTextures()[i];
223 if (!texture.enabled) 236 if (!texture.enabled)
224 continue; 237 continue;
225 238
226 u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress()); 239 u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress());
227 g_debug_context->recorder->MemoryAccessed( 240 g_debug_context->recorder->MemoryAccessed(
228 texture_data, Pica::Regs::NibblesPerPixel(texture.format) * 241 texture_data, Pica::TexturingRegs::NibblesPerPixel(texture.format) *
229 texture.config.width / 2 * texture.config.height, 242 texture.config.width / 2 * texture.config.height,
230 texture.config.GetPhysicalAddress()); 243 texture.config.GetPhysicalAddress());
231 } 244 }
@@ -243,14 +256,16 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
243 unsigned int vertex_cache_pos = 0; 256 unsigned int vertex_cache_pos = 0;
244 vertex_cache_ids.fill(-1); 257 vertex_cache_ids.fill(-1);
245 258
259 auto* shader_engine = Shader::GetEngine();
246 Shader::UnitState shader_unit; 260 Shader::UnitState shader_unit;
247 g_state.vs.Setup();
248 261
249 for (unsigned int index = 0; index < regs.num_vertices; ++index) { 262 shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset);
263
264 for (unsigned int index = 0; index < regs.pipeline.num_vertices; ++index) {
250 // Indexed rendering doesn't use the start offset 265 // Indexed rendering doesn't use the start offset
251 unsigned int vertex = 266 unsigned int vertex =
252 is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) 267 is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index])
253 : (index + regs.vertex_offset); 268 : (index + regs.pipeline.vertex_offset);
254 269
255 // -1 is a common special value used for primitive restart. Since it's unknown if 270 // -1 is a common special value used for primitive restart. Since it's unknown if
256 // the PICA supports it, and it would mess up the caching, guard against it here. 271 // the PICA supports it, and it would mess up the caching, guard against it here.
@@ -276,17 +291,19 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
276 291
277 if (!vertex_cache_hit) { 292 if (!vertex_cache_hit) {
278 // Initialize data for the current vertex 293 // Initialize data for the current vertex
279 Shader::InputVertex input; 294 Shader::AttributeBuffer input, output{};
280 loader.LoadVertex(base_address, index, vertex, input, memory_accesses); 295 loader.LoadVertex(base_address, index, vertex, input, memory_accesses);
281 296
282 // Send to vertex shader 297 // Send to vertex shader
283 if (g_debug_context) 298 if (g_debug_context)
284 g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, 299 g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation,
285 (void*)&input); 300 (void*)&input);
286 g_state.vs.Run(shader_unit, input, loader.GetNumTotalAttributes()); 301 shader_unit.LoadInput(regs.vs, input);
302 shader_engine->Run(g_state.vs, shader_unit);
303 shader_unit.WriteOutput(regs.vs, output);
287 304
288 // Retrieve vertex from register data 305 // Retrieve vertex from register data
289 output_vertex = shader_unit.output_registers.ToVertex(regs.vs); 306 output_vertex = Shader::OutputVertex::FromAttributeBuffer(regs.rasterizer, output);
290 307
291 if (is_indexed) { 308 if (is_indexed) {
292 vertex_cache[vertex_cache_pos] = output_vertex; 309 vertex_cache[vertex_cache_pos] = output_vertex;
@@ -428,16 +445,16 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
428 break; 445 break;
429 } 446 }
430 447
431 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[0], 0xe8): 448 case PICA_REG_INDEX_WORKAROUND(texturing.fog_lut_data[0], 0xe8):
432 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[1], 0xe9): 449 case PICA_REG_INDEX_WORKAROUND(texturing.fog_lut_data[1], 0xe9):
433 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[2], 0xea): 450 case PICA_REG_INDEX_WORKAROUND(texturing.fog_lut_data[2], 0xea):
434 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[3], 0xeb): 451 case PICA_REG_INDEX_WORKAROUND(texturing.fog_lut_data[3], 0xeb):
435 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[4], 0xec): 452 case PICA_REG_INDEX_WORKAROUND(texturing.fog_lut_data[4], 0xec):
436 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[5], 0xed): 453 case PICA_REG_INDEX_WORKAROUND(texturing.fog_lut_data[5], 0xed):
437 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[6], 0xee): 454 case PICA_REG_INDEX_WORKAROUND(texturing.fog_lut_data[6], 0xee):
438 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[7], 0xef): { 455 case PICA_REG_INDEX_WORKAROUND(texturing.fog_lut_data[7], 0xef): {
439 g_state.fog.lut[regs.fog_lut_offset % 128].raw = value; 456 g_state.fog.lut[regs.texturing.fog_lut_offset % 128].raw = value;
440 regs.fog_lut_offset.Assign(regs.fog_lut_offset + 1); 457 regs.texturing.fog_lut_offset.Assign(regs.texturing.fog_lut_offset + 1);
441 break; 458 break;
442 } 459 }
443 460
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index c44b3d95a..47dbc8cc8 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -29,12 +29,15 @@
29#include "common/math_util.h" 29#include "common/math_util.h"
30#include "common/vector_math.h" 30#include "common/vector_math.h"
31#include "video_core/debug_utils/debug_utils.h" 31#include "video_core/debug_utils/debug_utils.h"
32#include "video_core/pica.h"
33#include "video_core/pica_state.h" 32#include "video_core/pica_state.h"
34#include "video_core/pica_types.h" 33#include "video_core/pica_types.h"
35#include "video_core/rasterizer_interface.h" 34#include "video_core/rasterizer_interface.h"
35#include "video_core/regs_rasterizer.h"
36#include "video_core/regs_shader.h"
37#include "video_core/regs_texturing.h"
36#include "video_core/renderer_base.h" 38#include "video_core/renderer_base.h"
37#include "video_core/shader/shader.h" 39#include "video_core/shader/shader.h"
40#include "video_core/texture/texture_decode.h"
38#include "video_core/utils.h" 41#include "video_core/utils.h"
39#include "video_core/video_core.h" 42#include "video_core/video_core.h"
40 43
@@ -87,9 +90,9 @@ std::shared_ptr<DebugContext> g_debug_context; // TODO: Get rid of this global
87 90
88namespace DebugUtils { 91namespace DebugUtils {
89 92
90void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, 93void DumpShader(const std::string& filename, const ShaderRegs& config,
91 const Shader::ShaderSetup& setup, 94 const Shader::ShaderSetup& setup,
92 const Regs::VSOutputAttributes* output_attributes) { 95 const RasterizerRegs::VSOutputAttributes* output_attributes) {
93 struct StuffToWrite { 96 struct StuffToWrite {
94 const u8* pointer; 97 const u8* pointer;
95 u32 size; 98 u32 size;
@@ -128,7 +131,7 @@ void DumpShader(const std::string& filename, const Regs::ShaderConfig& config,
128 // This is put into a try-catch block to make sure we notice unknown configurations. 131 // This is put into a try-catch block to make sure we notice unknown configurations.
129 std::vector<OutputRegisterInfo> output_info_table; 132 std::vector<OutputRegisterInfo> output_info_table;
130 for (unsigned i = 0; i < 7; ++i) { 133 for (unsigned i = 0; i < 7; ++i) {
131 using OutputAttributes = Pica::Regs::VSOutputAttributes; 134 using OutputAttributes = Pica::RasterizerRegs::VSOutputAttributes;
132 135
133 // TODO: It's still unclear how the attribute components map to the register! 136 // TODO: It's still unclear how the attribute components map to the register!
134 // Once we know that, this code probably will not make much sense anymore. 137 // Once we know that, this code probably will not make much sense anymore.
@@ -315,257 +318,6 @@ std::unique_ptr<PicaTrace> FinishPicaTracing() {
315 return ret; 318 return ret;
316} 319}
317 320
318const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const TextureInfo& info,
319 bool disable_alpha) {
320 const unsigned int coarse_x = x & ~7;
321 const unsigned int coarse_y = y & ~7;
322
323 if (info.format != Regs::TextureFormat::ETC1 && info.format != Regs::TextureFormat::ETC1A4) {
324 // TODO(neobrain): Fix code design to unify vertical block offsets!
325 source += coarse_y * info.stride;
326 }
327
328 // TODO: Assert that width/height are multiples of block dimensions
329
330 switch (info.format) {
331 case Regs::TextureFormat::RGBA8: {
332 auto res = Color::DecodeRGBA8(source + VideoCore::GetMortonOffset(x, y, 4));
333 return {res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a())};
334 }
335
336 case Regs::TextureFormat::RGB8: {
337 auto res = Color::DecodeRGB8(source + VideoCore::GetMortonOffset(x, y, 3));
338 return {res.r(), res.g(), res.b(), 255};
339 }
340
341 case Regs::TextureFormat::RGB5A1: {
342 auto res = Color::DecodeRGB5A1(source + VideoCore::GetMortonOffset(x, y, 2));
343 return {res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a())};
344 }
345
346 case Regs::TextureFormat::RGB565: {
347 auto res = Color::DecodeRGB565(source + VideoCore::GetMortonOffset(x, y, 2));
348 return {res.r(), res.g(), res.b(), 255};
349 }
350
351 case Regs::TextureFormat::RGBA4: {
352 auto res = Color::DecodeRGBA4(source + VideoCore::GetMortonOffset(x, y, 2));
353 return {res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a())};
354 }
355
356 case Regs::TextureFormat::IA8: {
357 const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 2);
358
359 if (disable_alpha) {
360 // Show intensity as red, alpha as green
361 return {source_ptr[1], source_ptr[0], 0, 255};
362 } else {
363 return {source_ptr[1], source_ptr[1], source_ptr[1], source_ptr[0]};
364 }
365 }
366
367 case Regs::TextureFormat::RG8: {
368 auto res = Color::DecodeRG8(source + VideoCore::GetMortonOffset(x, y, 2));
369 return {res.r(), res.g(), 0, 255};
370 }
371
372 case Regs::TextureFormat::I8: {
373 const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1);
374 return {*source_ptr, *source_ptr, *source_ptr, 255};
375 }
376
377 case Regs::TextureFormat::A8: {
378 const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1);
379
380 if (disable_alpha) {
381 return {*source_ptr, *source_ptr, *source_ptr, 255};
382 } else {
383 return {0, 0, 0, *source_ptr};
384 }
385 }
386
387 case Regs::TextureFormat::IA4: {
388 const u8* source_ptr = source + VideoCore::GetMortonOffset(x, y, 1);
389
390 u8 i = Color::Convert4To8(((*source_ptr) & 0xF0) >> 4);
391 u8 a = Color::Convert4To8((*source_ptr) & 0xF);
392
393 if (disable_alpha) {
394 // Show intensity as red, alpha as green
395 return {i, a, 0, 255};
396 } else {
397 return {i, i, i, a};
398 }
399 }
400
401 case Regs::TextureFormat::I4: {
402 u32 morton_offset = VideoCore::GetMortonOffset(x, y, 1);
403 const u8* source_ptr = source + morton_offset / 2;
404
405 u8 i = (morton_offset % 2) ? ((*source_ptr & 0xF0) >> 4) : (*source_ptr & 0xF);
406 i = Color::Convert4To8(i);
407
408 return {i, i, i, 255};
409 }
410
411 case Regs::TextureFormat::A4: {
412 u32 morton_offset = VideoCore::GetMortonOffset(x, y, 1);
413 const u8* source_ptr = source + morton_offset / 2;
414
415 u8 a = (morton_offset % 2) ? ((*source_ptr & 0xF0) >> 4) : (*source_ptr & 0xF);
416 a = Color::Convert4To8(a);
417
418 if (disable_alpha) {
419 return {a, a, a, 255};
420 } else {
421 return {0, 0, 0, a};
422 }
423 }
424
425 case Regs::TextureFormat::ETC1:
426 case Regs::TextureFormat::ETC1A4: {
427 bool has_alpha = (info.format == Regs::TextureFormat::ETC1A4);
428
429 // ETC1 further subdivides each 8x8 tile into four 4x4 subtiles
430 const int subtile_width = 4;
431 const int subtile_height = 4;
432
433 int subtile_index = ((x / subtile_width) & 1) + 2 * ((y / subtile_height) & 1);
434 unsigned subtile_bytes = has_alpha ? 2 : 1; // TODO: Name...
435
436 const u64* source_ptr = (const u64*)(source + coarse_x * subtile_bytes * 4 +
437 coarse_y * subtile_bytes * 4 * (info.width / 8) +
438 subtile_index * subtile_bytes * 8);
439 u64 alpha = 0xFFFFFFFFFFFFFFFF;
440 if (has_alpha) {
441 alpha = *source_ptr;
442 source_ptr++;
443 }
444
445 union ETC1Tile {
446 // Each of these two is a collection of 16 bits (one per lookup value)
447 BitField<0, 16, u64> table_subindexes;
448 BitField<16, 16, u64> negation_flags;
449
450 unsigned GetTableSubIndex(unsigned index) const {
451 return (table_subindexes >> index) & 1;
452 }
453
454 bool GetNegationFlag(unsigned index) const {
455 return ((negation_flags >> index) & 1) == 1;
456 }
457
458 BitField<32, 1, u64> flip;
459 BitField<33, 1, u64> differential_mode;
460
461 BitField<34, 3, u64> table_index_2;
462 BitField<37, 3, u64> table_index_1;
463
464 union {
465 // delta value + base value
466 BitField<40, 3, s64> db;
467 BitField<43, 5, u64> b;
468
469 BitField<48, 3, s64> dg;
470 BitField<51, 5, u64> g;
471
472 BitField<56, 3, s64> dr;
473 BitField<59, 5, u64> r;
474 } differential;
475
476 union {
477 BitField<40, 4, u64> b2;
478 BitField<44, 4, u64> b1;
479
480 BitField<48, 4, u64> g2;
481 BitField<52, 4, u64> g1;
482
483 BitField<56, 4, u64> r2;
484 BitField<60, 4, u64> r1;
485 } separate;
486
487 const Math::Vec3<u8> GetRGB(int x, int y) const {
488 int texel = 4 * x + y;
489
490 if (flip)
491 std::swap(x, y);
492
493 // Lookup base value
494 Math::Vec3<int> ret;
495 if (differential_mode) {
496 ret.r() = static_cast<int>(differential.r);
497 ret.g() = static_cast<int>(differential.g);
498 ret.b() = static_cast<int>(differential.b);
499 if (x >= 2) {
500 ret.r() += static_cast<int>(differential.dr);
501 ret.g() += static_cast<int>(differential.dg);
502 ret.b() += static_cast<int>(differential.db);
503 }
504 ret.r() = Color::Convert5To8(ret.r());
505 ret.g() = Color::Convert5To8(ret.g());
506 ret.b() = Color::Convert5To8(ret.b());
507 } else {
508 if (x < 2) {
509 ret.r() = Color::Convert4To8(static_cast<u8>(separate.r1));
510 ret.g() = Color::Convert4To8(static_cast<u8>(separate.g1));
511 ret.b() = Color::Convert4To8(static_cast<u8>(separate.b1));
512 } else {
513 ret.r() = Color::Convert4To8(static_cast<u8>(separate.r2));
514 ret.g() = Color::Convert4To8(static_cast<u8>(separate.g2));
515 ret.b() = Color::Convert4To8(static_cast<u8>(separate.b2));
516 }
517 }
518
519 // Add modifier
520 unsigned table_index =
521 static_cast<int>((x < 2) ? table_index_1.Value() : table_index_2.Value());
522
523 static const std::array<std::array<u8, 2>, 8> etc1_modifier_table = {{
524 {{2, 8}},
525 {{5, 17}},
526 {{9, 29}},
527 {{13, 42}},
528 {{18, 60}},
529 {{24, 80}},
530 {{33, 106}},
531 {{47, 183}},
532 }};
533
534 int modifier = etc1_modifier_table.at(table_index).at(GetTableSubIndex(texel));
535 if (GetNegationFlag(texel))
536 modifier *= -1;
537
538 ret.r() = MathUtil::Clamp(ret.r() + modifier, 0, 255);
539 ret.g() = MathUtil::Clamp(ret.g() + modifier, 0, 255);
540 ret.b() = MathUtil::Clamp(ret.b() + modifier, 0, 255);
541
542 return ret.Cast<u8>();
543 }
544 } const* etc1_tile = reinterpret_cast<const ETC1Tile*>(source_ptr);
545
546 alpha >>= 4 * ((x & 3) * 4 + (y & 3));
547 return Math::MakeVec(etc1_tile->GetRGB(x & 3, y & 3),
548 disable_alpha ? (u8)255 : Color::Convert4To8(alpha & 0xF));
549 }
550
551 default:
552 LOG_ERROR(HW_GPU, "Unknown texture format: %x", (u32)info.format);
553 DEBUG_ASSERT(false);
554 return {};
555 }
556}
557
558TextureInfo TextureInfo::FromPicaRegister(const Regs::TextureConfig& config,
559 const Regs::TextureFormat& format) {
560 TextureInfo info;
561 info.physical_address = config.GetPhysicalAddress();
562 info.width = config.width;
563 info.height = config.height;
564 info.format = format;
565 info.stride = Pica::Regs::NibblesPerPixel(info.format) * info.width / 2;
566 return info;
567}
568
569#ifdef HAVE_PNG 321#ifdef HAVE_PNG
570// Adapter functions to libpng to write/flush to File::IOFile instances. 322// Adapter functions to libpng to write/flush to File::IOFile instances.
571static void WriteIOFile(png_structp png_ptr, png_bytep data, png_size_t length) { 323static void WriteIOFile(png_structp png_ptr, png_bytep data, png_size_t length) {
@@ -581,7 +333,7 @@ static void FlushIOFile(png_structp png_ptr) {
581} 333}
582#endif 334#endif
583 335
584void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) { 336void DumpTexture(const TexturingRegs::TextureConfig& texture_config, u8* data) {
585#ifndef HAVE_PNG 337#ifndef HAVE_PNG
586 return; 338 return;
587#else 339#else
@@ -642,12 +394,12 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) {
642 buf = new u8[row_stride * texture_config.height]; 394 buf = new u8[row_stride * texture_config.height];
643 for (unsigned y = 0; y < texture_config.height; ++y) { 395 for (unsigned y = 0; y < texture_config.height; ++y) {
644 for (unsigned x = 0; x < texture_config.width; ++x) { 396 for (unsigned x = 0; x < texture_config.width; ++x) {
645 TextureInfo info; 397 Pica::Texture::TextureInfo info;
646 info.width = texture_config.width; 398 info.width = texture_config.width;
647 info.height = texture_config.height; 399 info.height = texture_config.height;
648 info.stride = row_stride; 400 info.stride = row_stride;
649 info.format = g_state.regs.texture0_format; 401 info.format = g_state.regs.texturing.texture0_format;
650 Math::Vec4<u8> texture_color = LookupTexture(data, x, y, info); 402 Math::Vec4<u8> texture_color = Pica::Texture::LookupTexture(data, x, y, info);
651 buf[3 * x + y * row_stride] = texture_color.r(); 403 buf[3 * x + y * row_stride] = texture_color.r();
652 buf[3 * x + y * row_stride + 1] = texture_color.g(); 404 buf[3 * x + y * row_stride + 1] = texture_color.g();
653 buf[3 * x + y * row_stride + 2] = texture_color.b(); 405 buf[3 * x + y * row_stride + 2] = texture_color.b();
@@ -684,8 +436,10 @@ static std::string ReplacePattern(const std::string& input, const std::string& p
684 return ret; 436 return ret;
685} 437}
686 438
687static std::string GetTevStageConfigSourceString(const Pica::Regs::TevStageConfig::Source& source) { 439static std::string GetTevStageConfigSourceString(
688 using Source = Pica::Regs::TevStageConfig::Source; 440 const TexturingRegs::TevStageConfig::Source& source) {
441
442 using Source = TexturingRegs::TevStageConfig::Source;
689 static const std::map<Source, std::string> source_map = { 443 static const std::map<Source, std::string> source_map = {
690 {Source::PrimaryColor, "PrimaryColor"}, 444 {Source::PrimaryColor, "PrimaryColor"},
691 {Source::PrimaryFragmentColor, "PrimaryFragmentColor"}, 445 {Source::PrimaryFragmentColor, "PrimaryFragmentColor"},
@@ -707,9 +461,10 @@ static std::string GetTevStageConfigSourceString(const Pica::Regs::TevStageConfi
707} 461}
708 462
709static std::string GetTevStageConfigColorSourceString( 463static std::string GetTevStageConfigColorSourceString(
710 const Pica::Regs::TevStageConfig::Source& source, 464 const TexturingRegs::TevStageConfig::Source& source,
711 const Pica::Regs::TevStageConfig::ColorModifier modifier) { 465 const TexturingRegs::TevStageConfig::ColorModifier modifier) {
712 using ColorModifier = Pica::Regs::TevStageConfig::ColorModifier; 466
467 using ColorModifier = TexturingRegs::TevStageConfig::ColorModifier;
713 static const std::map<ColorModifier, std::string> color_modifier_map = { 468 static const std::map<ColorModifier, std::string> color_modifier_map = {
714 {ColorModifier::SourceColor, "%source.rgb"}, 469 {ColorModifier::SourceColor, "%source.rgb"},
715 {ColorModifier::OneMinusSourceColor, "(1.0 - %source.rgb)"}, 470 {ColorModifier::OneMinusSourceColor, "(1.0 - %source.rgb)"},
@@ -733,9 +488,10 @@ static std::string GetTevStageConfigColorSourceString(
733} 488}
734 489
735static std::string GetTevStageConfigAlphaSourceString( 490static std::string GetTevStageConfigAlphaSourceString(
736 const Pica::Regs::TevStageConfig::Source& source, 491 const TexturingRegs::TevStageConfig::Source& source,
737 const Pica::Regs::TevStageConfig::AlphaModifier modifier) { 492 const TexturingRegs::TevStageConfig::AlphaModifier modifier) {
738 using AlphaModifier = Pica::Regs::TevStageConfig::AlphaModifier; 493
494 using AlphaModifier = TexturingRegs::TevStageConfig::AlphaModifier;
739 static const std::map<AlphaModifier, std::string> alpha_modifier_map = { 495 static const std::map<AlphaModifier, std::string> alpha_modifier_map = {
740 {AlphaModifier::SourceAlpha, "%source.a"}, 496 {AlphaModifier::SourceAlpha, "%source.a"},
741 {AlphaModifier::OneMinusSourceAlpha, "(1.0 - %source.a)"}, 497 {AlphaModifier::OneMinusSourceAlpha, "(1.0 - %source.a)"},
@@ -757,8 +513,9 @@ static std::string GetTevStageConfigAlphaSourceString(
757} 513}
758 514
759static std::string GetTevStageConfigOperationString( 515static std::string GetTevStageConfigOperationString(
760 const Pica::Regs::TevStageConfig::Operation& operation) { 516 const TexturingRegs::TevStageConfig::Operation& operation) {
761 using Operation = Pica::Regs::TevStageConfig::Operation; 517
518 using Operation = TexturingRegs::TevStageConfig::Operation;
762 static const std::map<Operation, std::string> combiner_map = { 519 static const std::map<Operation, std::string> combiner_map = {
763 {Operation::Replace, "%source1"}, 520 {Operation::Replace, "%source1"},
764 {Operation::Modulate, "(%source1 * %source2)"}, 521 {Operation::Modulate, "(%source1 * %source2)"},
@@ -778,7 +535,7 @@ static std::string GetTevStageConfigOperationString(
778 return op_it->second; 535 return op_it->second;
779} 536}
780 537
781std::string GetTevStageConfigColorCombinerString(const Pica::Regs::TevStageConfig& tev_stage) { 538std::string GetTevStageConfigColorCombinerString(const TexturingRegs::TevStageConfig& tev_stage) {
782 auto op_str = GetTevStageConfigOperationString(tev_stage.color_op); 539 auto op_str = GetTevStageConfigOperationString(tev_stage.color_op);
783 op_str = ReplacePattern( 540 op_str = ReplacePattern(
784 op_str, "%source1", 541 op_str, "%source1",
@@ -791,7 +548,7 @@ std::string GetTevStageConfigColorCombinerString(const Pica::Regs::TevStageConfi
791 GetTevStageConfigColorSourceString(tev_stage.color_source3, tev_stage.color_modifier3)); 548 GetTevStageConfigColorSourceString(tev_stage.color_source3, tev_stage.color_modifier3));
792} 549}
793 550
794std::string GetTevStageConfigAlphaCombinerString(const Pica::Regs::TevStageConfig& tev_stage) { 551std::string GetTevStageConfigAlphaCombinerString(const TexturingRegs::TevStageConfig& tev_stage) {
795 auto op_str = GetTevStageConfigOperationString(tev_stage.alpha_op); 552 auto op_str = GetTevStageConfigOperationString(tev_stage.alpha_op);
796 op_str = ReplacePattern( 553 op_str = ReplacePattern(
797 op_str, "%source1", 554 op_str, "%source1",
@@ -804,7 +561,7 @@ std::string GetTevStageConfigAlphaCombinerString(const Pica::Regs::TevStageConfi
804 GetTevStageConfigAlphaSourceString(tev_stage.alpha_source3, tev_stage.alpha_modifier3)); 561 GetTevStageConfigAlphaSourceString(tev_stage.alpha_source3, tev_stage.alpha_modifier3));
805} 562}
806 563
807void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig, 6>& stages) { 564void DumpTevStageConfig(const std::array<TexturingRegs::TevStageConfig, 6>& stages) {
808 std::string stage_info = "Tev setup:\n"; 565 std::string stage_info = "Tev setup:\n";
809 for (size_t index = 0; index < stages.size(); ++index) { 566 for (size_t index = 0; index < stages.size(); ++index) {
810 const auto& tev_stage = stages[index]; 567 const auto& tev_stage = stages[index];
diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h
index 46ea8d9c7..c1f29c527 100644
--- a/src/video_core/debug_utils/debug_utils.h
+++ b/src/video_core/debug_utils/debug_utils.h
@@ -17,7 +17,9 @@
17#include <vector> 17#include <vector>
18#include "common/common_types.h" 18#include "common/common_types.h"
19#include "common/vector_math.h" 19#include "common/vector_math.h"
20#include "video_core/pica.h" 20#include "video_core/regs_rasterizer.h"
21#include "video_core/regs_shader.h"
22#include "video_core/regs_texturing.h"
21 23
22namespace CiTrace { 24namespace CiTrace {
23class Recorder; 25class Recorder;
@@ -85,7 +87,7 @@ public:
85 * @param data Optional data pointer (if unused, this is a nullptr) 87 * @param data Optional data pointer (if unused, this is a nullptr)
86 * @note This function will perform nothing unless it is overridden in the child class. 88 * @note This function will perform nothing unless it is overridden in the child class.
87 */ 89 */
88 virtual void OnPicaBreakPointHit(Event, void*) {} 90 virtual void OnPicaBreakPointHit(Event event, void* data) {}
89 91
90 /** 92 /**
91 * Action to perform when emulation is resumed from a breakpoint. 93 * Action to perform when emulation is resumed from a breakpoint.
@@ -182,9 +184,9 @@ namespace DebugUtils {
182#define PICA_DUMP_TEXTURES 0 184#define PICA_DUMP_TEXTURES 0
183#define PICA_LOG_TEV 0 185#define PICA_LOG_TEV 0
184 186
185void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, 187void DumpShader(const std::string& filename, const ShaderRegs& config,
186 const Shader::ShaderSetup& setup, 188 const Shader::ShaderSetup& setup,
187 const Regs::VSOutputAttributes* output_attributes); 189 const RasterizerRegs::VSOutputAttributes* output_attributes);
188 190
189// Utility class to log Pica commands. 191// Utility class to log Pica commands.
190struct PicaTrace { 192struct PicaTrace {
@@ -205,38 +207,13 @@ inline bool IsPicaTracing() {
205void OnPicaRegWrite(PicaTrace::Write write); 207void OnPicaRegWrite(PicaTrace::Write write);
206std::unique_ptr<PicaTrace> FinishPicaTracing(); 208std::unique_ptr<PicaTrace> FinishPicaTracing();
207 209
208struct TextureInfo { 210void DumpTexture(const TexturingRegs::TextureConfig& texture_config, u8* data);
209 PAddr physical_address;
210 int width;
211 int height;
212 int stride;
213 Pica::Regs::TextureFormat format;
214 211
215 static TextureInfo FromPicaRegister(const Pica::Regs::TextureConfig& config, 212std::string GetTevStageConfigColorCombinerString(const TexturingRegs::TevStageConfig& tev_stage);
216 const Pica::Regs::TextureFormat& format); 213std::string GetTevStageConfigAlphaCombinerString(const TexturingRegs::TevStageConfig& tev_stage);
217};
218
219/**
220 * Lookup texel located at the given coordinates and return an RGBA vector of its color.
221 * @param source Source pointer to read data from
222 * @param s,t Texture coordinates to read from
223 * @param info TextureInfo object describing the texture setup
224 * @param disable_alpha This is used for debug widgets which use this method to display textures
225 * without providing a good way to visualize alpha by themselves. If true, this will return 255 for
226 * the alpha component, and either drop the information entirely or store it in an "unused" color
227 * channel.
228 * @todo Eventually we should get rid of the disable_alpha parameter.
229 */
230const Math::Vec4<u8> LookupTexture(const u8* source, int s, int t, const TextureInfo& info,
231 bool disable_alpha = false);
232
233void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data);
234
235std::string GetTevStageConfigColorCombinerString(const Pica::Regs::TevStageConfig& tev_stage);
236std::string GetTevStageConfigAlphaCombinerString(const Pica::Regs::TevStageConfig& tev_stage);
237 214
238/// Dumps the Tev stage config to log at trace level 215/// Dumps the Tev stage config to log at trace level
239void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig, 6>& stages); 216void DumpTevStageConfig(const std::array<TexturingRegs::TevStageConfig, 6>& stages);
240 217
241/** 218/**
242 * Used in the vertex loader to merge access records. TODO: Investigate if actually useful. 219 * Used in the vertex loader to merge access records. TODO: Investigate if actually useful.
diff --git a/src/video_core/pica.cpp b/src/video_core/pica.cpp
index ce2bd455e..b95148a6a 100644
--- a/src/video_core/pica.cpp
+++ b/src/video_core/pica.cpp
@@ -3,503 +3,20 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <cstring> 5#include <cstring>
6#include <iterator>
7#include <unordered_map>
8#include <utility>
9#include "video_core/pica.h" 6#include "video_core/pica.h"
10#include "video_core/pica_state.h" 7#include "video_core/pica_state.h"
11#include "video_core/primitive_assembly.h" 8#include "video_core/regs_pipeline.h"
12#include "video_core/shader/shader.h"
13 9
14namespace Pica { 10namespace Pica {
15 11
16State g_state; 12State g_state;
17 13
18static const std::pair<u16, const char*> register_names[] = {
19 {0x010, "GPUREG_FINALIZE"},
20
21 {0x040, "GPUREG_FACECULLING_CONFIG"},
22 {0x041, "GPUREG_VIEWPORT_WIDTH"},
23 {0x042, "GPUREG_VIEWPORT_INVW"},
24 {0x043, "GPUREG_VIEWPORT_HEIGHT"},
25 {0x044, "GPUREG_VIEWPORT_INVH"},
26
27 {0x047, "GPUREG_FRAGOP_CLIP"},
28 {0x048, "GPUREG_FRAGOP_CLIP_DATA0"},
29 {0x049, "GPUREG_FRAGOP_CLIP_DATA1"},
30 {0x04A, "GPUREG_FRAGOP_CLIP_DATA2"},
31 {0x04B, "GPUREG_FRAGOP_CLIP_DATA3"},
32
33 {0x04D, "GPUREG_DEPTHMAP_SCALE"},
34 {0x04E, "GPUREG_DEPTHMAP_OFFSET"},
35 {0x04F, "GPUREG_SH_OUTMAP_TOTAL"},
36 {0x050, "GPUREG_SH_OUTMAP_O0"},
37 {0x051, "GPUREG_SH_OUTMAP_O1"},
38 {0x052, "GPUREG_SH_OUTMAP_O2"},
39 {0x053, "GPUREG_SH_OUTMAP_O3"},
40 {0x054, "GPUREG_SH_OUTMAP_O4"},
41 {0x055, "GPUREG_SH_OUTMAP_O5"},
42 {0x056, "GPUREG_SH_OUTMAP_O6"},
43
44 {0x061, "GPUREG_EARLYDEPTH_FUNC"},
45 {0x062, "GPUREG_EARLYDEPTH_TEST1"},
46 {0x063, "GPUREG_EARLYDEPTH_CLEAR"},
47 {0x064, "GPUREG_SH_OUTATTR_MODE"},
48 {0x065, "GPUREG_SCISSORTEST_MODE"},
49 {0x066, "GPUREG_SCISSORTEST_POS"},
50 {0x067, "GPUREG_SCISSORTEST_DIM"},
51 {0x068, "GPUREG_VIEWPORT_XY"},
52
53 {0x06A, "GPUREG_EARLYDEPTH_DATA"},
54
55 {0x06D, "GPUREG_DEPTHMAP_ENABLE"},
56 {0x06E, "GPUREG_RENDERBUF_DIM"},
57 {0x06F, "GPUREG_SH_OUTATTR_CLOCK"},
58
59 {0x080, "GPUREG_TEXUNIT_CONFIG"},
60 {0x081, "GPUREG_TEXUNIT0_BORDER_COLOR"},
61 {0x082, "GPUREG_TEXUNIT0_DIM"},
62 {0x083, "GPUREG_TEXUNIT0_PARAM"},
63 {0x084, "GPUREG_TEXUNIT0_LOD"},
64 {0x085, "GPUREG_TEXUNIT0_ADDR1"},
65 {0x086, "GPUREG_TEXUNIT0_ADDR2"},
66 {0x087, "GPUREG_TEXUNIT0_ADDR3"},
67 {0x088, "GPUREG_TEXUNIT0_ADDR4"},
68 {0x089, "GPUREG_TEXUNIT0_ADDR5"},
69 {0x08A, "GPUREG_TEXUNIT0_ADDR6"},
70 {0x08B, "GPUREG_TEXUNIT0_SHADOW"},
71
72 {0x08E, "GPUREG_TEXUNIT0_TYPE"},
73 {0x08F, "GPUREG_LIGHTING_ENABLE0"},
74
75 {0x091, "GPUREG_TEXUNIT1_BORDER_COLOR"},
76 {0x092, "GPUREG_TEXUNIT1_DIM"},
77 {0x093, "GPUREG_TEXUNIT1_PARAM"},
78 {0x094, "GPUREG_TEXUNIT1_LOD"},
79 {0x095, "GPUREG_TEXUNIT1_ADDR"},
80 {0x096, "GPUREG_TEXUNIT1_TYPE"},
81
82 {0x099, "GPUREG_TEXUNIT2_BORDER_COLOR"},
83 {0x09A, "GPUREG_TEXUNIT2_DIM"},
84 {0x09B, "GPUREG_TEXUNIT2_PARAM"},
85 {0x09C, "GPUREG_TEXUNIT2_LOD"},
86 {0x09D, "GPUREG_TEXUNIT2_ADDR"},
87 {0x09E, "GPUREG_TEXUNIT2_TYPE"},
88
89 {0x0A8, "GPUREG_TEXUNIT3_PROCTEX0"},
90 {0x0A9, "GPUREG_TEXUNIT3_PROCTEX1"},
91 {0x0AA, "GPUREG_TEXUNIT3_PROCTEX2"},
92 {0x0AB, "GPUREG_TEXUNIT3_PROCTEX3"},
93 {0x0AC, "GPUREG_TEXUNIT3_PROCTEX4"},
94 {0x0AD, "GPUREG_TEXUNIT3_PROCTEX5"},
95
96 {0x0AF, "GPUREG_PROCTEX_LUT"},
97 {0x0B0, "GPUREG_PROCTEX_LUT_DATA0"},
98 {0x0B1, "GPUREG_PROCTEX_LUT_DATA1"},
99 {0x0B2, "GPUREG_PROCTEX_LUT_DATA2"},
100 {0x0B3, "GPUREG_PROCTEX_LUT_DATA3"},
101 {0x0B4, "GPUREG_PROCTEX_LUT_DATA4"},
102 {0x0B5, "GPUREG_PROCTEX_LUT_DATA5"},
103 {0x0B6, "GPUREG_PROCTEX_LUT_DATA6"},
104 {0x0B7, "GPUREG_PROCTEX_LUT_DATA7"},
105
106 {0x0C0, "GPUREG_TEXENV0_SOURCE"},
107 {0x0C1, "GPUREG_TEXENV0_OPERAND"},
108 {0x0C2, "GPUREG_TEXENV0_COMBINER"},
109 {0x0C3, "GPUREG_TEXENV0_COLOR"},
110 {0x0C4, "GPUREG_TEXENV0_SCALE"},
111
112 {0x0C8, "GPUREG_TEXENV1_SOURCE"},
113 {0x0C9, "GPUREG_TEXENV1_OPERAND"},
114 {0x0CA, "GPUREG_TEXENV1_COMBINER"},
115 {0x0CB, "GPUREG_TEXENV1_COLOR"},
116 {0x0CC, "GPUREG_TEXENV1_SCALE"},
117
118 {0x0D0, "GPUREG_TEXENV2_SOURCE"},
119 {0x0D1, "GPUREG_TEXENV2_OPERAND"},
120 {0x0D2, "GPUREG_TEXENV2_COMBINER"},
121 {0x0D3, "GPUREG_TEXENV2_COLOR"},
122 {0x0D4, "GPUREG_TEXENV2_SCALE"},
123
124 {0x0D8, "GPUREG_TEXENV3_SOURCE"},
125 {0x0D9, "GPUREG_TEXENV3_OPERAND"},
126 {0x0DA, "GPUREG_TEXENV3_COMBINER"},
127 {0x0DB, "GPUREG_TEXENV3_COLOR"},
128 {0x0DC, "GPUREG_TEXENV3_SCALE"},
129
130 {0x0E0, "GPUREG_TEXENV_UPDATE_BUFFER"},
131 {0x0E1, "GPUREG_FOG_COLOR"},
132
133 {0x0E4, "GPUREG_GAS_ATTENUATION"},
134 {0x0E5, "GPUREG_GAS_ACCMAX"},
135 {0x0E6, "GPUREG_FOG_LUT_INDEX"},
136
137 {0x0E8, "GPUREG_FOG_LUT_DATA0"},
138 {0x0E9, "GPUREG_FOG_LUT_DATA1"},
139 {0x0EA, "GPUREG_FOG_LUT_DATA2"},
140 {0x0EB, "GPUREG_FOG_LUT_DATA3"},
141 {0x0EC, "GPUREG_FOG_LUT_DATA4"},
142 {0x0ED, "GPUREG_FOG_LUT_DATA5"},
143 {0x0EE, "GPUREG_FOG_LUT_DATA6"},
144 {0x0EF, "GPUREG_FOG_LUT_DATA7"},
145 {0x0F0, "GPUREG_TEXENV4_SOURCE"},
146 {0x0F1, "GPUREG_TEXENV4_OPERAND"},
147 {0x0F2, "GPUREG_TEXENV4_COMBINER"},
148 {0x0F3, "GPUREG_TEXENV4_COLOR"},
149 {0x0F4, "GPUREG_TEXENV4_SCALE"},
150
151 {0x0F8, "GPUREG_TEXENV5_SOURCE"},
152 {0x0F9, "GPUREG_TEXENV5_OPERAND"},
153 {0x0FA, "GPUREG_TEXENV5_COMBINER"},
154 {0x0FB, "GPUREG_TEXENV5_COLOR"},
155 {0x0FC, "GPUREG_TEXENV5_SCALE"},
156 {0x0FD, "GPUREG_TEXENV_BUFFER_COLOR"},
157
158 {0x100, "GPUREG_COLOR_OPERATION"},
159 {0x101, "GPUREG_BLEND_FUNC"},
160 {0x102, "GPUREG_LOGIC_OP"},
161 {0x103, "GPUREG_BLEND_COLOR"},
162 {0x104, "GPUREG_FRAGOP_ALPHA_TEST"},
163 {0x105, "GPUREG_STENCIL_TEST"},
164 {0x106, "GPUREG_STENCIL_OP"},
165 {0x107, "GPUREG_DEPTH_COLOR_MASK"},
166
167 {0x110, "GPUREG_FRAMEBUFFER_INVALIDATE"},
168 {0x111, "GPUREG_FRAMEBUFFER_FLUSH"},
169 {0x112, "GPUREG_COLORBUFFER_READ"},
170 {0x113, "GPUREG_COLORBUFFER_WRITE"},
171 {0x114, "GPUREG_DEPTHBUFFER_READ"},
172 {0x115, "GPUREG_DEPTHBUFFER_WRITE"},
173 {0x116, "GPUREG_DEPTHBUFFER_FORMAT"},
174 {0x117, "GPUREG_COLORBUFFER_FORMAT"},
175 {0x118, "GPUREG_EARLYDEPTH_TEST2"},
176
177 {0x11B, "GPUREG_FRAMEBUFFER_BLOCK32"},
178 {0x11C, "GPUREG_DEPTHBUFFER_LOC"},
179 {0x11D, "GPUREG_COLORBUFFER_LOC"},
180 {0x11E, "GPUREG_FRAMEBUFFER_DIM"},
181
182 {0x120, "GPUREG_GAS_LIGHT_XY"},
183 {0x121, "GPUREG_GAS_LIGHT_Z"},
184 {0x122, "GPUREG_GAS_LIGHT_Z_COLOR"},
185 {0x123, "GPUREG_GAS_LUT_INDEX"},
186 {0x124, "GPUREG_GAS_LUT_DATA"},
187
188 {0x126, "GPUREG_GAS_DELTAZ_DEPTH"},
189
190 {0x130, "GPUREG_FRAGOP_SHADOW"},
191
192 {0x140, "GPUREG_LIGHT0_SPECULAR0"},
193 {0x141, "GPUREG_LIGHT0_SPECULAR1"},
194 {0x142, "GPUREG_LIGHT0_DIFFUSE"},
195 {0x143, "GPUREG_LIGHT0_AMBIENT"},
196 {0x144, "GPUREG_LIGHT0_XY"},
197 {0x145, "GPUREG_LIGHT0_Z"},
198 {0x146, "GPUREG_LIGHT0_SPOTDIR_XY"},
199 {0x147, "GPUREG_LIGHT0_SPOTDIR_Z"},
200
201 {0x149, "GPUREG_LIGHT0_CONFIG"},
202 {0x14A, "GPUREG_LIGHT0_ATTENUATION_BIAS"},
203 {0x14B, "GPUREG_LIGHT0_ATTENUATION_SCALE"},
204
205 {0x150, "GPUREG_LIGHT1_SPECULAR0"},
206 {0x151, "GPUREG_LIGHT1_SPECULAR1"},
207 {0x152, "GPUREG_LIGHT1_DIFFUSE"},
208 {0x153, "GPUREG_LIGHT1_AMBIENT"},
209 {0x154, "GPUREG_LIGHT1_XY"},
210 {0x155, "GPUREG_LIGHT1_Z"},
211 {0x156, "GPUREG_LIGHT1_SPOTDIR_XY"},
212 {0x157, "GPUREG_LIGHT1_SPOTDIR_Z"},
213
214 {0x159, "GPUREG_LIGHT1_CONFIG"},
215 {0x15A, "GPUREG_LIGHT1_ATTENUATION_BIAS"},
216 {0x15B, "GPUREG_LIGHT1_ATTENUATION_SCALE"},
217
218 {0x160, "GPUREG_LIGHT2_SPECULAR0"},
219 {0x161, "GPUREG_LIGHT2_SPECULAR1"},
220 {0x162, "GPUREG_LIGHT2_DIFFUSE"},
221 {0x163, "GPUREG_LIGHT2_AMBIENT"},
222 {0x164, "GPUREG_LIGHT2_XY"},
223 {0x165, "GPUREG_LIGHT2_Z"},
224 {0x166, "GPUREG_LIGHT2_SPOTDIR_XY"},
225 {0x167, "GPUREG_LIGHT2_SPOTDIR_Z"},
226
227 {0x169, "GPUREG_LIGHT2_CONFIG"},
228 {0x16A, "GPUREG_LIGHT2_ATTENUATION_BIAS"},
229 {0x16B, "GPUREG_LIGHT2_ATTENUATION_SCALE"},
230
231 {0x170, "GPUREG_LIGHT3_SPECULAR0"},
232 {0x171, "GPUREG_LIGHT3_SPECULAR1"},
233 {0x172, "GPUREG_LIGHT3_DIFFUSE"},
234 {0x173, "GPUREG_LIGHT3_AMBIENT"},
235 {0x174, "GPUREG_LIGHT3_XY"},
236 {0x175, "GPUREG_LIGHT3_Z"},
237 {0x176, "GPUREG_LIGHT3_SPOTDIR_XY"},
238 {0x177, "GPUREG_LIGHT3_SPOTDIR_Z"},
239
240 {0x179, "GPUREG_LIGHT3_CONFIG"},
241 {0x17A, "GPUREG_LIGHT3_ATTENUATION_BIAS"},
242 {0x17B, "GPUREG_LIGHT3_ATTENUATION_SCALE"},
243
244 {0x180, "GPUREG_LIGHT4_SPECULAR0"},
245 {0x181, "GPUREG_LIGHT4_SPECULAR1"},
246 {0x182, "GPUREG_LIGHT4_DIFFUSE"},
247 {0x183, "GPUREG_LIGHT4_AMBIENT"},
248 {0x184, "GPUREG_LIGHT4_XY"},
249 {0x185, "GPUREG_LIGHT4_Z"},
250 {0x186, "GPUREG_LIGHT4_SPOTDIR_XY"},
251 {0x187, "GPUREG_LIGHT4_SPOTDIR_Z"},
252
253 {0x189, "GPUREG_LIGHT4_CONFIG"},
254 {0x18A, "GPUREG_LIGHT4_ATTENUATION_BIAS"},
255 {0x18B, "GPUREG_LIGHT4_ATTENUATION_SCALE"},
256
257 {0x190, "GPUREG_LIGHT5_SPECULAR0"},
258 {0x191, "GPUREG_LIGHT5_SPECULAR1"},
259 {0x192, "GPUREG_LIGHT5_DIFFUSE"},
260 {0x193, "GPUREG_LIGHT5_AMBIENT"},
261 {0x194, "GPUREG_LIGHT5_XY"},
262 {0x195, "GPUREG_LIGHT5_Z"},
263 {0x196, "GPUREG_LIGHT5_SPOTDIR_XY"},
264 {0x197, "GPUREG_LIGHT5_SPOTDIR_Z"},
265
266 {0x199, "GPUREG_LIGHT5_CONFIG"},
267 {0x19A, "GPUREG_LIGHT5_ATTENUATION_BIAS"},
268 {0x19B, "GPUREG_LIGHT5_ATTENUATION_SCALE"},
269
270 {0x1A0, "GPUREG_LIGHT6_SPECULAR0"},
271 {0x1A1, "GPUREG_LIGHT6_SPECULAR1"},
272 {0x1A2, "GPUREG_LIGHT6_DIFFUSE"},
273 {0x1A3, "GPUREG_LIGHT6_AMBIENT"},
274 {0x1A4, "GPUREG_LIGHT6_XY"},
275 {0x1A5, "GPUREG_LIGHT6_Z"},
276 {0x1A6, "GPUREG_LIGHT6_SPOTDIR_XY"},
277 {0x1A7, "GPUREG_LIGHT6_SPOTDIR_Z"},
278
279 {0x1A9, "GPUREG_LIGHT6_CONFIG"},
280 {0x1AA, "GPUREG_LIGHT6_ATTENUATION_BIAS"},
281 {0x1AB, "GPUREG_LIGHT6_ATTENUATION_SCALE"},
282
283 {0x1B0, "GPUREG_LIGHT7_SPECULAR0"},
284 {0x1B1, "GPUREG_LIGHT7_SPECULAR1"},
285 {0x1B2, "GPUREG_LIGHT7_DIFFUSE"},
286 {0x1B3, "GPUREG_LIGHT7_AMBIENT"},
287 {0x1B4, "GPUREG_LIGHT7_XY"},
288 {0x1B5, "GPUREG_LIGHT7_Z"},
289 {0x1B6, "GPUREG_LIGHT7_SPOTDIR_XY"},
290 {0x1B7, "GPUREG_LIGHT7_SPOTDIR_Z"},
291
292 {0x1B9, "GPUREG_LIGHT7_CONFIG"},
293 {0x1BA, "GPUREG_LIGHT7_ATTENUATION_BIAS"},
294 {0x1BB, "GPUREG_LIGHT7_ATTENUATION_SCALE"},
295
296 {0x1C0, "GPUREG_LIGHTING_AMBIENT"},
297
298 {0x1C2, "GPUREG_LIGHTING_NUM_LIGHTS"},
299 {0x1C3, "GPUREG_LIGHTING_CONFIG0"},
300 {0x1C4, "GPUREG_LIGHTING_CONFIG1"},
301 {0x1C5, "GPUREG_LIGHTING_LUT_INDEX"},
302 {0x1C6, "GPUREG_LIGHTING_ENABLE1"},
303
304 {0x1C8, "GPUREG_LIGHTING_LUT_DATA0"},
305 {0x1C9, "GPUREG_LIGHTING_LUT_DATA1"},
306 {0x1CA, "GPUREG_LIGHTING_LUT_DATA2"},
307 {0x1CB, "GPUREG_LIGHTING_LUT_DATA3"},
308 {0x1CC, "GPUREG_LIGHTING_LUT_DATA4"},
309 {0x1CD, "GPUREG_LIGHTING_LUT_DATA5"},
310 {0x1CE, "GPUREG_LIGHTING_LUT_DATA6"},
311 {0x1CF, "GPUREG_LIGHTING_LUT_DATA7"},
312 {0x1D0, "GPUREG_LIGHTING_LUTINPUT_ABS"},
313 {0x1D1, "GPUREG_LIGHTING_LUTINPUT_SELECT"},
314 {0x1D2, "GPUREG_LIGHTING_LUTINPUT_SCALE"},
315
316 {0x1D9, "GPUREG_LIGHTING_LIGHT_PERMUTATION"},
317
318 {0x200, "GPUREG_ATTRIBBUFFERS_LOC"},
319 {0x201, "GPUREG_ATTRIBBUFFERS_FORMAT_LOW"},
320 {0x202, "GPUREG_ATTRIBBUFFERS_FORMAT_HIGH"},
321 {0x203, "GPUREG_ATTRIBBUFFER0_OFFSET"},
322 {0x204, "GPUREG_ATTRIBBUFFER0_CONFIG1"},
323 {0x205, "GPUREG_ATTRIBBUFFER0_CONFIG2"},
324 {0x206, "GPUREG_ATTRIBBUFFER1_OFFSET"},
325 {0x207, "GPUREG_ATTRIBBUFFER1_CONFIG1"},
326 {0x208, "GPUREG_ATTRIBBUFFER1_CONFIG2"},
327 {0x209, "GPUREG_ATTRIBBUFFER2_OFFSET"},
328 {0x20A, "GPUREG_ATTRIBBUFFER2_CONFIG1"},
329 {0x20B, "GPUREG_ATTRIBBUFFER2_CONFIG2"},
330 {0x20C, "GPUREG_ATTRIBBUFFER3_OFFSET"},
331 {0x20D, "GPUREG_ATTRIBBUFFER3_CONFIG1"},
332 {0x20E, "GPUREG_ATTRIBBUFFER3_CONFIG2"},
333 {0x20F, "GPUREG_ATTRIBBUFFER4_OFFSET"},
334 {0x210, "GPUREG_ATTRIBBUFFER4_CONFIG1"},
335 {0x211, "GPUREG_ATTRIBBUFFER4_CONFIG2"},
336 {0x212, "GPUREG_ATTRIBBUFFER5_OFFSET"},
337 {0x213, "GPUREG_ATTRIBBUFFER5_CONFIG1"},
338 {0x214, "GPUREG_ATTRIBBUFFER5_CONFIG2"},
339 {0x215, "GPUREG_ATTRIBBUFFER6_OFFSET"},
340 {0x216, "GPUREG_ATTRIBBUFFER6_CONFIG1"},
341 {0x217, "GPUREG_ATTRIBBUFFER6_CONFIG2"},
342 {0x218, "GPUREG_ATTRIBBUFFER7_OFFSET"},
343 {0x219, "GPUREG_ATTRIBBUFFER7_CONFIG1"},
344 {0x21A, "GPUREG_ATTRIBBUFFER7_CONFIG2"},
345 {0x21B, "GPUREG_ATTRIBBUFFER8_OFFSET"},
346 {0x21C, "GPUREG_ATTRIBBUFFER8_CONFIG1"},
347 {0x21D, "GPUREG_ATTRIBBUFFER8_CONFIG2"},
348 {0x21E, "GPUREG_ATTRIBBUFFER9_OFFSET"},
349 {0x21F, "GPUREG_ATTRIBBUFFER9_CONFIG1"},
350 {0x220, "GPUREG_ATTRIBBUFFER9_CONFIG2"},
351 {0x221, "GPUREG_ATTRIBBUFFER10_OFFSET"},
352 {0x222, "GPUREG_ATTRIBBUFFER10_CONFIG1"},
353 {0x223, "GPUREG_ATTRIBBUFFER10_CONFIG2"},
354 {0x224, "GPUREG_ATTRIBBUFFER11_OFFSET"},
355 {0x225, "GPUREG_ATTRIBBUFFER11_CONFIG1"},
356 {0x226, "GPUREG_ATTRIBBUFFER11_CONFIG2"},
357 {0x227, "GPUREG_INDEXBUFFER_CONFIG"},
358 {0x228, "GPUREG_NUMVERTICES"},
359 {0x229, "GPUREG_GEOSTAGE_CONFIG"},
360 {0x22A, "GPUREG_VERTEX_OFFSET"},
361
362 {0x22D, "GPUREG_POST_VERTEX_CACHE_NUM"},
363 {0x22E, "GPUREG_DRAWARRAYS"},
364 {0x22F, "GPUREG_DRAWELEMENTS"},
365
366 {0x231, "GPUREG_VTX_FUNC"},
367 {0x232, "GPUREG_FIXEDATTRIB_INDEX"},
368 {0x233, "GPUREG_FIXEDATTRIB_DATA0"},
369 {0x234, "GPUREG_FIXEDATTRIB_DATA1"},
370 {0x235, "GPUREG_FIXEDATTRIB_DATA2"},
371
372 {0x238, "GPUREG_CMDBUF_SIZE0"},
373 {0x239, "GPUREG_CMDBUF_SIZE1"},
374 {0x23A, "GPUREG_CMDBUF_ADDR0"},
375 {0x23B, "GPUREG_CMDBUF_ADDR1"},
376 {0x23C, "GPUREG_CMDBUF_JUMP0"},
377 {0x23D, "GPUREG_CMDBUF_JUMP1"},
378
379 {0x242, "GPUREG_VSH_NUM_ATTR"},
380
381 {0x244, "GPUREG_VSH_COM_MODE"},
382 {0x245, "GPUREG_START_DRAW_FUNC0"},
383
384 {0x24A, "GPUREG_VSH_OUTMAP_TOTAL1"},
385
386 {0x251, "GPUREG_VSH_OUTMAP_TOTAL2"},
387 {0x252, "GPUREG_GSH_MISC0"},
388 {0x253, "GPUREG_GEOSTAGE_CONFIG2"},
389 {0x254, "GPUREG_GSH_MISC1"},
390
391 {0x25E, "GPUREG_PRIMITIVE_CONFIG"},
392 {0x25F, "GPUREG_RESTART_PRIMITIVE"},
393
394 {0x280, "GPUREG_GSH_BOOLUNIFORM"},
395 {0x281, "GPUREG_GSH_INTUNIFORM_I0"},
396 {0x282, "GPUREG_GSH_INTUNIFORM_I1"},
397 {0x283, "GPUREG_GSH_INTUNIFORM_I2"},
398 {0x284, "GPUREG_GSH_INTUNIFORM_I3"},
399
400 {0x289, "GPUREG_GSH_INPUTBUFFER_CONFIG"},
401 {0x28A, "GPUREG_GSH_ENTRYPOINT"},
402 {0x28B, "GPUREG_GSH_ATTRIBUTES_PERMUTATION_LOW"},
403 {0x28C, "GPUREG_GSH_ATTRIBUTES_PERMUTATION_HIGH"},
404 {0x28D, "GPUREG_GSH_OUTMAP_MASK"},
405
406 {0x28F, "GPUREG_GSH_CODETRANSFER_END"},
407 {0x290, "GPUREG_GSH_FLOATUNIFORM_INDEX"},
408 {0x291, "GPUREG_GSH_FLOATUNIFORM_DATA0"},
409 {0x292, "GPUREG_GSH_FLOATUNIFORM_DATA1"},
410 {0x293, "GPUREG_GSH_FLOATUNIFORM_DATA2"},
411 {0x294, "GPUREG_GSH_FLOATUNIFORM_DATA3"},
412 {0x295, "GPUREG_GSH_FLOATUNIFORM_DATA4"},
413 {0x296, "GPUREG_GSH_FLOATUNIFORM_DATA5"},
414 {0x297, "GPUREG_GSH_FLOATUNIFORM_DATA6"},
415 {0x298, "GPUREG_GSH_FLOATUNIFORM_DATA7"},
416
417 {0x29B, "GPUREG_GSH_CODETRANSFER_INDEX"},
418 {0x29C, "GPUREG_GSH_CODETRANSFER_DATA0"},
419 {0x29D, "GPUREG_GSH_CODETRANSFER_DATA1"},
420 {0x29E, "GPUREG_GSH_CODETRANSFER_DATA2"},
421 {0x29F, "GPUREG_GSH_CODETRANSFER_DATA3"},
422 {0x2A0, "GPUREG_GSH_CODETRANSFER_DATA4"},
423 {0x2A1, "GPUREG_GSH_CODETRANSFER_DATA5"},
424 {0x2A2, "GPUREG_GSH_CODETRANSFER_DATA6"},
425 {0x2A3, "GPUREG_GSH_CODETRANSFER_DATA7"},
426
427 {0x2A5, "GPUREG_GSH_OPDESCS_INDEX"},
428 {0x2A6, "GPUREG_GSH_OPDESCS_DATA0"},
429 {0x2A7, "GPUREG_GSH_OPDESCS_DATA1"},
430 {0x2A8, "GPUREG_GSH_OPDESCS_DATA2"},
431 {0x2A9, "GPUREG_GSH_OPDESCS_DATA3"},
432 {0x2AA, "GPUREG_GSH_OPDESCS_DATA4"},
433 {0x2AB, "GPUREG_GSH_OPDESCS_DATA5"},
434 {0x2AC, "GPUREG_GSH_OPDESCS_DATA6"},
435 {0x2AD, "GPUREG_GSH_OPDESCS_DATA7"},
436
437 {0x2B0, "GPUREG_VSH_BOOLUNIFORM"},
438 {0x2B1, "GPUREG_VSH_INTUNIFORM_I0"},
439 {0x2B2, "GPUREG_VSH_INTUNIFORM_I1"},
440 {0x2B3, "GPUREG_VSH_INTUNIFORM_I2"},
441 {0x2B4, "GPUREG_VSH_INTUNIFORM_I3"},
442
443 {0x2B9, "GPUREG_VSH_INPUTBUFFER_CONFIG"},
444 {0x2BA, "GPUREG_VSH_ENTRYPOINT"},
445 {0x2BB, "GPUREG_VSH_ATTRIBUTES_PERMUTATION_LOW"},
446 {0x2BC, "GPUREG_VSH_ATTRIBUTES_PERMUTATION_HIGH"},
447 {0x2BD, "GPUREG_VSH_OUTMAP_MASK"},
448
449 {0x2BF, "GPUREG_VSH_CODETRANSFER_END"},
450 {0x2C0, "GPUREG_VSH_FLOATUNIFORM_INDEX"},
451 {0x2C1, "GPUREG_VSH_FLOATUNIFORM_DATA0"},
452 {0x2C2, "GPUREG_VSH_FLOATUNIFORM_DATA1"},
453 {0x2C3, "GPUREG_VSH_FLOATUNIFORM_DATA2"},
454 {0x2C4, "GPUREG_VSH_FLOATUNIFORM_DATA3"},
455 {0x2C5, "GPUREG_VSH_FLOATUNIFORM_DATA4"},
456 {0x2C6, "GPUREG_VSH_FLOATUNIFORM_DATA5"},
457 {0x2C7, "GPUREG_VSH_FLOATUNIFORM_DATA6"},
458 {0x2C8, "GPUREG_VSH_FLOATUNIFORM_DATA7"},
459
460 {0x2CB, "GPUREG_VSH_CODETRANSFER_INDEX"},
461 {0x2CC, "GPUREG_VSH_CODETRANSFER_DATA0"},
462 {0x2CD, "GPUREG_VSH_CODETRANSFER_DATA1"},
463 {0x2CE, "GPUREG_VSH_CODETRANSFER_DATA2"},
464 {0x2CF, "GPUREG_VSH_CODETRANSFER_DATA3"},
465 {0x2D0, "GPUREG_VSH_CODETRANSFER_DATA4"},
466 {0x2D1, "GPUREG_VSH_CODETRANSFER_DATA5"},
467 {0x2D2, "GPUREG_VSH_CODETRANSFER_DATA6"},
468 {0x2D3, "GPUREG_VSH_CODETRANSFER_DATA7"},
469
470 {0x2D5, "GPUREG_VSH_OPDESCS_INDEX"},
471 {0x2D6, "GPUREG_VSH_OPDESCS_DATA0"},
472 {0x2D7, "GPUREG_VSH_OPDESCS_DATA1"},
473 {0x2D8, "GPUREG_VSH_OPDESCS_DATA2"},
474 {0x2D9, "GPUREG_VSH_OPDESCS_DATA3"},
475 {0x2DA, "GPUREG_VSH_OPDESCS_DATA4"},
476 {0x2DB, "GPUREG_VSH_OPDESCS_DATA5"},
477 {0x2DC, "GPUREG_VSH_OPDESCS_DATA6"},
478 {0x2DD, "GPUREG_VSH_OPDESCS_DATA7"},
479};
480
481std::string Regs::GetCommandName(int index) {
482 static std::unordered_map<u32, const char*> map;
483
484 if (map.empty()) {
485 map.insert(std::begin(register_names), std::end(register_names));
486 }
487
488 // Return empty string if no match is found
489 auto it = map.find(index);
490 if (it != map.end()) {
491 return it->second;
492 } else {
493 return std::string();
494 }
495}
496
497void Init() { 14void Init() {
498 g_state.Reset(); 15 g_state.Reset();
499} 16}
500 17
501void Shutdown() { 18void Shutdown() {
502 Shader::ClearCache(); 19 Shader::Shutdown();
503} 20}
504 21
505template <typename T> 22template <typename T>
@@ -513,6 +30,6 @@ void State::Reset() {
513 Zero(gs); 30 Zero(gs);
514 Zero(cmd_list); 31 Zero(cmd_list);
515 Zero(immediate); 32 Zero(immediate);
516 primitive_assembler.Reconfigure(Regs::TriangleTopology::List); 33 primitive_assembler.Reconfigure(PipelineRegs::TriangleTopology::List);
517} 34}
518} 35}
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index b2db609ec..dc8aa6670 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -4,1412 +4,9 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array> 7#include "video_core/regs_texturing.h"
8#include <cstddef>
9#include <string>
10
11#ifndef _MSC_VER
12#include <type_traits> // for std::enable_if
13#endif
14
15#include "common/assert.h"
16#include "common/bit_field.h"
17#include "common/common_funcs.h"
18#include "common/common_types.h"
19#include "common/logging/log.h"
20#include "common/vector_math.h"
21
22namespace Pica { 8namespace Pica {
23 9
24// Returns index corresponding to the Regs member labeled by field_name
25// TODO: Due to Visual studio bug 209229, offsetof does not return constant expressions
26// when used with array elements (e.g. PICA_REG_INDEX(vs_uniform_setup.set_value[1])).
27// For details cf.
28// https://connect.microsoft.com/VisualStudio/feedback/details/209229/offsetof-does-not-produce-a-constant-expression-for-array-members
29// Hopefully, this will be fixed sometime in the future.
30// For lack of better alternatives, we currently hardcode the offsets when constant
31// expressions are needed via PICA_REG_INDEX_WORKAROUND (on sane compilers, static_asserts
32// will then make sure the offsets indeed match the automatically calculated ones).
33#define PICA_REG_INDEX(field_name) (offsetof(Pica::Regs, field_name) / sizeof(u32))
34#if defined(_MSC_VER)
35#define PICA_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) (backup_workaround_index)
36#else
37// NOTE: Yeah, hacking in a static_assert here just to workaround the lacking MSVC compiler
38// really is this annoying. This macro just forwards its first argument to PICA_REG_INDEX
39// and then performs a (no-op) cast to size_t iff the second argument matches the expected
40// field offset. Otherwise, the compiler will fail to compile this code.
41#define PICA_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) \
42 ((typename std::enable_if<backup_workaround_index == PICA_REG_INDEX(field_name), \
43 size_t>::type)PICA_REG_INDEX(field_name))
44#endif // _MSC_VER
45
46struct Regs {
47
48 INSERT_PADDING_WORDS(0x10);
49
50 u32 trigger_irq;
51
52 INSERT_PADDING_WORDS(0x2f);
53
54 enum class CullMode : u32 {
55 // Select which polygons are considered to be "frontfacing".
56 KeepAll = 0,
57 KeepClockWise = 1,
58 KeepCounterClockWise = 2,
59 // TODO: What does the third value imply?
60 };
61
62 union {
63 BitField<0, 2, CullMode> cull_mode;
64 };
65
66 BitField<0, 24, u32> viewport_size_x;
67
68 INSERT_PADDING_WORDS(0x1);
69
70 BitField<0, 24, u32> viewport_size_y;
71
72 INSERT_PADDING_WORDS(0x9);
73
74 BitField<0, 24, u32> viewport_depth_range; // float24
75 BitField<0, 24, u32> viewport_depth_near_plane; // float24
76
77 BitField<0, 3, u32> vs_output_total;
78
79 union VSOutputAttributes {
80 // Maps components of output vertex attributes to semantics
81 enum Semantic : u32 {
82 POSITION_X = 0,
83 POSITION_Y = 1,
84 POSITION_Z = 2,
85 POSITION_W = 3,
86
87 QUATERNION_X = 4,
88 QUATERNION_Y = 5,
89 QUATERNION_Z = 6,
90 QUATERNION_W = 7,
91
92 COLOR_R = 8,
93 COLOR_G = 9,
94 COLOR_B = 10,
95 COLOR_A = 11,
96
97 TEXCOORD0_U = 12,
98 TEXCOORD0_V = 13,
99 TEXCOORD1_U = 14,
100 TEXCOORD1_V = 15,
101
102 // TODO: Not verified
103 VIEW_X = 18,
104 VIEW_Y = 19,
105 VIEW_Z = 20,
106
107 TEXCOORD2_U = 22,
108 TEXCOORD2_V = 23,
109
110 INVALID = 31,
111 };
112
113 BitField<0, 5, Semantic> map_x;
114 BitField<8, 5, Semantic> map_y;
115 BitField<16, 5, Semantic> map_z;
116 BitField<24, 5, Semantic> map_w;
117 } vs_output_attributes[7];
118
119 INSERT_PADDING_WORDS(0xe);
120
121 enum class ScissorMode : u32 {
122 Disabled = 0,
123 Exclude = 1, // Exclude pixels inside the scissor box
124
125 Include = 3 // Exclude pixels outside the scissor box
126 };
127
128 struct {
129 BitField<0, 2, ScissorMode> mode;
130
131 union {
132 BitField<0, 16, u32> x1;
133 BitField<16, 16, u32> y1;
134 };
135
136 union {
137 BitField<0, 16, u32> x2;
138 BitField<16, 16, u32> y2;
139 };
140 } scissor_test;
141
142 union {
143 BitField<0, 10, s32> x;
144 BitField<16, 10, s32> y;
145 } viewport_corner;
146
147 INSERT_PADDING_WORDS(0x1);
148
149 // TODO: early depth
150 INSERT_PADDING_WORDS(0x1);
151
152 INSERT_PADDING_WORDS(0x2);
153
154 enum DepthBuffering : u32 {
155 WBuffering = 0,
156 ZBuffering = 1,
157 };
158 BitField<0, 1, DepthBuffering> depthmap_enable;
159
160 INSERT_PADDING_WORDS(0x12);
161
162 struct TextureConfig {
163 enum TextureType : u32 {
164 Texture2D = 0,
165 TextureCube = 1,
166 Shadow2D = 2,
167 Projection2D = 3,
168 ShadowCube = 4,
169 Disabled = 5,
170 };
171
172 enum WrapMode : u32 {
173 ClampToEdge = 0,
174 ClampToBorder = 1,
175 Repeat = 2,
176 MirroredRepeat = 3,
177 };
178
179 enum TextureFilter : u32 {
180 Nearest = 0,
181 Linear = 1,
182 };
183
184 union {
185 u32 raw;
186 BitField<0, 8, u32> r;
187 BitField<8, 8, u32> g;
188 BitField<16, 8, u32> b;
189 BitField<24, 8, u32> a;
190 } border_color;
191
192 union {
193 BitField<0, 16, u32> height;
194 BitField<16, 16, u32> width;
195 };
196
197 union {
198 BitField<1, 1, TextureFilter> mag_filter;
199 BitField<2, 1, TextureFilter> min_filter;
200 BitField<8, 2, WrapMode> wrap_t;
201 BitField<12, 2, WrapMode> wrap_s;
202 BitField<28, 2, TextureType>
203 type; ///< @note Only valid for texture 0 according to 3DBrew.
204 };
205
206 INSERT_PADDING_WORDS(0x1);
207
208 u32 address;
209
210 u32 GetPhysicalAddress() const {
211 return DecodeAddressRegister(address);
212 }
213
214 // texture1 and texture2 store the texture format directly after the address
215 // whereas texture0 inserts some additional flags in between.
216 // Hence, we store the format separately so that all other parameters can be described
217 // in a single structure.
218 };
219
220 enum class TextureFormat : u32 {
221 RGBA8 = 0,
222 RGB8 = 1,
223 RGB5A1 = 2,
224 RGB565 = 3,
225 RGBA4 = 4,
226 IA8 = 5,
227 RG8 = 6, ///< @note Also called HILO8 in 3DBrew.
228 I8 = 7,
229 A8 = 8,
230 IA4 = 9,
231 I4 = 10,
232 A4 = 11,
233 ETC1 = 12, // compressed
234 ETC1A4 = 13, // compressed
235 };
236
237 enum class LogicOp : u32 {
238 Clear = 0,
239 And = 1,
240 AndReverse = 2,
241 Copy = 3,
242 Set = 4,
243 CopyInverted = 5,
244 NoOp = 6,
245 Invert = 7,
246 Nand = 8,
247 Or = 9,
248 Nor = 10,
249 Xor = 11,
250 Equiv = 12,
251 AndInverted = 13,
252 OrReverse = 14,
253 OrInverted = 15,
254 };
255
256 static unsigned NibblesPerPixel(TextureFormat format) {
257 switch (format) {
258 case TextureFormat::RGBA8:
259 return 8;
260
261 case TextureFormat::RGB8:
262 return 6;
263
264 case TextureFormat::RGB5A1:
265 case TextureFormat::RGB565:
266 case TextureFormat::RGBA4:
267 case TextureFormat::IA8:
268 case TextureFormat::RG8:
269 return 4;
270
271 case TextureFormat::I4:
272 case TextureFormat::A4:
273 return 1;
274
275 case TextureFormat::I8:
276 case TextureFormat::A8:
277 case TextureFormat::IA4:
278 default: // placeholder for yet unknown formats
279 return 2;
280 }
281 }
282
283 union {
284 BitField<0, 1, u32> texture0_enable;
285 BitField<1, 1, u32> texture1_enable;
286 BitField<2, 1, u32> texture2_enable;
287 };
288 TextureConfig texture0;
289 INSERT_PADDING_WORDS(0x8);
290 BitField<0, 4, TextureFormat> texture0_format;
291 BitField<0, 1, u32> fragment_lighting_enable;
292 INSERT_PADDING_WORDS(0x1);
293 TextureConfig texture1;
294 BitField<0, 4, TextureFormat> texture1_format;
295 INSERT_PADDING_WORDS(0x2);
296 TextureConfig texture2;
297 BitField<0, 4, TextureFormat> texture2_format;
298 INSERT_PADDING_WORDS(0x21);
299
300 struct FullTextureConfig {
301 const bool enabled;
302 const TextureConfig config;
303 const TextureFormat format;
304 };
305 const std::array<FullTextureConfig, 3> GetTextures() const {
306 return {{
307 {texture0_enable.ToBool(), texture0, texture0_format},
308 {texture1_enable.ToBool(), texture1, texture1_format},
309 {texture2_enable.ToBool(), texture2, texture2_format},
310 }};
311 }
312
313 // 0xc0-0xff: Texture Combiner (akin to glTexEnv)
314 struct TevStageConfig {
315 enum class Source : u32 {
316 PrimaryColor = 0x0,
317 PrimaryFragmentColor = 0x1,
318 SecondaryFragmentColor = 0x2,
319
320 Texture0 = 0x3,
321 Texture1 = 0x4,
322 Texture2 = 0x5,
323 Texture3 = 0x6,
324
325 PreviousBuffer = 0xd,
326 Constant = 0xe,
327 Previous = 0xf,
328 };
329
330 enum class ColorModifier : u32 {
331 SourceColor = 0x0,
332 OneMinusSourceColor = 0x1,
333 SourceAlpha = 0x2,
334 OneMinusSourceAlpha = 0x3,
335 SourceRed = 0x4,
336 OneMinusSourceRed = 0x5,
337
338 SourceGreen = 0x8,
339 OneMinusSourceGreen = 0x9,
340
341 SourceBlue = 0xc,
342 OneMinusSourceBlue = 0xd,
343 };
344
345 enum class AlphaModifier : u32 {
346 SourceAlpha = 0x0,
347 OneMinusSourceAlpha = 0x1,
348 SourceRed = 0x2,
349 OneMinusSourceRed = 0x3,
350 SourceGreen = 0x4,
351 OneMinusSourceGreen = 0x5,
352 SourceBlue = 0x6,
353 OneMinusSourceBlue = 0x7,
354 };
355
356 enum class Operation : u32 {
357 Replace = 0,
358 Modulate = 1,
359 Add = 2,
360 AddSigned = 3,
361 Lerp = 4,
362 Subtract = 5,
363 Dot3_RGB = 6,
364
365 MultiplyThenAdd = 8,
366 AddThenMultiply = 9,
367 };
368
369 union {
370 u32 sources_raw;
371 BitField<0, 4, Source> color_source1;
372 BitField<4, 4, Source> color_source2;
373 BitField<8, 4, Source> color_source3;
374 BitField<16, 4, Source> alpha_source1;
375 BitField<20, 4, Source> alpha_source2;
376 BitField<24, 4, Source> alpha_source3;
377 };
378
379 union {
380 u32 modifiers_raw;
381 BitField<0, 4, ColorModifier> color_modifier1;
382 BitField<4, 4, ColorModifier> color_modifier2;
383 BitField<8, 4, ColorModifier> color_modifier3;
384 BitField<12, 3, AlphaModifier> alpha_modifier1;
385 BitField<16, 3, AlphaModifier> alpha_modifier2;
386 BitField<20, 3, AlphaModifier> alpha_modifier3;
387 };
388
389 union {
390 u32 ops_raw;
391 BitField<0, 4, Operation> color_op;
392 BitField<16, 4, Operation> alpha_op;
393 };
394
395 union {
396 u32 const_color;
397 BitField<0, 8, u32> const_r;
398 BitField<8, 8, u32> const_g;
399 BitField<16, 8, u32> const_b;
400 BitField<24, 8, u32> const_a;
401 };
402
403 union {
404 u32 scales_raw;
405 BitField<0, 2, u32> color_scale;
406 BitField<16, 2, u32> alpha_scale;
407 };
408
409 inline unsigned GetColorMultiplier() const {
410 return (color_scale < 3) ? (1 << color_scale) : 1;
411 }
412
413 inline unsigned GetAlphaMultiplier() const {
414 return (alpha_scale < 3) ? (1 << alpha_scale) : 1;
415 }
416 };
417
418 TevStageConfig tev_stage0;
419 INSERT_PADDING_WORDS(0x3);
420 TevStageConfig tev_stage1;
421 INSERT_PADDING_WORDS(0x3);
422 TevStageConfig tev_stage2;
423 INSERT_PADDING_WORDS(0x3);
424 TevStageConfig tev_stage3;
425 INSERT_PADDING_WORDS(0x3);
426
427 enum class FogMode : u32 {
428 None = 0,
429 Fog = 5,
430 Gas = 7,
431 };
432
433 union {
434 BitField<0, 3, FogMode> fog_mode;
435 BitField<16, 1, u32> fog_flip;
436
437 union {
438 // Tev stages 0-3 write their output to the combiner buffer if the corresponding bit in
439 // these masks is set
440 BitField<8, 4, u32> update_mask_rgb;
441 BitField<12, 4, u32> update_mask_a;
442
443 bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
444 return (stage_index < 4) && (update_mask_rgb & (1 << stage_index));
445 }
446
447 bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const {
448 return (stage_index < 4) && (update_mask_a & (1 << stage_index));
449 }
450 } tev_combiner_buffer_input;
451 };
452
453 union {
454 u32 raw;
455 BitField<0, 8, u32> r;
456 BitField<8, 8, u32> g;
457 BitField<16, 8, u32> b;
458 } fog_color;
459
460 INSERT_PADDING_WORDS(0x4);
461
462 BitField<0, 16, u32> fog_lut_offset;
463
464 INSERT_PADDING_WORDS(0x1);
465
466 u32 fog_lut_data[8];
467
468 TevStageConfig tev_stage4;
469 INSERT_PADDING_WORDS(0x3);
470 TevStageConfig tev_stage5;
471
472 union {
473 u32 raw;
474 BitField<0, 8, u32> r;
475 BitField<8, 8, u32> g;
476 BitField<16, 8, u32> b;
477 BitField<24, 8, u32> a;
478 } tev_combiner_buffer_color;
479
480 INSERT_PADDING_WORDS(0x2);
481
482 const std::array<Regs::TevStageConfig, 6> GetTevStages() const {
483 return {{tev_stage0, tev_stage1, tev_stage2, tev_stage3, tev_stage4, tev_stage5}};
484 };
485
486 enum class BlendEquation : u32 {
487 Add = 0,
488 Subtract = 1,
489 ReverseSubtract = 2,
490 Min = 3,
491 Max = 4,
492 };
493
494 enum class BlendFactor : u32 {
495 Zero = 0,
496 One = 1,
497 SourceColor = 2,
498 OneMinusSourceColor = 3,
499 DestColor = 4,
500 OneMinusDestColor = 5,
501 SourceAlpha = 6,
502 OneMinusSourceAlpha = 7,
503 DestAlpha = 8,
504 OneMinusDestAlpha = 9,
505 ConstantColor = 10,
506 OneMinusConstantColor = 11,
507 ConstantAlpha = 12,
508 OneMinusConstantAlpha = 13,
509 SourceAlphaSaturate = 14,
510 };
511
512 enum class CompareFunc : u32 {
513 Never = 0,
514 Always = 1,
515 Equal = 2,
516 NotEqual = 3,
517 LessThan = 4,
518 LessThanOrEqual = 5,
519 GreaterThan = 6,
520 GreaterThanOrEqual = 7,
521 };
522
523 enum class StencilAction : u32 {
524 Keep = 0,
525 Zero = 1,
526 Replace = 2,
527 Increment = 3,
528 Decrement = 4,
529 Invert = 5,
530 IncrementWrap = 6,
531 DecrementWrap = 7,
532 };
533
534 struct {
535 union {
536 // If false, logic blending is used
537 BitField<8, 1, u32> alphablend_enable;
538 };
539
540 union {
541 BitField<0, 8, BlendEquation> blend_equation_rgb;
542 BitField<8, 8, BlendEquation> blend_equation_a;
543
544 BitField<16, 4, BlendFactor> factor_source_rgb;
545 BitField<20, 4, BlendFactor> factor_dest_rgb;
546
547 BitField<24, 4, BlendFactor> factor_source_a;
548 BitField<28, 4, BlendFactor> factor_dest_a;
549 } alpha_blending;
550
551 union {
552 BitField<0, 4, LogicOp> logic_op;
553 };
554
555 union {
556 u32 raw;
557 BitField<0, 8, u32> r;
558 BitField<8, 8, u32> g;
559 BitField<16, 8, u32> b;
560 BitField<24, 8, u32> a;
561 } blend_const;
562
563 union {
564 BitField<0, 1, u32> enable;
565 BitField<4, 3, CompareFunc> func;
566 BitField<8, 8, u32> ref;
567 } alpha_test;
568
569 struct {
570 union {
571 // Raw value of this register
572 u32 raw_func;
573
574 // If true, enable stencil testing
575 BitField<0, 1, u32> enable;
576
577 // Comparison operation for stencil testing
578 BitField<4, 3, CompareFunc> func;
579
580 // Mask used to control writing to the stencil buffer
581 BitField<8, 8, u32> write_mask;
582
583 // Value to compare against for stencil testing
584 BitField<16, 8, u32> reference_value;
585
586 // Mask to apply on stencil test inputs
587 BitField<24, 8, u32> input_mask;
588 };
589
590 union {
591 // Raw value of this register
592 u32 raw_op;
593
594 // Action to perform when the stencil test fails
595 BitField<0, 3, StencilAction> action_stencil_fail;
596
597 // Action to perform when stencil testing passed but depth testing fails
598 BitField<4, 3, StencilAction> action_depth_fail;
599
600 // Action to perform when both stencil and depth testing pass
601 BitField<8, 3, StencilAction> action_depth_pass;
602 };
603 } stencil_test;
604
605 union {
606 BitField<0, 1, u32> depth_test_enable;
607 BitField<4, 3, CompareFunc> depth_test_func;
608 BitField<8, 1, u32> red_enable;
609 BitField<9, 1, u32> green_enable;
610 BitField<10, 1, u32> blue_enable;
611 BitField<11, 1, u32> alpha_enable;
612 BitField<12, 1, u32> depth_write_enable;
613 };
614
615 INSERT_PADDING_WORDS(0x8);
616 } output_merger;
617
618 // Components are laid out in reverse byte order, most significant bits first.
619 enum class ColorFormat : u32 {
620 RGBA8 = 0,
621 RGB8 = 1,
622 RGB5A1 = 2,
623 RGB565 = 3,
624 RGBA4 = 4,
625 };
626
627 enum class DepthFormat : u32 {
628 D16 = 0,
629 D24 = 2,
630 D24S8 = 3,
631 };
632
633 // Returns the number of bytes in the specified color format
634 static unsigned BytesPerColorPixel(ColorFormat format) {
635 switch (format) {
636 case ColorFormat::RGBA8:
637 return 4;
638 case ColorFormat::RGB8:
639 return 3;
640 case ColorFormat::RGB5A1:
641 case ColorFormat::RGB565:
642 case ColorFormat::RGBA4:
643 return 2;
644 default:
645 LOG_CRITICAL(HW_GPU, "Unknown color format %u", format);
646 UNIMPLEMENTED();
647 }
648 }
649
650 struct FramebufferConfig {
651 INSERT_PADDING_WORDS(0x3);
652
653 union {
654 BitField<0, 4, u32> allow_color_write; // 0 = disable, else enable
655 };
656
657 INSERT_PADDING_WORDS(0x1);
658
659 union {
660 BitField<0, 2, u32> allow_depth_stencil_write; // 0 = disable, else enable
661 };
662
663 DepthFormat depth_format; // TODO: Should be a BitField!
664 BitField<16, 3, ColorFormat> color_format;
665
666 INSERT_PADDING_WORDS(0x4);
667
668 u32 depth_buffer_address;
669 u32 color_buffer_address;
670
671 union {
672 // Apparently, the framebuffer width is stored as expected,
673 // while the height is stored as the actual height minus one.
674 // Hence, don't access these fields directly but use the accessors
675 // GetWidth() and GetHeight() instead.
676 BitField<0, 11, u32> width;
677 BitField<12, 10, u32> height;
678 };
679
680 INSERT_PADDING_WORDS(0x1);
681
682 inline u32 GetColorBufferPhysicalAddress() const {
683 return DecodeAddressRegister(color_buffer_address);
684 }
685 inline u32 GetDepthBufferPhysicalAddress() const {
686 return DecodeAddressRegister(depth_buffer_address);
687 }
688
689 inline u32 GetWidth() const {
690 return width;
691 }
692
693 inline u32 GetHeight() const {
694 return height + 1;
695 }
696 } framebuffer;
697
698 // Returns the number of bytes in the specified depth format
699 static u32 BytesPerDepthPixel(DepthFormat format) {
700 switch (format) {
701 case DepthFormat::D16:
702 return 2;
703 case DepthFormat::D24:
704 return 3;
705 case DepthFormat::D24S8:
706 return 4;
707 default:
708 LOG_CRITICAL(HW_GPU, "Unknown depth format %u", format);
709 UNIMPLEMENTED();
710 }
711 }
712
713 // Returns the number of bits per depth component of the specified depth format
714 static u32 DepthBitsPerPixel(DepthFormat format) {
715 switch (format) {
716 case DepthFormat::D16:
717 return 16;
718 case DepthFormat::D24:
719 case DepthFormat::D24S8:
720 return 24;
721 default:
722 LOG_CRITICAL(HW_GPU, "Unknown depth format %u", format);
723 UNIMPLEMENTED();
724 }
725 }
726
727 INSERT_PADDING_WORDS(0x20);
728
729 enum class LightingSampler {
730 Distribution0 = 0,
731 Distribution1 = 1,
732 Fresnel = 3,
733 ReflectBlue = 4,
734 ReflectGreen = 5,
735 ReflectRed = 6,
736 SpotlightAttenuation = 8,
737 DistanceAttenuation = 16,
738 };
739
740 /**
741 * Pica fragment lighting supports using different LUTs for each lighting component:
742 * Reflectance R, G, and B channels, distribution function for specular components 0 and 1,
743 * fresnel factor, and spotlight attenuation. Furthermore, which LUTs are used for each channel
744 * (or whether a channel is enabled at all) is specified by various pre-defined lighting
745 * configurations. With configurations that require more LUTs, more cycles are required on HW to
746 * perform lighting computations.
747 */
748 enum class LightingConfig {
749 Config0 = 0, ///< Reflect Red, Distribution 0, Spotlight
750 Config1 = 1, ///< Reflect Red, Fresnel, Spotlight
751 Config2 = 2, ///< Reflect Red, Distribution 0/1
752 Config3 = 3, ///< Distribution 0/1, Fresnel
753 Config4 = 4, ///< Reflect Red/Green/Blue, Distribution 0/1, Spotlight
754 Config5 = 5, ///< Reflect Red/Green/Blue, Distribution 0, Fresnel, Spotlight
755 Config6 = 6, ///< Reflect Red, Distribution 0/1, Fresnel, Spotlight
756 Config7 = 8, ///< Reflect Red/Green/Blue, Distribution 0/1, Fresnel, Spotlight
757 ///< NOTE: '8' is intentional, '7' does not appear to be a valid configuration
758 };
759
760 /// Selects which lighting components are affected by fresnel
761 enum class LightingFresnelSelector {
762 None = 0, ///< Fresnel is disabled
763 PrimaryAlpha = 1, ///< Primary (diffuse) lighting alpha is affected by fresnel
764 SecondaryAlpha = 2, ///< Secondary (specular) lighting alpha is affected by fresnel
765 Both =
766 PrimaryAlpha |
767 SecondaryAlpha, ///< Both primary and secondary lighting alphas are affected by fresnel
768 };
769
770 /// Factor used to scale the output of a lighting LUT
771 enum class LightingScale {
772 Scale1 = 0, ///< Scale is 1x
773 Scale2 = 1, ///< Scale is 2x
774 Scale4 = 2, ///< Scale is 4x
775 Scale8 = 3, ///< Scale is 8x
776 Scale1_4 = 6, ///< Scale is 0.25x
777 Scale1_2 = 7, ///< Scale is 0.5x
778 };
779
780 enum class LightingLutInput {
781 NH = 0, // Cosine of the angle between the normal and half-angle vectors
782 VH = 1, // Cosine of the angle between the view and half-angle vectors
783 NV = 2, // Cosine of the angle between the normal and the view vector
784 LN = 3, // Cosine of the angle between the light and the normal vectors
785 };
786
787 enum class LightingBumpMode : u32 {
788 None = 0,
789 NormalMap = 1,
790 TangentMap = 2,
791 };
792
793 union LightColor {
794 BitField<0, 10, u32> b;
795 BitField<10, 10, u32> g;
796 BitField<20, 10, u32> r;
797
798 Math::Vec3f ToVec3f() const {
799 // These fields are 10 bits wide, however 255 corresponds to 1.0f for each color
800 // component
801 return Math::MakeVec((f32)r / 255.f, (f32)g / 255.f, (f32)b / 255.f);
802 }
803 };
804
805 /// Returns true if the specified lighting sampler is supported by the current Pica lighting
806 /// configuration
807 static bool IsLightingSamplerSupported(LightingConfig config, LightingSampler sampler) {
808 switch (sampler) {
809 case LightingSampler::Distribution0:
810 return (config != LightingConfig::Config1);
811
812 case LightingSampler::Distribution1:
813 return (config != LightingConfig::Config0) && (config != LightingConfig::Config1) &&
814 (config != LightingConfig::Config5);
815
816 case LightingSampler::Fresnel:
817 return (config != LightingConfig::Config0) && (config != LightingConfig::Config2) &&
818 (config != LightingConfig::Config4);
819
820 case LightingSampler::ReflectRed:
821 return (config != LightingConfig::Config3);
822
823 case LightingSampler::ReflectGreen:
824 case LightingSampler::ReflectBlue:
825 return (config == LightingConfig::Config4) || (config == LightingConfig::Config5) ||
826 (config == LightingConfig::Config7);
827 default:
828 UNREACHABLE_MSG("Regs::IsLightingSamplerSupported: Reached "
829 "unreachable section, sampler should be one "
830 "of Distribution0, Distribution1, Fresnel, "
831 "ReflectRed, ReflectGreen or ReflectBlue, instead "
832 "got %i",
833 static_cast<int>(config));
834 }
835 }
836
837 struct {
838 struct LightSrc {
839 LightColor specular_0; // material.specular_0 * light.specular_0
840 LightColor specular_1; // material.specular_1 * light.specular_1
841 LightColor diffuse; // material.diffuse * light.diffuse
842 LightColor ambient; // material.ambient * light.ambient
843
844 // Encoded as 16-bit floating point
845 union {
846 BitField<0, 16, u32> x;
847 BitField<16, 16, u32> y;
848 };
849 union {
850 BitField<0, 16, u32> z;
851 };
852
853 INSERT_PADDING_WORDS(0x3);
854
855 union {
856 BitField<0, 1, u32> directional;
857 BitField<1, 1, u32> two_sided_diffuse; // When disabled, clamp dot-product to 0
858 } config;
859
860 BitField<0, 20, u32> dist_atten_bias;
861 BitField<0, 20, u32> dist_atten_scale;
862
863 INSERT_PADDING_WORDS(0x4);
864 };
865 static_assert(sizeof(LightSrc) == 0x10 * sizeof(u32),
866 "LightSrc structure must be 0x10 words");
867
868 LightSrc light[8];
869 LightColor global_ambient; // Emission + (material.ambient * lighting.ambient)
870 INSERT_PADDING_WORDS(0x1);
871 BitField<0, 3, u32> num_lights; // Number of enabled lights - 1
872
873 union {
874 BitField<2, 2, LightingFresnelSelector> fresnel_selector;
875 BitField<4, 4, LightingConfig> config;
876 BitField<22, 2, u32> bump_selector; // 0: Texture 0, 1: Texture 1, 2: Texture 2
877 BitField<27, 1, u32> clamp_highlights;
878 BitField<28, 2, LightingBumpMode> bump_mode;
879 BitField<30, 1, u32> disable_bump_renorm;
880 } config0;
881
882 union {
883 BitField<16, 1, u32> disable_lut_d0;
884 BitField<17, 1, u32> disable_lut_d1;
885 BitField<19, 1, u32> disable_lut_fr;
886 BitField<20, 1, u32> disable_lut_rr;
887 BitField<21, 1, u32> disable_lut_rg;
888 BitField<22, 1, u32> disable_lut_rb;
889
890 // Each bit specifies whether distance attenuation should be applied for the
891 // corresponding light
892
893 BitField<24, 1, u32> disable_dist_atten_light_0;
894 BitField<25, 1, u32> disable_dist_atten_light_1;
895 BitField<26, 1, u32> disable_dist_atten_light_2;
896 BitField<27, 1, u32> disable_dist_atten_light_3;
897 BitField<28, 1, u32> disable_dist_atten_light_4;
898 BitField<29, 1, u32> disable_dist_atten_light_5;
899 BitField<30, 1, u32> disable_dist_atten_light_6;
900 BitField<31, 1, u32> disable_dist_atten_light_7;
901 } config1;
902
903 bool IsDistAttenDisabled(unsigned index) const {
904 const unsigned disable[] = {
905 config1.disable_dist_atten_light_0, config1.disable_dist_atten_light_1,
906 config1.disable_dist_atten_light_2, config1.disable_dist_atten_light_3,
907 config1.disable_dist_atten_light_4, config1.disable_dist_atten_light_5,
908 config1.disable_dist_atten_light_6, config1.disable_dist_atten_light_7};
909 return disable[index] != 0;
910 }
911
912 union {
913 BitField<0, 8, u32> index; ///< Index at which to set data in the LUT
914 BitField<8, 5, u32> type; ///< Type of LUT for which to set data
915 } lut_config;
916
917 BitField<0, 1, u32> disable;
918 INSERT_PADDING_WORDS(0x1);
919
920 // When data is written to any of these registers, it gets written to the lookup table of
921 // the selected type at the selected index, specified above in the `lut_config` register.
922 // With each write, `lut_config.index` is incremented. It does not matter which of these
923 // registers is written to, the behavior will be the same.
924 u32 lut_data[8];
925
926 // These are used to specify if absolute (abs) value should be used for each LUT index. When
927 // abs mode is disabled, LUT indexes are in the range of (-1.0, 1.0). Otherwise, they are in
928 // the range of (0.0, 1.0).
929 union {
930 BitField<1, 1, u32> disable_d0;
931 BitField<5, 1, u32> disable_d1;
932 BitField<9, 1, u32> disable_sp;
933 BitField<13, 1, u32> disable_fr;
934 BitField<17, 1, u32> disable_rb;
935 BitField<21, 1, u32> disable_rg;
936 BitField<25, 1, u32> disable_rr;
937 } abs_lut_input;
938
939 union {
940 BitField<0, 3, LightingLutInput> d0;
941 BitField<4, 3, LightingLutInput> d1;
942 BitField<8, 3, LightingLutInput> sp;
943 BitField<12, 3, LightingLutInput> fr;
944 BitField<16, 3, LightingLutInput> rb;
945 BitField<20, 3, LightingLutInput> rg;
946 BitField<24, 3, LightingLutInput> rr;
947 } lut_input;
948
949 union {
950 BitField<0, 3, LightingScale> d0;
951 BitField<4, 3, LightingScale> d1;
952 BitField<8, 3, LightingScale> sp;
953 BitField<12, 3, LightingScale> fr;
954 BitField<16, 3, LightingScale> rb;
955 BitField<20, 3, LightingScale> rg;
956 BitField<24, 3, LightingScale> rr;
957
958 static float GetScale(LightingScale scale) {
959 switch (scale) {
960 case LightingScale::Scale1:
961 return 1.0f;
962 case LightingScale::Scale2:
963 return 2.0f;
964 case LightingScale::Scale4:
965 return 4.0f;
966 case LightingScale::Scale8:
967 return 8.0f;
968 case LightingScale::Scale1_4:
969 return 0.25f;
970 case LightingScale::Scale1_2:
971 return 0.5f;
972 }
973 return 0.0f;
974 }
975 } lut_scale;
976
977 INSERT_PADDING_WORDS(0x6);
978
979 union {
980 // There are 8 light enable "slots", corresponding to the total number of lights
981 // supported by Pica. For N enabled lights (specified by register 0x1c2, or 'src_num'
982 // above), the first N slots below will be set to integers within the range of 0-7,
983 // corresponding to the actual light that is enabled for each slot.
984
985 BitField<0, 3, u32> slot_0;
986 BitField<4, 3, u32> slot_1;
987 BitField<8, 3, u32> slot_2;
988 BitField<12, 3, u32> slot_3;
989 BitField<16, 3, u32> slot_4;
990 BitField<20, 3, u32> slot_5;
991 BitField<24, 3, u32> slot_6;
992 BitField<28, 3, u32> slot_7;
993
994 unsigned GetNum(unsigned index) const {
995 const unsigned enable_slots[] = {slot_0, slot_1, slot_2, slot_3,
996 slot_4, slot_5, slot_6, slot_7};
997 return enable_slots[index];
998 }
999 } light_enable;
1000 } lighting;
1001
1002 INSERT_PADDING_WORDS(0x26);
1003
1004 enum class VertexAttributeFormat : u64 {
1005 BYTE = 0,
1006 UBYTE = 1,
1007 SHORT = 2,
1008 FLOAT = 3,
1009 };
1010
1011 struct {
1012 BitField<0, 29, u32> base_address;
1013
1014 u32 GetPhysicalBaseAddress() const {
1015 return DecodeAddressRegister(base_address);
1016 }
1017
1018 // Descriptor for internal vertex attributes
1019 union {
1020 BitField<0, 2, VertexAttributeFormat> format0; // size of one element
1021 BitField<2, 2, u64> size0; // number of elements minus 1
1022 BitField<4, 2, VertexAttributeFormat> format1;
1023 BitField<6, 2, u64> size1;
1024 BitField<8, 2, VertexAttributeFormat> format2;
1025 BitField<10, 2, u64> size2;
1026 BitField<12, 2, VertexAttributeFormat> format3;
1027 BitField<14, 2, u64> size3;
1028 BitField<16, 2, VertexAttributeFormat> format4;
1029 BitField<18, 2, u64> size4;
1030 BitField<20, 2, VertexAttributeFormat> format5;
1031 BitField<22, 2, u64> size5;
1032 BitField<24, 2, VertexAttributeFormat> format6;
1033 BitField<26, 2, u64> size6;
1034 BitField<28, 2, VertexAttributeFormat> format7;
1035 BitField<30, 2, u64> size7;
1036 BitField<32, 2, VertexAttributeFormat> format8;
1037 BitField<34, 2, u64> size8;
1038 BitField<36, 2, VertexAttributeFormat> format9;
1039 BitField<38, 2, u64> size9;
1040 BitField<40, 2, VertexAttributeFormat> format10;
1041 BitField<42, 2, u64> size10;
1042 BitField<44, 2, VertexAttributeFormat> format11;
1043 BitField<46, 2, u64> size11;
1044
1045 BitField<48, 12, u64> attribute_mask;
1046
1047 // number of total attributes minus 1
1048 BitField<60, 4, u64> num_extra_attributes;
1049 };
1050
1051 inline VertexAttributeFormat GetFormat(int n) const {
1052 VertexAttributeFormat formats[] = {format0, format1, format2, format3,
1053 format4, format5, format6, format7,
1054 format8, format9, format10, format11};
1055 return formats[n];
1056 }
1057
1058 inline int GetNumElements(int n) const {
1059 u64 sizes[] = {size0, size1, size2, size3, size4, size5,
1060 size6, size7, size8, size9, size10, size11};
1061 return (int)sizes[n] + 1;
1062 }
1063
1064 inline int GetElementSizeInBytes(int n) const {
1065 return (GetFormat(n) == VertexAttributeFormat::FLOAT)
1066 ? 4
1067 : (GetFormat(n) == VertexAttributeFormat::SHORT) ? 2 : 1;
1068 }
1069
1070 inline int GetStride(int n) const {
1071 return GetNumElements(n) * GetElementSizeInBytes(n);
1072 }
1073
1074 inline bool IsDefaultAttribute(int id) const {
1075 return (id >= 12) || (attribute_mask & (1ULL << id)) != 0;
1076 }
1077
1078 inline int GetNumTotalAttributes() const {
1079 return (int)num_extra_attributes + 1;
1080 }
1081
1082 // Attribute loaders map the source vertex data to input attributes
1083 // This e.g. allows to load different attributes from different memory locations
1084 struct {
1085 // Source attribute data offset from the base address
1086 u32 data_offset;
1087
1088 union {
1089 BitField<0, 4, u64> comp0;
1090 BitField<4, 4, u64> comp1;
1091 BitField<8, 4, u64> comp2;
1092 BitField<12, 4, u64> comp3;
1093 BitField<16, 4, u64> comp4;
1094 BitField<20, 4, u64> comp5;
1095 BitField<24, 4, u64> comp6;
1096 BitField<28, 4, u64> comp7;
1097 BitField<32, 4, u64> comp8;
1098 BitField<36, 4, u64> comp9;
1099 BitField<40, 4, u64> comp10;
1100 BitField<44, 4, u64> comp11;
1101
1102 // bytes for a single vertex in this loader
1103 BitField<48, 8, u64> byte_count;
1104
1105 BitField<60, 4, u64> component_count;
1106 };
1107
1108 inline int GetComponent(int n) const {
1109 u64 components[] = {comp0, comp1, comp2, comp3, comp4, comp5,
1110 comp6, comp7, comp8, comp9, comp10, comp11};
1111 return (int)components[n];
1112 }
1113 } attribute_loaders[12];
1114 } vertex_attributes;
1115
1116 struct {
1117 enum IndexFormat : u32 {
1118 BYTE = 0,
1119 SHORT = 1,
1120 };
1121
1122 union {
1123 BitField<0, 31, u32> offset; // relative to base attribute address
1124 BitField<31, 1, IndexFormat> format;
1125 };
1126 } index_array;
1127
1128 // Number of vertices to render
1129 u32 num_vertices;
1130
1131 INSERT_PADDING_WORDS(0x1);
1132
1133 // The index of the first vertex to render
1134 u32 vertex_offset;
1135
1136 INSERT_PADDING_WORDS(0x3);
1137
1138 // These two trigger rendering of triangles
1139 u32 trigger_draw;
1140 u32 trigger_draw_indexed;
1141
1142 INSERT_PADDING_WORDS(0x2);
1143
1144 // These registers are used to setup the default "fall-back" vertex shader attributes
1145 struct {
1146 // Index of the current default attribute
1147 u32 index;
1148
1149 // Writing to these registers sets the "current" default attribute.
1150 u32 set_value[3];
1151 } vs_default_attributes_setup;
1152
1153 INSERT_PADDING_WORDS(0x2);
1154
1155 struct {
1156 // There are two channels that can be used to configure the next command buffer, which
1157 // can be then executed by writing to the "trigger" registers. There are two reasons why a
1158 // game might use this feature:
1159 // 1) With this, an arbitrary number of additional command buffers may be executed in
1160 // sequence without requiring any intervention of the CPU after the initial one is
1161 // kicked off.
1162 // 2) Games can configure these registers to provide a command list subroutine mechanism.
1163
1164 BitField<0, 20, u32> size[2]; ///< Size (in bytes / 8) of each channel's command buffer
1165 BitField<0, 28, u32> addr[2]; ///< Physical address / 8 of each channel's command buffer
1166 u32 trigger[2]; ///< Triggers execution of the channel's command buffer when written to
1167
1168 unsigned GetSize(unsigned index) const {
1169 ASSERT(index < 2);
1170 return 8 * size[index];
1171 }
1172
1173 PAddr GetPhysicalAddress(unsigned index) const {
1174 ASSERT(index < 2);
1175 return (PAddr)(8 * addr[index]);
1176 }
1177 } command_buffer;
1178
1179 INSERT_PADDING_WORDS(0x07);
1180
1181 enum class GPUMode : u32 {
1182 Drawing = 0,
1183 Configuring = 1,
1184 };
1185
1186 GPUMode gpu_mode;
1187
1188 INSERT_PADDING_WORDS(0x18);
1189
1190 enum class TriangleTopology : u32 {
1191 List = 0,
1192 Strip = 1,
1193 Fan = 2,
1194 Shader = 3, // Programmable setup unit implemented in a geometry shader
1195 };
1196
1197 BitField<8, 2, TriangleTopology> triangle_topology;
1198
1199 u32 restart_primitive;
1200
1201 INSERT_PADDING_WORDS(0x20);
1202
1203 struct ShaderConfig {
1204 BitField<0, 16, u32> bool_uniforms;
1205
1206 union {
1207 BitField<0, 8, u32> x;
1208 BitField<8, 8, u32> y;
1209 BitField<16, 8, u32> z;
1210 BitField<24, 8, u32> w;
1211 } int_uniforms[4];
1212
1213 INSERT_PADDING_WORDS(0x4);
1214
1215 union {
1216 // Number of input attributes to shader unit - 1
1217 BitField<0, 4, u32> num_input_attributes;
1218 };
1219
1220 // Offset to shader program entry point (in words)
1221 BitField<0, 16, u32> main_offset;
1222
1223 union {
1224 BitField<0, 4, u64> attribute0_register;
1225 BitField<4, 4, u64> attribute1_register;
1226 BitField<8, 4, u64> attribute2_register;
1227 BitField<12, 4, u64> attribute3_register;
1228 BitField<16, 4, u64> attribute4_register;
1229 BitField<20, 4, u64> attribute5_register;
1230 BitField<24, 4, u64> attribute6_register;
1231 BitField<28, 4, u64> attribute7_register;
1232 BitField<32, 4, u64> attribute8_register;
1233 BitField<36, 4, u64> attribute9_register;
1234 BitField<40, 4, u64> attribute10_register;
1235 BitField<44, 4, u64> attribute11_register;
1236 BitField<48, 4, u64> attribute12_register;
1237 BitField<52, 4, u64> attribute13_register;
1238 BitField<56, 4, u64> attribute14_register;
1239 BitField<60, 4, u64> attribute15_register;
1240
1241 int GetRegisterForAttribute(int attribute_index) const {
1242 u64 fields[] = {
1243 attribute0_register, attribute1_register, attribute2_register,
1244 attribute3_register, attribute4_register, attribute5_register,
1245 attribute6_register, attribute7_register, attribute8_register,
1246 attribute9_register, attribute10_register, attribute11_register,
1247 attribute12_register, attribute13_register, attribute14_register,
1248 attribute15_register,
1249 };
1250 return (int)fields[attribute_index];
1251 }
1252 } input_register_map;
1253
1254 BitField<0, 16, u32> output_mask;
1255
1256 // 0x28E, CODETRANSFER_END
1257 INSERT_PADDING_WORDS(0x2);
1258
1259 struct {
1260 enum Format : u32 {
1261 FLOAT24 = 0,
1262 FLOAT32 = 1,
1263 };
1264
1265 bool IsFloat32() const {
1266 return format == FLOAT32;
1267 }
1268
1269 union {
1270 // Index of the next uniform to write to
1271 // TODO: ctrulib uses 8 bits for this, however that seems to yield lots of invalid
1272 // indices
1273 // TODO: Maybe the uppermost index is for the geometry shader? Investigate!
1274 BitField<0, 7, u32> index;
1275
1276 BitField<31, 1, Format> format;
1277 };
1278
1279 // Writing to these registers sets the current uniform.
1280 u32 set_value[8];
1281
1282 } uniform_setup;
1283
1284 INSERT_PADDING_WORDS(0x2);
1285
1286 struct {
1287 // Offset of the next instruction to write code to.
1288 // Incremented with each instruction write.
1289 u32 offset;
1290
1291 // Writing to these registers sets the "current" word in the shader program.
1292 u32 set_word[8];
1293 } program;
1294
1295 INSERT_PADDING_WORDS(0x1);
1296
1297 // This register group is used to load an internal table of swizzling patterns,
1298 // which are indexed by each shader instruction to specify vector component swizzling.
1299 struct {
1300 // Offset of the next swizzle pattern to write code to.
1301 // Incremented with each instruction write.
1302 u32 offset;
1303
1304 // Writing to these registers sets the current swizzle pattern in the table.
1305 u32 set_word[8];
1306 } swizzle_patterns;
1307
1308 INSERT_PADDING_WORDS(0x2);
1309 };
1310
1311 ShaderConfig gs;
1312 ShaderConfig vs;
1313
1314 INSERT_PADDING_WORDS(0x20);
1315
1316 // Map register indices to names readable by humans
1317 // Used for debugging purposes, so performance is not an issue here
1318 static std::string GetCommandName(int index);
1319
1320 static constexpr size_t NumIds() {
1321 return sizeof(Regs) / sizeof(u32);
1322 }
1323
1324 const u32& operator[](int index) const {
1325 const u32* content = reinterpret_cast<const u32*>(this);
1326 return content[index];
1327 }
1328
1329 u32& operator[](int index) {
1330 u32* content = reinterpret_cast<u32*>(this);
1331 return content[index];
1332 }
1333
1334private:
1335 /*
1336 * Most physical addresses which Pica registers refer to are 8-byte aligned.
1337 * This function should be used to get the address from a raw register value.
1338 */
1339 static inline u32 DecodeAddressRegister(u32 register_value) {
1340 return register_value * 8;
1341 }
1342};
1343
1344// TODO: MSVC does not support using offsetof() on non-static data members even though this
1345// is technically allowed since C++11. This macro should be enabled once MSVC adds
1346// support for that.
1347#ifndef _MSC_VER
1348#define ASSERT_REG_POSITION(field_name, position) \
1349 static_assert(offsetof(Regs, field_name) == position * 4, \
1350 "Field " #field_name " has invalid position")
1351
1352ASSERT_REG_POSITION(trigger_irq, 0x10);
1353ASSERT_REG_POSITION(cull_mode, 0x40);
1354ASSERT_REG_POSITION(viewport_size_x, 0x41);
1355ASSERT_REG_POSITION(viewport_size_y, 0x43);
1356ASSERT_REG_POSITION(viewport_depth_range, 0x4d);
1357ASSERT_REG_POSITION(viewport_depth_near_plane, 0x4e);
1358ASSERT_REG_POSITION(vs_output_attributes[0], 0x50);
1359ASSERT_REG_POSITION(vs_output_attributes[1], 0x51);
1360ASSERT_REG_POSITION(scissor_test, 0x65);
1361ASSERT_REG_POSITION(viewport_corner, 0x68);
1362ASSERT_REG_POSITION(depthmap_enable, 0x6D);
1363ASSERT_REG_POSITION(texture0_enable, 0x80);
1364ASSERT_REG_POSITION(texture0, 0x81);
1365ASSERT_REG_POSITION(texture0_format, 0x8e);
1366ASSERT_REG_POSITION(fragment_lighting_enable, 0x8f);
1367ASSERT_REG_POSITION(texture1, 0x91);
1368ASSERT_REG_POSITION(texture1_format, 0x96);
1369ASSERT_REG_POSITION(texture2, 0x99);
1370ASSERT_REG_POSITION(texture2_format, 0x9e);
1371ASSERT_REG_POSITION(tev_stage0, 0xc0);
1372ASSERT_REG_POSITION(tev_stage1, 0xc8);
1373ASSERT_REG_POSITION(tev_stage2, 0xd0);
1374ASSERT_REG_POSITION(tev_stage3, 0xd8);
1375ASSERT_REG_POSITION(tev_combiner_buffer_input, 0xe0);
1376ASSERT_REG_POSITION(fog_mode, 0xe0);
1377ASSERT_REG_POSITION(fog_color, 0xe1);
1378ASSERT_REG_POSITION(fog_lut_offset, 0xe6);
1379ASSERT_REG_POSITION(fog_lut_data, 0xe8);
1380ASSERT_REG_POSITION(tev_stage4, 0xf0);
1381ASSERT_REG_POSITION(tev_stage5, 0xf8);
1382ASSERT_REG_POSITION(tev_combiner_buffer_color, 0xfd);
1383ASSERT_REG_POSITION(output_merger, 0x100);
1384ASSERT_REG_POSITION(framebuffer, 0x110);
1385ASSERT_REG_POSITION(lighting, 0x140);
1386ASSERT_REG_POSITION(vertex_attributes, 0x200);
1387ASSERT_REG_POSITION(index_array, 0x227);
1388ASSERT_REG_POSITION(num_vertices, 0x228);
1389ASSERT_REG_POSITION(vertex_offset, 0x22a);
1390ASSERT_REG_POSITION(trigger_draw, 0x22e);
1391ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f);
1392ASSERT_REG_POSITION(vs_default_attributes_setup, 0x232);
1393ASSERT_REG_POSITION(command_buffer, 0x238);
1394ASSERT_REG_POSITION(gpu_mode, 0x245);
1395ASSERT_REG_POSITION(triangle_topology, 0x25e);
1396ASSERT_REG_POSITION(restart_primitive, 0x25f);
1397ASSERT_REG_POSITION(gs, 0x280);
1398ASSERT_REG_POSITION(vs, 0x2b0);
1399
1400#undef ASSERT_REG_POSITION
1401#endif // !defined(_MSC_VER)
1402
1403static_assert(sizeof(Regs::ShaderConfig) == 0x30 * sizeof(u32),
1404 "ShaderConfig structure has incorrect size");
1405
1406// The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value
1407// anyway.
1408static_assert(sizeof(Regs) <= 0x300 * sizeof(u32),
1409 "Register set structure larger than it should be");
1410static_assert(sizeof(Regs) >= 0x300 * sizeof(u32),
1411 "Register set structure smaller than it should be");
1412
1413/// Initialize Pica state 10/// Initialize Pica state
1414void Init(); 11void Init();
1415 12
diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h
index e4f2e6d5d..af7536d11 100644
--- a/src/video_core/pica_state.h
+++ b/src/video_core/pica_state.h
@@ -7,8 +7,8 @@
7#include <array> 7#include <array>
8#include "common/bit_field.h" 8#include "common/bit_field.h"
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "video_core/pica.h"
11#include "video_core/primitive_assembly.h" 10#include "video_core/primitive_assembly.h"
11#include "video_core/regs.h"
12#include "video_core/shader/shader.h" 12#include "video_core/shader/shader.h"
13 13
14namespace Pica { 14namespace Pica {
@@ -23,7 +23,7 @@ struct State {
23 Shader::ShaderSetup vs; 23 Shader::ShaderSetup vs;
24 Shader::ShaderSetup gs; 24 Shader::ShaderSetup gs;
25 25
26 std::array<Math::Vec4<float24>, 16> vs_default_attributes; 26 Shader::AttributeBuffer input_default_attributes;
27 27
28 struct { 28 struct {
29 union LutEntry { 29 union LutEntry {
@@ -66,7 +66,7 @@ struct State {
66 /// Struct used to describe immediate mode rendering state 66 /// Struct used to describe immediate mode rendering state
67 struct ImmediateModeState { 67 struct ImmediateModeState {
68 // Used to buffer partial vertices for immediate-mode rendering. 68 // Used to buffer partial vertices for immediate-mode rendering.
69 Shader::InputVertex input_vertex; 69 Shader::AttributeBuffer input_vertex;
70 // Index of the next attribute to be loaded into `input_vertex`. 70 // Index of the next attribute to be loaded into `input_vertex`.
71 u32 current_attribute = 0; 71 u32 current_attribute = 0;
72 } immediate; 72 } immediate;
diff --git a/src/video_core/primitive_assembly.cpp b/src/video_core/primitive_assembly.cpp
index be7377290..acd2ac5e2 100644
--- a/src/video_core/primitive_assembly.cpp
+++ b/src/video_core/primitive_assembly.cpp
@@ -3,23 +3,23 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/logging/log.h" 5#include "common/logging/log.h"
6#include "video_core/pica.h"
7#include "video_core/primitive_assembly.h" 6#include "video_core/primitive_assembly.h"
7#include "video_core/regs_pipeline.h"
8#include "video_core/shader/shader.h" 8#include "video_core/shader/shader.h"
9 9
10namespace Pica { 10namespace Pica {
11 11
12template <typename VertexType> 12template <typename VertexType>
13PrimitiveAssembler<VertexType>::PrimitiveAssembler(Regs::TriangleTopology topology) 13PrimitiveAssembler<VertexType>::PrimitiveAssembler(PipelineRegs::TriangleTopology topology)
14 : topology(topology), buffer_index(0) {} 14 : topology(topology), buffer_index(0) {}
15 15
16template <typename VertexType> 16template <typename VertexType>
17void PrimitiveAssembler<VertexType>::SubmitVertex(VertexType& vtx, 17void PrimitiveAssembler<VertexType>::SubmitVertex(const VertexType& vtx,
18 TriangleHandler triangle_handler) { 18 TriangleHandler triangle_handler) {
19 switch (topology) { 19 switch (topology) {
20 // TODO: Figure out what's different with TriangleTopology::Shader. 20 // TODO: Figure out what's different with TriangleTopology::Shader.
21 case Regs::TriangleTopology::List: 21 case PipelineRegs::TriangleTopology::List:
22 case Regs::TriangleTopology::Shader: 22 case PipelineRegs::TriangleTopology::Shader:
23 if (buffer_index < 2) { 23 if (buffer_index < 2) {
24 buffer[buffer_index++] = vtx; 24 buffer[buffer_index++] = vtx;
25 } else { 25 } else {
@@ -29,8 +29,8 @@ void PrimitiveAssembler<VertexType>::SubmitVertex(VertexType& vtx,
29 } 29 }
30 break; 30 break;
31 31
32 case Regs::TriangleTopology::Strip: 32 case PipelineRegs::TriangleTopology::Strip:
33 case Regs::TriangleTopology::Fan: 33 case PipelineRegs::TriangleTopology::Fan:
34 if (strip_ready) 34 if (strip_ready)
35 triangle_handler(buffer[0], buffer[1], vtx); 35 triangle_handler(buffer[0], buffer[1], vtx);
36 36
@@ -38,9 +38,9 @@ void PrimitiveAssembler<VertexType>::SubmitVertex(VertexType& vtx,
38 38
39 strip_ready |= (buffer_index == 1); 39 strip_ready |= (buffer_index == 1);
40 40
41 if (topology == Regs::TriangleTopology::Strip) 41 if (topology == PipelineRegs::TriangleTopology::Strip)
42 buffer_index = !buffer_index; 42 buffer_index = !buffer_index;
43 else if (topology == Regs::TriangleTopology::Fan) 43 else if (topology == PipelineRegs::TriangleTopology::Fan)
44 buffer_index = 1; 44 buffer_index = 1;
45 break; 45 break;
46 46
@@ -57,7 +57,7 @@ void PrimitiveAssembler<VertexType>::Reset() {
57} 57}
58 58
59template <typename VertexType> 59template <typename VertexType>
60void PrimitiveAssembler<VertexType>::Reconfigure(Regs::TriangleTopology topology) { 60void PrimitiveAssembler<VertexType>::Reconfigure(PipelineRegs::TriangleTopology topology) {
61 Reset(); 61 Reset();
62 this->topology = topology; 62 this->topology = topology;
63} 63}
diff --git a/src/video_core/primitive_assembly.h b/src/video_core/primitive_assembly.h
index 0384d5984..e8eccdf27 100644
--- a/src/video_core/primitive_assembly.h
+++ b/src/video_core/primitive_assembly.h
@@ -5,7 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include <functional> 7#include <functional>
8#include "video_core/pica.h" 8#include "video_core/regs_pipeline.h"
9 9
10namespace Pica { 10namespace Pica {
11 11
@@ -15,9 +15,11 @@ namespace Pica {
15 */ 15 */
16template <typename VertexType> 16template <typename VertexType>
17struct PrimitiveAssembler { 17struct PrimitiveAssembler {
18 using TriangleHandler = std::function<void(VertexType& v0, VertexType& v1, VertexType& v2)>; 18 using TriangleHandler =
19 std::function<void(const VertexType& v0, const VertexType& v1, const VertexType& v2)>;
19 20
20 PrimitiveAssembler(Regs::TriangleTopology topology = Regs::TriangleTopology::List); 21 PrimitiveAssembler(
22 PipelineRegs::TriangleTopology topology = PipelineRegs::TriangleTopology::List);
21 23
22 /* 24 /*
23 * Queues a vertex, builds primitives from the vertex queue according to the given 25 * Queues a vertex, builds primitives from the vertex queue according to the given
@@ -25,7 +27,7 @@ struct PrimitiveAssembler {
25 * NOTE: We could specify the triangle handler in the constructor, but this way we can 27 * NOTE: We could specify the triangle handler in the constructor, but this way we can
26 * keep event and handler code next to each other. 28 * keep event and handler code next to each other.
27 */ 29 */
28 void SubmitVertex(VertexType& vtx, TriangleHandler triangle_handler); 30 void SubmitVertex(const VertexType& vtx, TriangleHandler triangle_handler);
29 31
30 /** 32 /**
31 * Resets the internal state of the PrimitiveAssembler. 33 * Resets the internal state of the PrimitiveAssembler.
@@ -35,10 +37,10 @@ struct PrimitiveAssembler {
35 /** 37 /**
36 * Reconfigures the PrimitiveAssembler to use a different triangle topology. 38 * Reconfigures the PrimitiveAssembler to use a different triangle topology.
37 */ 39 */
38 void Reconfigure(Regs::TriangleTopology topology); 40 void Reconfigure(PipelineRegs::TriangleTopology topology);
39 41
40private: 42private:
41 Regs::TriangleTopology topology; 43 PipelineRegs::TriangleTopology topology;
42 44
43 int buffer_index; 45 int buffer_index;
44 VertexType buffer[2]; 46 VertexType buffer[2];
diff --git a/src/video_core/rasterizer.h b/src/video_core/rasterizer.h
deleted file mode 100644
index 6cbda3067..000000000
--- a/src/video_core/rasterizer.h
+++ /dev/null
@@ -1,20 +0,0 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7namespace Pica {
8
9namespace Shader {
10struct OutputVertex;
11}
12
13namespace Rasterizer {
14
15void ProcessTriangle(const Shader::OutputVertex& v0, const Shader::OutputVertex& v1,
16 const Shader::OutputVertex& v2);
17
18} // namespace Rasterizer
19
20} // namespace Pica
diff --git a/src/video_core/regs.cpp b/src/video_core/regs.cpp
new file mode 100644
index 000000000..2699e710a
--- /dev/null
+++ b/src/video_core/regs.cpp
@@ -0,0 +1,488 @@
1// Copyright 2015 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <iterator>
7#include <utility>
8
9#include "common/common_types.h"
10#include "video_core/regs.h"
11
12namespace Pica {
13
14static const std::pair<u16, const char*> register_names[] = {
15 {0x010, "GPUREG_FINALIZE"},
16
17 {0x040, "GPUREG_FACECULLING_CONFIG"},
18 {0x041, "GPUREG_VIEWPORT_WIDTH"},
19 {0x042, "GPUREG_VIEWPORT_INVW"},
20 {0x043, "GPUREG_VIEWPORT_HEIGHT"},
21 {0x044, "GPUREG_VIEWPORT_INVH"},
22
23 {0x047, "GPUREG_FRAGOP_CLIP"},
24 {0x048, "GPUREG_FRAGOP_CLIP_DATA0"},
25 {0x049, "GPUREG_FRAGOP_CLIP_DATA1"},
26 {0x04A, "GPUREG_FRAGOP_CLIP_DATA2"},
27 {0x04B, "GPUREG_FRAGOP_CLIP_DATA3"},
28
29 {0x04D, "GPUREG_DEPTHMAP_SCALE"},
30 {0x04E, "GPUREG_DEPTHMAP_OFFSET"},
31 {0x04F, "GPUREG_SH_OUTMAP_TOTAL"},
32 {0x050, "GPUREG_SH_OUTMAP_O0"},
33 {0x051, "GPUREG_SH_OUTMAP_O1"},
34 {0x052, "GPUREG_SH_OUTMAP_O2"},
35 {0x053, "GPUREG_SH_OUTMAP_O3"},
36 {0x054, "GPUREG_SH_OUTMAP_O4"},
37 {0x055, "GPUREG_SH_OUTMAP_O5"},
38 {0x056, "GPUREG_SH_OUTMAP_O6"},
39
40 {0x061, "GPUREG_EARLYDEPTH_FUNC"},
41 {0x062, "GPUREG_EARLYDEPTH_TEST1"},
42 {0x063, "GPUREG_EARLYDEPTH_CLEAR"},
43 {0x064, "GPUREG_SH_OUTATTR_MODE"},
44 {0x065, "GPUREG_SCISSORTEST_MODE"},
45 {0x066, "GPUREG_SCISSORTEST_POS"},
46 {0x067, "GPUREG_SCISSORTEST_DIM"},
47 {0x068, "GPUREG_VIEWPORT_XY"},
48
49 {0x06A, "GPUREG_EARLYDEPTH_DATA"},
50
51 {0x06D, "GPUREG_DEPTHMAP_ENABLE"},
52 {0x06E, "GPUREG_RENDERBUF_DIM"},
53 {0x06F, "GPUREG_SH_OUTATTR_CLOCK"},
54
55 {0x080, "GPUREG_TEXUNIT_CONFIG"},
56 {0x081, "GPUREG_TEXUNIT0_BORDER_COLOR"},
57 {0x082, "GPUREG_TEXUNIT0_DIM"},
58 {0x083, "GPUREG_TEXUNIT0_PARAM"},
59 {0x084, "GPUREG_TEXUNIT0_LOD"},
60 {0x085, "GPUREG_TEXUNIT0_ADDR1"},
61 {0x086, "GPUREG_TEXUNIT0_ADDR2"},
62 {0x087, "GPUREG_TEXUNIT0_ADDR3"},
63 {0x088, "GPUREG_TEXUNIT0_ADDR4"},
64 {0x089, "GPUREG_TEXUNIT0_ADDR5"},
65 {0x08A, "GPUREG_TEXUNIT0_ADDR6"},
66 {0x08B, "GPUREG_TEXUNIT0_SHADOW"},
67
68 {0x08E, "GPUREG_TEXUNIT0_TYPE"},
69 {0x08F, "GPUREG_LIGHTING_ENABLE0"},
70
71 {0x091, "GPUREG_TEXUNIT1_BORDER_COLOR"},
72 {0x092, "GPUREG_TEXUNIT1_DIM"},
73 {0x093, "GPUREG_TEXUNIT1_PARAM"},
74 {0x094, "GPUREG_TEXUNIT1_LOD"},
75 {0x095, "GPUREG_TEXUNIT1_ADDR"},
76 {0x096, "GPUREG_TEXUNIT1_TYPE"},
77
78 {0x099, "GPUREG_TEXUNIT2_BORDER_COLOR"},
79 {0x09A, "GPUREG_TEXUNIT2_DIM"},
80 {0x09B, "GPUREG_TEXUNIT2_PARAM"},
81 {0x09C, "GPUREG_TEXUNIT2_LOD"},
82 {0x09D, "GPUREG_TEXUNIT2_ADDR"},
83 {0x09E, "GPUREG_TEXUNIT2_TYPE"},
84
85 {0x0A8, "GPUREG_TEXUNIT3_PROCTEX0"},
86 {0x0A9, "GPUREG_TEXUNIT3_PROCTEX1"},
87 {0x0AA, "GPUREG_TEXUNIT3_PROCTEX2"},
88 {0x0AB, "GPUREG_TEXUNIT3_PROCTEX3"},
89 {0x0AC, "GPUREG_TEXUNIT3_PROCTEX4"},
90 {0x0AD, "GPUREG_TEXUNIT3_PROCTEX5"},
91
92 {0x0AF, "GPUREG_PROCTEX_LUT"},
93 {0x0B0, "GPUREG_PROCTEX_LUT_DATA0"},
94 {0x0B1, "GPUREG_PROCTEX_LUT_DATA1"},
95 {0x0B2, "GPUREG_PROCTEX_LUT_DATA2"},
96 {0x0B3, "GPUREG_PROCTEX_LUT_DATA3"},
97 {0x0B4, "GPUREG_PROCTEX_LUT_DATA4"},
98 {0x0B5, "GPUREG_PROCTEX_LUT_DATA5"},
99 {0x0B6, "GPUREG_PROCTEX_LUT_DATA6"},
100 {0x0B7, "GPUREG_PROCTEX_LUT_DATA7"},
101
102 {0x0C0, "GPUREG_TEXENV0_SOURCE"},
103 {0x0C1, "GPUREG_TEXENV0_OPERAND"},
104 {0x0C2, "GPUREG_TEXENV0_COMBINER"},
105 {0x0C3, "GPUREG_TEXENV0_COLOR"},
106 {0x0C4, "GPUREG_TEXENV0_SCALE"},
107
108 {0x0C8, "GPUREG_TEXENV1_SOURCE"},
109 {0x0C9, "GPUREG_TEXENV1_OPERAND"},
110 {0x0CA, "GPUREG_TEXENV1_COMBINER"},
111 {0x0CB, "GPUREG_TEXENV1_COLOR"},
112 {0x0CC, "GPUREG_TEXENV1_SCALE"},
113
114 {0x0D0, "GPUREG_TEXENV2_SOURCE"},
115 {0x0D1, "GPUREG_TEXENV2_OPERAND"},
116 {0x0D2, "GPUREG_TEXENV2_COMBINER"},
117 {0x0D3, "GPUREG_TEXENV2_COLOR"},
118 {0x0D4, "GPUREG_TEXENV2_SCALE"},
119
120 {0x0D8, "GPUREG_TEXENV3_SOURCE"},
121 {0x0D9, "GPUREG_TEXENV3_OPERAND"},
122 {0x0DA, "GPUREG_TEXENV3_COMBINER"},
123 {0x0DB, "GPUREG_TEXENV3_COLOR"},
124 {0x0DC, "GPUREG_TEXENV3_SCALE"},
125
126 {0x0E0, "GPUREG_TEXENV_UPDATE_BUFFER"},
127 {0x0E1, "GPUREG_FOG_COLOR"},
128
129 {0x0E4, "GPUREG_GAS_ATTENUATION"},
130 {0x0E5, "GPUREG_GAS_ACCMAX"},
131 {0x0E6, "GPUREG_FOG_LUT_INDEX"},
132
133 {0x0E8, "GPUREG_FOG_LUT_DATA0"},
134 {0x0E9, "GPUREG_FOG_LUT_DATA1"},
135 {0x0EA, "GPUREG_FOG_LUT_DATA2"},
136 {0x0EB, "GPUREG_FOG_LUT_DATA3"},
137 {0x0EC, "GPUREG_FOG_LUT_DATA4"},
138 {0x0ED, "GPUREG_FOG_LUT_DATA5"},
139 {0x0EE, "GPUREG_FOG_LUT_DATA6"},
140 {0x0EF, "GPUREG_FOG_LUT_DATA7"},
141 {0x0F0, "GPUREG_TEXENV4_SOURCE"},
142 {0x0F1, "GPUREG_TEXENV4_OPERAND"},
143 {0x0F2, "GPUREG_TEXENV4_COMBINER"},
144 {0x0F3, "GPUREG_TEXENV4_COLOR"},
145 {0x0F4, "GPUREG_TEXENV4_SCALE"},
146
147 {0x0F8, "GPUREG_TEXENV5_SOURCE"},
148 {0x0F9, "GPUREG_TEXENV5_OPERAND"},
149 {0x0FA, "GPUREG_TEXENV5_COMBINER"},
150 {0x0FB, "GPUREG_TEXENV5_COLOR"},
151 {0x0FC, "GPUREG_TEXENV5_SCALE"},
152 {0x0FD, "GPUREG_TEXENV_BUFFER_COLOR"},
153
154 {0x100, "GPUREG_COLOR_OPERATION"},
155 {0x101, "GPUREG_BLEND_FUNC"},
156 {0x102, "GPUREG_LOGIC_OP"},
157 {0x103, "GPUREG_BLEND_COLOR"},
158 {0x104, "GPUREG_FRAGOP_ALPHA_TEST"},
159 {0x105, "GPUREG_STENCIL_TEST"},
160 {0x106, "GPUREG_STENCIL_OP"},
161 {0x107, "GPUREG_DEPTH_COLOR_MASK"},
162
163 {0x110, "GPUREG_FRAMEBUFFER_INVALIDATE"},
164 {0x111, "GPUREG_FRAMEBUFFER_FLUSH"},
165 {0x112, "GPUREG_COLORBUFFER_READ"},
166 {0x113, "GPUREG_COLORBUFFER_WRITE"},
167 {0x114, "GPUREG_DEPTHBUFFER_READ"},
168 {0x115, "GPUREG_DEPTHBUFFER_WRITE"},
169 {0x116, "GPUREG_DEPTHBUFFER_FORMAT"},
170 {0x117, "GPUREG_COLORBUFFER_FORMAT"},
171 {0x118, "GPUREG_EARLYDEPTH_TEST2"},
172
173 {0x11B, "GPUREG_FRAMEBUFFER_BLOCK32"},
174 {0x11C, "GPUREG_DEPTHBUFFER_LOC"},
175 {0x11D, "GPUREG_COLORBUFFER_LOC"},
176 {0x11E, "GPUREG_FRAMEBUFFER_DIM"},
177
178 {0x120, "GPUREG_GAS_LIGHT_XY"},
179 {0x121, "GPUREG_GAS_LIGHT_Z"},
180 {0x122, "GPUREG_GAS_LIGHT_Z_COLOR"},
181 {0x123, "GPUREG_GAS_LUT_INDEX"},
182 {0x124, "GPUREG_GAS_LUT_DATA"},
183
184 {0x126, "GPUREG_GAS_DELTAZ_DEPTH"},
185
186 {0x130, "GPUREG_FRAGOP_SHADOW"},
187
188 {0x140, "GPUREG_LIGHT0_SPECULAR0"},
189 {0x141, "GPUREG_LIGHT0_SPECULAR1"},
190 {0x142, "GPUREG_LIGHT0_DIFFUSE"},
191 {0x143, "GPUREG_LIGHT0_AMBIENT"},
192 {0x144, "GPUREG_LIGHT0_XY"},
193 {0x145, "GPUREG_LIGHT0_Z"},
194 {0x146, "GPUREG_LIGHT0_SPOTDIR_XY"},
195 {0x147, "GPUREG_LIGHT0_SPOTDIR_Z"},
196
197 {0x149, "GPUREG_LIGHT0_CONFIG"},
198 {0x14A, "GPUREG_LIGHT0_ATTENUATION_BIAS"},
199 {0x14B, "GPUREG_LIGHT0_ATTENUATION_SCALE"},
200
201 {0x150, "GPUREG_LIGHT1_SPECULAR0"},
202 {0x151, "GPUREG_LIGHT1_SPECULAR1"},
203 {0x152, "GPUREG_LIGHT1_DIFFUSE"},
204 {0x153, "GPUREG_LIGHT1_AMBIENT"},
205 {0x154, "GPUREG_LIGHT1_XY"},
206 {0x155, "GPUREG_LIGHT1_Z"},
207 {0x156, "GPUREG_LIGHT1_SPOTDIR_XY"},
208 {0x157, "GPUREG_LIGHT1_SPOTDIR_Z"},
209
210 {0x159, "GPUREG_LIGHT1_CONFIG"},
211 {0x15A, "GPUREG_LIGHT1_ATTENUATION_BIAS"},
212 {0x15B, "GPUREG_LIGHT1_ATTENUATION_SCALE"},
213
214 {0x160, "GPUREG_LIGHT2_SPECULAR0"},
215 {0x161, "GPUREG_LIGHT2_SPECULAR1"},
216 {0x162, "GPUREG_LIGHT2_DIFFUSE"},
217 {0x163, "GPUREG_LIGHT2_AMBIENT"},
218 {0x164, "GPUREG_LIGHT2_XY"},
219 {0x165, "GPUREG_LIGHT2_Z"},
220 {0x166, "GPUREG_LIGHT2_SPOTDIR_XY"},
221 {0x167, "GPUREG_LIGHT2_SPOTDIR_Z"},
222
223 {0x169, "GPUREG_LIGHT2_CONFIG"},
224 {0x16A, "GPUREG_LIGHT2_ATTENUATION_BIAS"},
225 {0x16B, "GPUREG_LIGHT2_ATTENUATION_SCALE"},
226
227 {0x170, "GPUREG_LIGHT3_SPECULAR0"},
228 {0x171, "GPUREG_LIGHT3_SPECULAR1"},
229 {0x172, "GPUREG_LIGHT3_DIFFUSE"},
230 {0x173, "GPUREG_LIGHT3_AMBIENT"},
231 {0x174, "GPUREG_LIGHT3_XY"},
232 {0x175, "GPUREG_LIGHT3_Z"},
233 {0x176, "GPUREG_LIGHT3_SPOTDIR_XY"},
234 {0x177, "GPUREG_LIGHT3_SPOTDIR_Z"},
235
236 {0x179, "GPUREG_LIGHT3_CONFIG"},
237 {0x17A, "GPUREG_LIGHT3_ATTENUATION_BIAS"},
238 {0x17B, "GPUREG_LIGHT3_ATTENUATION_SCALE"},
239
240 {0x180, "GPUREG_LIGHT4_SPECULAR0"},
241 {0x181, "GPUREG_LIGHT4_SPECULAR1"},
242 {0x182, "GPUREG_LIGHT4_DIFFUSE"},
243 {0x183, "GPUREG_LIGHT4_AMBIENT"},
244 {0x184, "GPUREG_LIGHT4_XY"},
245 {0x185, "GPUREG_LIGHT4_Z"},
246 {0x186, "GPUREG_LIGHT4_SPOTDIR_XY"},
247 {0x187, "GPUREG_LIGHT4_SPOTDIR_Z"},
248
249 {0x189, "GPUREG_LIGHT4_CONFIG"},
250 {0x18A, "GPUREG_LIGHT4_ATTENUATION_BIAS"},
251 {0x18B, "GPUREG_LIGHT4_ATTENUATION_SCALE"},
252
253 {0x190, "GPUREG_LIGHT5_SPECULAR0"},
254 {0x191, "GPUREG_LIGHT5_SPECULAR1"},
255 {0x192, "GPUREG_LIGHT5_DIFFUSE"},
256 {0x193, "GPUREG_LIGHT5_AMBIENT"},
257 {0x194, "GPUREG_LIGHT5_XY"},
258 {0x195, "GPUREG_LIGHT5_Z"},
259 {0x196, "GPUREG_LIGHT5_SPOTDIR_XY"},
260 {0x197, "GPUREG_LIGHT5_SPOTDIR_Z"},
261
262 {0x199, "GPUREG_LIGHT5_CONFIG"},
263 {0x19A, "GPUREG_LIGHT5_ATTENUATION_BIAS"},
264 {0x19B, "GPUREG_LIGHT5_ATTENUATION_SCALE"},
265
266 {0x1A0, "GPUREG_LIGHT6_SPECULAR0"},
267 {0x1A1, "GPUREG_LIGHT6_SPECULAR1"},
268 {0x1A2, "GPUREG_LIGHT6_DIFFUSE"},
269 {0x1A3, "GPUREG_LIGHT6_AMBIENT"},
270 {0x1A4, "GPUREG_LIGHT6_XY"},
271 {0x1A5, "GPUREG_LIGHT6_Z"},
272 {0x1A6, "GPUREG_LIGHT6_SPOTDIR_XY"},
273 {0x1A7, "GPUREG_LIGHT6_SPOTDIR_Z"},
274
275 {0x1A9, "GPUREG_LIGHT6_CONFIG"},
276 {0x1AA, "GPUREG_LIGHT6_ATTENUATION_BIAS"},
277 {0x1AB, "GPUREG_LIGHT6_ATTENUATION_SCALE"},
278
279 {0x1B0, "GPUREG_LIGHT7_SPECULAR0"},
280 {0x1B1, "GPUREG_LIGHT7_SPECULAR1"},
281 {0x1B2, "GPUREG_LIGHT7_DIFFUSE"},
282 {0x1B3, "GPUREG_LIGHT7_AMBIENT"},
283 {0x1B4, "GPUREG_LIGHT7_XY"},
284 {0x1B5, "GPUREG_LIGHT7_Z"},
285 {0x1B6, "GPUREG_LIGHT7_SPOTDIR_XY"},
286 {0x1B7, "GPUREG_LIGHT7_SPOTDIR_Z"},
287
288 {0x1B9, "GPUREG_LIGHT7_CONFIG"},
289 {0x1BA, "GPUREG_LIGHT7_ATTENUATION_BIAS"},
290 {0x1BB, "GPUREG_LIGHT7_ATTENUATION_SCALE"},
291
292 {0x1C0, "GPUREG_LIGHTING_AMBIENT"},
293
294 {0x1C2, "GPUREG_LIGHTING_NUM_LIGHTS"},
295 {0x1C3, "GPUREG_LIGHTING_CONFIG0"},
296 {0x1C4, "GPUREG_LIGHTING_CONFIG1"},
297 {0x1C5, "GPUREG_LIGHTING_LUT_INDEX"},
298 {0x1C6, "GPUREG_LIGHTING_ENABLE1"},
299
300 {0x1C8, "GPUREG_LIGHTING_LUT_DATA0"},
301 {0x1C9, "GPUREG_LIGHTING_LUT_DATA1"},
302 {0x1CA, "GPUREG_LIGHTING_LUT_DATA2"},
303 {0x1CB, "GPUREG_LIGHTING_LUT_DATA3"},
304 {0x1CC, "GPUREG_LIGHTING_LUT_DATA4"},
305 {0x1CD, "GPUREG_LIGHTING_LUT_DATA5"},
306 {0x1CE, "GPUREG_LIGHTING_LUT_DATA6"},
307 {0x1CF, "GPUREG_LIGHTING_LUT_DATA7"},
308 {0x1D0, "GPUREG_LIGHTING_LUTINPUT_ABS"},
309 {0x1D1, "GPUREG_LIGHTING_LUTINPUT_SELECT"},
310 {0x1D2, "GPUREG_LIGHTING_LUTINPUT_SCALE"},
311
312 {0x1D9, "GPUREG_LIGHTING_LIGHT_PERMUTATION"},
313
314 {0x200, "GPUREG_ATTRIBBUFFERS_LOC"},
315 {0x201, "GPUREG_ATTRIBBUFFERS_FORMAT_LOW"},
316 {0x202, "GPUREG_ATTRIBBUFFERS_FORMAT_HIGH"},
317 {0x203, "GPUREG_ATTRIBBUFFER0_OFFSET"},
318 {0x204, "GPUREG_ATTRIBBUFFER0_CONFIG1"},
319 {0x205, "GPUREG_ATTRIBBUFFER0_CONFIG2"},
320 {0x206, "GPUREG_ATTRIBBUFFER1_OFFSET"},
321 {0x207, "GPUREG_ATTRIBBUFFER1_CONFIG1"},
322 {0x208, "GPUREG_ATTRIBBUFFER1_CONFIG2"},
323 {0x209, "GPUREG_ATTRIBBUFFER2_OFFSET"},
324 {0x20A, "GPUREG_ATTRIBBUFFER2_CONFIG1"},
325 {0x20B, "GPUREG_ATTRIBBUFFER2_CONFIG2"},
326 {0x20C, "GPUREG_ATTRIBBUFFER3_OFFSET"},
327 {0x20D, "GPUREG_ATTRIBBUFFER3_CONFIG1"},
328 {0x20E, "GPUREG_ATTRIBBUFFER3_CONFIG2"},
329 {0x20F, "GPUREG_ATTRIBBUFFER4_OFFSET"},
330 {0x210, "GPUREG_ATTRIBBUFFER4_CONFIG1"},
331 {0x211, "GPUREG_ATTRIBBUFFER4_CONFIG2"},
332 {0x212, "GPUREG_ATTRIBBUFFER5_OFFSET"},
333 {0x213, "GPUREG_ATTRIBBUFFER5_CONFIG1"},
334 {0x214, "GPUREG_ATTRIBBUFFER5_CONFIG2"},
335 {0x215, "GPUREG_ATTRIBBUFFER6_OFFSET"},
336 {0x216, "GPUREG_ATTRIBBUFFER6_CONFIG1"},
337 {0x217, "GPUREG_ATTRIBBUFFER6_CONFIG2"},
338 {0x218, "GPUREG_ATTRIBBUFFER7_OFFSET"},
339 {0x219, "GPUREG_ATTRIBBUFFER7_CONFIG1"},
340 {0x21A, "GPUREG_ATTRIBBUFFER7_CONFIG2"},
341 {0x21B, "GPUREG_ATTRIBBUFFER8_OFFSET"},
342 {0x21C, "GPUREG_ATTRIBBUFFER8_CONFIG1"},
343 {0x21D, "GPUREG_ATTRIBBUFFER8_CONFIG2"},
344 {0x21E, "GPUREG_ATTRIBBUFFER9_OFFSET"},
345 {0x21F, "GPUREG_ATTRIBBUFFER9_CONFIG1"},
346 {0x220, "GPUREG_ATTRIBBUFFER9_CONFIG2"},
347 {0x221, "GPUREG_ATTRIBBUFFER10_OFFSET"},
348 {0x222, "GPUREG_ATTRIBBUFFER10_CONFIG1"},
349 {0x223, "GPUREG_ATTRIBBUFFER10_CONFIG2"},
350 {0x224, "GPUREG_ATTRIBBUFFER11_OFFSET"},
351 {0x225, "GPUREG_ATTRIBBUFFER11_CONFIG1"},
352 {0x226, "GPUREG_ATTRIBBUFFER11_CONFIG2"},
353 {0x227, "GPUREG_INDEXBUFFER_CONFIG"},
354 {0x228, "GPUREG_NUMVERTICES"},
355 {0x229, "GPUREG_GEOSTAGE_CONFIG"},
356 {0x22A, "GPUREG_VERTEX_OFFSET"},
357
358 {0x22D, "GPUREG_POST_VERTEX_CACHE_NUM"},
359 {0x22E, "GPUREG_DRAWARRAYS"},
360 {0x22F, "GPUREG_DRAWELEMENTS"},
361
362 {0x231, "GPUREG_VTX_FUNC"},
363 {0x232, "GPUREG_FIXEDATTRIB_INDEX"},
364 {0x233, "GPUREG_FIXEDATTRIB_DATA0"},
365 {0x234, "GPUREG_FIXEDATTRIB_DATA1"},
366 {0x235, "GPUREG_FIXEDATTRIB_DATA2"},
367
368 {0x238, "GPUREG_CMDBUF_SIZE0"},
369 {0x239, "GPUREG_CMDBUF_SIZE1"},
370 {0x23A, "GPUREG_CMDBUF_ADDR0"},
371 {0x23B, "GPUREG_CMDBUF_ADDR1"},
372 {0x23C, "GPUREG_CMDBUF_JUMP0"},
373 {0x23D, "GPUREG_CMDBUF_JUMP1"},
374
375 {0x242, "GPUREG_VSH_NUM_ATTR"},
376
377 {0x244, "GPUREG_VSH_COM_MODE"},
378 {0x245, "GPUREG_START_DRAW_FUNC0"},
379
380 {0x24A, "GPUREG_VSH_OUTMAP_TOTAL1"},
381
382 {0x251, "GPUREG_VSH_OUTMAP_TOTAL2"},
383 {0x252, "GPUREG_GSH_MISC0"},
384 {0x253, "GPUREG_GEOSTAGE_CONFIG2"},
385 {0x254, "GPUREG_GSH_MISC1"},
386
387 {0x25E, "GPUREG_PRIMITIVE_CONFIG"},
388 {0x25F, "GPUREG_RESTART_PRIMITIVE"},
389
390 {0x280, "GPUREG_GSH_BOOLUNIFORM"},
391 {0x281, "GPUREG_GSH_INTUNIFORM_I0"},
392 {0x282, "GPUREG_GSH_INTUNIFORM_I1"},
393 {0x283, "GPUREG_GSH_INTUNIFORM_I2"},
394 {0x284, "GPUREG_GSH_INTUNIFORM_I3"},
395
396 {0x289, "GPUREG_GSH_INPUTBUFFER_CONFIG"},
397 {0x28A, "GPUREG_GSH_ENTRYPOINT"},
398 {0x28B, "GPUREG_GSH_ATTRIBUTES_PERMUTATION_LOW"},
399 {0x28C, "GPUREG_GSH_ATTRIBUTES_PERMUTATION_HIGH"},
400 {0x28D, "GPUREG_GSH_OUTMAP_MASK"},
401
402 {0x28F, "GPUREG_GSH_CODETRANSFER_END"},
403 {0x290, "GPUREG_GSH_FLOATUNIFORM_INDEX"},
404 {0x291, "GPUREG_GSH_FLOATUNIFORM_DATA0"},
405 {0x292, "GPUREG_GSH_FLOATUNIFORM_DATA1"},
406 {0x293, "GPUREG_GSH_FLOATUNIFORM_DATA2"},
407 {0x294, "GPUREG_GSH_FLOATUNIFORM_DATA3"},
408 {0x295, "GPUREG_GSH_FLOATUNIFORM_DATA4"},
409 {0x296, "GPUREG_GSH_FLOATUNIFORM_DATA5"},
410 {0x297, "GPUREG_GSH_FLOATUNIFORM_DATA6"},
411 {0x298, "GPUREG_GSH_FLOATUNIFORM_DATA7"},
412
413 {0x29B, "GPUREG_GSH_CODETRANSFER_INDEX"},
414 {0x29C, "GPUREG_GSH_CODETRANSFER_DATA0"},
415 {0x29D, "GPUREG_GSH_CODETRANSFER_DATA1"},
416 {0x29E, "GPUREG_GSH_CODETRANSFER_DATA2"},
417 {0x29F, "GPUREG_GSH_CODETRANSFER_DATA3"},
418 {0x2A0, "GPUREG_GSH_CODETRANSFER_DATA4"},
419 {0x2A1, "GPUREG_GSH_CODETRANSFER_DATA5"},
420 {0x2A2, "GPUREG_GSH_CODETRANSFER_DATA6"},
421 {0x2A3, "GPUREG_GSH_CODETRANSFER_DATA7"},
422
423 {0x2A5, "GPUREG_GSH_OPDESCS_INDEX"},
424 {0x2A6, "GPUREG_GSH_OPDESCS_DATA0"},
425 {0x2A7, "GPUREG_GSH_OPDESCS_DATA1"},
426 {0x2A8, "GPUREG_GSH_OPDESCS_DATA2"},
427 {0x2A9, "GPUREG_GSH_OPDESCS_DATA3"},
428 {0x2AA, "GPUREG_GSH_OPDESCS_DATA4"},
429 {0x2AB, "GPUREG_GSH_OPDESCS_DATA5"},
430 {0x2AC, "GPUREG_GSH_OPDESCS_DATA6"},
431 {0x2AD, "GPUREG_GSH_OPDESCS_DATA7"},
432
433 {0x2B0, "GPUREG_VSH_BOOLUNIFORM"},
434 {0x2B1, "GPUREG_VSH_INTUNIFORM_I0"},
435 {0x2B2, "GPUREG_VSH_INTUNIFORM_I1"},
436 {0x2B3, "GPUREG_VSH_INTUNIFORM_I2"},
437 {0x2B4, "GPUREG_VSH_INTUNIFORM_I3"},
438
439 {0x2B9, "GPUREG_VSH_INPUTBUFFER_CONFIG"},
440 {0x2BA, "GPUREG_VSH_ENTRYPOINT"},
441 {0x2BB, "GPUREG_VSH_ATTRIBUTES_PERMUTATION_LOW"},
442 {0x2BC, "GPUREG_VSH_ATTRIBUTES_PERMUTATION_HIGH"},
443 {0x2BD, "GPUREG_VSH_OUTMAP_MASK"},
444
445 {0x2BF, "GPUREG_VSH_CODETRANSFER_END"},
446 {0x2C0, "GPUREG_VSH_FLOATUNIFORM_INDEX"},
447 {0x2C1, "GPUREG_VSH_FLOATUNIFORM_DATA0"},
448 {0x2C2, "GPUREG_VSH_FLOATUNIFORM_DATA1"},
449 {0x2C3, "GPUREG_VSH_FLOATUNIFORM_DATA2"},
450 {0x2C4, "GPUREG_VSH_FLOATUNIFORM_DATA3"},
451 {0x2C5, "GPUREG_VSH_FLOATUNIFORM_DATA4"},
452 {0x2C6, "GPUREG_VSH_FLOATUNIFORM_DATA5"},
453 {0x2C7, "GPUREG_VSH_FLOATUNIFORM_DATA6"},
454 {0x2C8, "GPUREG_VSH_FLOATUNIFORM_DATA7"},
455
456 {0x2CB, "GPUREG_VSH_CODETRANSFER_INDEX"},
457 {0x2CC, "GPUREG_VSH_CODETRANSFER_DATA0"},
458 {0x2CD, "GPUREG_VSH_CODETRANSFER_DATA1"},
459 {0x2CE, "GPUREG_VSH_CODETRANSFER_DATA2"},
460 {0x2CF, "GPUREG_VSH_CODETRANSFER_DATA3"},
461 {0x2D0, "GPUREG_VSH_CODETRANSFER_DATA4"},
462 {0x2D1, "GPUREG_VSH_CODETRANSFER_DATA5"},
463 {0x2D2, "GPUREG_VSH_CODETRANSFER_DATA6"},
464 {0x2D3, "GPUREG_VSH_CODETRANSFER_DATA7"},
465
466 {0x2D5, "GPUREG_VSH_OPDESCS_INDEX"},
467 {0x2D6, "GPUREG_VSH_OPDESCS_DATA0"},
468 {0x2D7, "GPUREG_VSH_OPDESCS_DATA1"},
469 {0x2D8, "GPUREG_VSH_OPDESCS_DATA2"},
470 {0x2D9, "GPUREG_VSH_OPDESCS_DATA3"},
471 {0x2DA, "GPUREG_VSH_OPDESCS_DATA4"},
472 {0x2DB, "GPUREG_VSH_OPDESCS_DATA5"},
473 {0x2DC, "GPUREG_VSH_OPDESCS_DATA6"},
474 {0x2DD, "GPUREG_VSH_OPDESCS_DATA7"},
475};
476
477const char* Regs::GetRegisterName(u16 index) {
478 auto found = std::lower_bound(std::begin(register_names), std::end(register_names), index,
479 [](auto p, auto i) { return p.first < i; });
480 if (found->first == index) {
481 return found->second;
482 } else {
483 // Return empty string if no match is found
484 return "";
485 }
486}
487
488} // namespace Pica
diff --git a/src/video_core/regs.h b/src/video_core/regs.h
new file mode 100644
index 000000000..86826088b
--- /dev/null
+++ b/src/video_core/regs.h
@@ -0,0 +1,142 @@
1// Copyright 2017 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <cstddef>
9#include <string>
10#ifndef _MSC_VER
11#include <type_traits> // for std::enable_if
12#endif
13
14#include "common/common_funcs.h"
15#include "common/common_types.h"
16#include "video_core/regs_framebuffer.h"
17#include "video_core/regs_lighting.h"
18#include "video_core/regs_pipeline.h"
19#include "video_core/regs_rasterizer.h"
20#include "video_core/regs_shader.h"
21#include "video_core/regs_texturing.h"
22
23namespace Pica {
24
25// Returns index corresponding to the Regs member labeled by field_name
26// TODO: Due to Visual studio bug 209229, offsetof does not return constant expressions
27// when used with array elements (e.g. PICA_REG_INDEX(vs_uniform_setup.set_value[1])).
28// For details cf.
29// https://connect.microsoft.com/VisualStudio/feedback/details/209229/offsetof-does-not-produce-a-constant-expression-for-array-members
30// Hopefully, this will be fixed sometime in the future.
31// For lack of better alternatives, we currently hardcode the offsets when constant
32// expressions are needed via PICA_REG_INDEX_WORKAROUND (on sane compilers, static_asserts
33// will then make sure the offsets indeed match the automatically calculated ones).
34#define PICA_REG_INDEX(field_name) (offsetof(Pica::Regs, field_name) / sizeof(u32))
35#if defined(_MSC_VER)
36#define PICA_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) (backup_workaround_index)
37#else
38// NOTE: Yeah, hacking in a static_assert here just to workaround the lacking MSVC compiler
39// really is this annoying. This macro just forwards its first argument to PICA_REG_INDEX
40// and then performs a (no-op) cast to size_t iff the second argument matches the expected
41// field offset. Otherwise, the compiler will fail to compile this code.
42#define PICA_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) \
43 ((typename std::enable_if<backup_workaround_index == PICA_REG_INDEX(field_name), \
44 size_t>::type)PICA_REG_INDEX(field_name))
45#endif // _MSC_VER
46
47struct Regs {
48 static constexpr size_t NUM_REGS = 0x300;
49
50 union {
51 struct {
52 INSERT_PADDING_WORDS(0x10);
53 u32 trigger_irq;
54 INSERT_PADDING_WORDS(0x2f);
55 RasterizerRegs rasterizer;
56 TexturingRegs texturing;
57 FramebufferRegs framebuffer;
58 LightingRegs lighting;
59 PipelineRegs pipeline;
60 ShaderRegs gs;
61 ShaderRegs vs;
62 INSERT_PADDING_WORDS(0x20);
63 };
64 std::array<u32, NUM_REGS> reg_array;
65 };
66
67 /// Map register indices to names readable by humans
68 static const char* GetRegisterName(u16 index);
69};
70
71static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Regs struct has wrong size");
72
73// TODO: MSVC does not support using offsetof() on non-static data members even though this
74// is technically allowed since C++11. This macro should be enabled once MSVC adds
75// support for that.
76#ifndef _MSC_VER
77#define ASSERT_REG_POSITION(field_name, position) \
78 static_assert(offsetof(Regs, field_name) == position * 4, \
79 "Field " #field_name " has invalid position")
80
81ASSERT_REG_POSITION(trigger_irq, 0x10);
82
83ASSERT_REG_POSITION(rasterizer, 0x40);
84ASSERT_REG_POSITION(rasterizer.cull_mode, 0x40);
85ASSERT_REG_POSITION(rasterizer.viewport_size_x, 0x41);
86ASSERT_REG_POSITION(rasterizer.viewport_size_y, 0x43);
87ASSERT_REG_POSITION(rasterizer.viewport_depth_range, 0x4d);
88ASSERT_REG_POSITION(rasterizer.viewport_depth_near_plane, 0x4e);
89ASSERT_REG_POSITION(rasterizer.vs_output_attributes[0], 0x50);
90ASSERT_REG_POSITION(rasterizer.vs_output_attributes[1], 0x51);
91ASSERT_REG_POSITION(rasterizer.scissor_test, 0x65);
92ASSERT_REG_POSITION(rasterizer.viewport_corner, 0x68);
93ASSERT_REG_POSITION(rasterizer.depthmap_enable, 0x6D);
94
95ASSERT_REG_POSITION(texturing, 0x80);
96ASSERT_REG_POSITION(texturing.texture0_enable, 0x80);
97ASSERT_REG_POSITION(texturing.texture0, 0x81);
98ASSERT_REG_POSITION(texturing.texture0_format, 0x8e);
99ASSERT_REG_POSITION(texturing.fragment_lighting_enable, 0x8f);
100ASSERT_REG_POSITION(texturing.texture1, 0x91);
101ASSERT_REG_POSITION(texturing.texture1_format, 0x96);
102ASSERT_REG_POSITION(texturing.texture2, 0x99);
103ASSERT_REG_POSITION(texturing.texture2_format, 0x9e);
104ASSERT_REG_POSITION(texturing.tev_stage0, 0xc0);
105ASSERT_REG_POSITION(texturing.tev_stage1, 0xc8);
106ASSERT_REG_POSITION(texturing.tev_stage2, 0xd0);
107ASSERT_REG_POSITION(texturing.tev_stage3, 0xd8);
108ASSERT_REG_POSITION(texturing.tev_combiner_buffer_input, 0xe0);
109ASSERT_REG_POSITION(texturing.fog_mode, 0xe0);
110ASSERT_REG_POSITION(texturing.fog_color, 0xe1);
111ASSERT_REG_POSITION(texturing.fog_lut_offset, 0xe6);
112ASSERT_REG_POSITION(texturing.fog_lut_data, 0xe8);
113ASSERT_REG_POSITION(texturing.tev_stage4, 0xf0);
114ASSERT_REG_POSITION(texturing.tev_stage5, 0xf8);
115ASSERT_REG_POSITION(texturing.tev_combiner_buffer_color, 0xfd);
116
117ASSERT_REG_POSITION(framebuffer, 0x100);
118ASSERT_REG_POSITION(framebuffer.output_merger, 0x100);
119ASSERT_REG_POSITION(framebuffer.framebuffer, 0x110);
120
121ASSERT_REG_POSITION(lighting, 0x140);
122
123ASSERT_REG_POSITION(pipeline, 0x200);
124ASSERT_REG_POSITION(pipeline.vertex_attributes, 0x200);
125ASSERT_REG_POSITION(pipeline.index_array, 0x227);
126ASSERT_REG_POSITION(pipeline.num_vertices, 0x228);
127ASSERT_REG_POSITION(pipeline.vertex_offset, 0x22a);
128ASSERT_REG_POSITION(pipeline.trigger_draw, 0x22e);
129ASSERT_REG_POSITION(pipeline.trigger_draw_indexed, 0x22f);
130ASSERT_REG_POSITION(pipeline.vs_default_attributes_setup, 0x232);
131ASSERT_REG_POSITION(pipeline.command_buffer, 0x238);
132ASSERT_REG_POSITION(pipeline.gpu_mode, 0x245);
133ASSERT_REG_POSITION(pipeline.triangle_topology, 0x25e);
134ASSERT_REG_POSITION(pipeline.restart_primitive, 0x25f);
135
136ASSERT_REG_POSITION(gs, 0x280);
137ASSERT_REG_POSITION(vs, 0x2b0);
138
139#undef ASSERT_REG_POSITION
140#endif // !defined(_MSC_VER)
141
142} // namespace Pica
diff --git a/src/video_core/regs_framebuffer.h b/src/video_core/regs_framebuffer.h
new file mode 100644
index 000000000..366782080
--- /dev/null
+++ b/src/video_core/regs_framebuffer.h
@@ -0,0 +1,284 @@
1// Copyright 2017 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8
9#include "common/assert.h"
10#include "common/bit_field.h"
11#include "common/common_funcs.h"
12#include "common/common_types.h"
13#include "common/logging/log.h"
14
15namespace Pica {
16
17struct FramebufferRegs {
18 enum class LogicOp : u32 {
19 Clear = 0,
20 And = 1,
21 AndReverse = 2,
22 Copy = 3,
23 Set = 4,
24 CopyInverted = 5,
25 NoOp = 6,
26 Invert = 7,
27 Nand = 8,
28 Or = 9,
29 Nor = 10,
30 Xor = 11,
31 Equiv = 12,
32 AndInverted = 13,
33 OrReverse = 14,
34 OrInverted = 15,
35 };
36
37 enum class BlendEquation : u32 {
38 Add = 0,
39 Subtract = 1,
40 ReverseSubtract = 2,
41 Min = 3,
42 Max = 4,
43 };
44
45 enum class BlendFactor : u32 {
46 Zero = 0,
47 One = 1,
48 SourceColor = 2,
49 OneMinusSourceColor = 3,
50 DestColor = 4,
51 OneMinusDestColor = 5,
52 SourceAlpha = 6,
53 OneMinusSourceAlpha = 7,
54 DestAlpha = 8,
55 OneMinusDestAlpha = 9,
56 ConstantColor = 10,
57 OneMinusConstantColor = 11,
58 ConstantAlpha = 12,
59 OneMinusConstantAlpha = 13,
60 SourceAlphaSaturate = 14,
61 };
62
63 enum class CompareFunc : u32 {
64 Never = 0,
65 Always = 1,
66 Equal = 2,
67 NotEqual = 3,
68 LessThan = 4,
69 LessThanOrEqual = 5,
70 GreaterThan = 6,
71 GreaterThanOrEqual = 7,
72 };
73
74 enum class StencilAction : u32 {
75 Keep = 0,
76 Zero = 1,
77 Replace = 2,
78 Increment = 3,
79 Decrement = 4,
80 Invert = 5,
81 IncrementWrap = 6,
82 DecrementWrap = 7,
83 };
84
85 struct {
86 union {
87 // If false, logic blending is used
88 BitField<8, 1, u32> alphablend_enable;
89 };
90
91 union {
92 BitField<0, 8, BlendEquation> blend_equation_rgb;
93 BitField<8, 8, BlendEquation> blend_equation_a;
94
95 BitField<16, 4, BlendFactor> factor_source_rgb;
96 BitField<20, 4, BlendFactor> factor_dest_rgb;
97
98 BitField<24, 4, BlendFactor> factor_source_a;
99 BitField<28, 4, BlendFactor> factor_dest_a;
100 } alpha_blending;
101
102 union {
103 BitField<0, 4, LogicOp> logic_op;
104 };
105
106 union {
107 u32 raw;
108 BitField<0, 8, u32> r;
109 BitField<8, 8, u32> g;
110 BitField<16, 8, u32> b;
111 BitField<24, 8, u32> a;
112 } blend_const;
113
114 union {
115 BitField<0, 1, u32> enable;
116 BitField<4, 3, CompareFunc> func;
117 BitField<8, 8, u32> ref;
118 } alpha_test;
119
120 struct {
121 union {
122 // Raw value of this register
123 u32 raw_func;
124
125 // If true, enable stencil testing
126 BitField<0, 1, u32> enable;
127
128 // Comparison operation for stencil testing
129 BitField<4, 3, CompareFunc> func;
130
131 // Mask used to control writing to the stencil buffer
132 BitField<8, 8, u32> write_mask;
133
134 // Value to compare against for stencil testing
135 BitField<16, 8, u32> reference_value;
136
137 // Mask to apply on stencil test inputs
138 BitField<24, 8, u32> input_mask;
139 };
140
141 union {
142 // Raw value of this register
143 u32 raw_op;
144
145 // Action to perform when the stencil test fails
146 BitField<0, 3, StencilAction> action_stencil_fail;
147
148 // Action to perform when stencil testing passed but depth testing fails
149 BitField<4, 3, StencilAction> action_depth_fail;
150
151 // Action to perform when both stencil and depth testing pass
152 BitField<8, 3, StencilAction> action_depth_pass;
153 };
154 } stencil_test;
155
156 union {
157 BitField<0, 1, u32> depth_test_enable;
158 BitField<4, 3, CompareFunc> depth_test_func;
159 BitField<8, 1, u32> red_enable;
160 BitField<9, 1, u32> green_enable;
161 BitField<10, 1, u32> blue_enable;
162 BitField<11, 1, u32> alpha_enable;
163 BitField<12, 1, u32> depth_write_enable;
164 };
165
166 INSERT_PADDING_WORDS(0x8);
167 } output_merger;
168
169 // Components are laid out in reverse byte order, most significant bits first.
170 enum class ColorFormat : u32 {
171 RGBA8 = 0,
172 RGB8 = 1,
173 RGB5A1 = 2,
174 RGB565 = 3,
175 RGBA4 = 4,
176 };
177
178 enum class DepthFormat : u32 {
179 D16 = 0,
180 D24 = 2,
181 D24S8 = 3,
182 };
183
184 // Returns the number of bytes in the specified color format
185 static unsigned BytesPerColorPixel(ColorFormat format) {
186 switch (format) {
187 case ColorFormat::RGBA8:
188 return 4;
189 case ColorFormat::RGB8:
190 return 3;
191 case ColorFormat::RGB5A1:
192 case ColorFormat::RGB565:
193 case ColorFormat::RGBA4:
194 return 2;
195 default:
196 LOG_CRITICAL(HW_GPU, "Unknown color format %u", format);
197 UNIMPLEMENTED();
198 }
199 }
200
201 struct FramebufferConfig {
202 INSERT_PADDING_WORDS(0x3);
203
204 union {
205 BitField<0, 4, u32> allow_color_write; // 0 = disable, else enable
206 };
207
208 INSERT_PADDING_WORDS(0x1);
209
210 union {
211 BitField<0, 2, u32> allow_depth_stencil_write; // 0 = disable, else enable
212 };
213
214 DepthFormat depth_format; // TODO: Should be a BitField!
215 BitField<16, 3, ColorFormat> color_format;
216
217 INSERT_PADDING_WORDS(0x4);
218
219 u32 depth_buffer_address;
220 u32 color_buffer_address;
221
222 union {
223 // Apparently, the framebuffer width is stored as expected,
224 // while the height is stored as the actual height minus one.
225 // Hence, don't access these fields directly but use the accessors
226 // GetWidth() and GetHeight() instead.
227 BitField<0, 11, u32> width;
228 BitField<12, 10, u32> height;
229 };
230
231 INSERT_PADDING_WORDS(0x1);
232
233 inline PAddr GetColorBufferPhysicalAddress() const {
234 return color_buffer_address * 8;
235 }
236 inline PAddr GetDepthBufferPhysicalAddress() const {
237 return depth_buffer_address * 8;
238 }
239
240 inline u32 GetWidth() const {
241 return width;
242 }
243
244 inline u32 GetHeight() const {
245 return height + 1;
246 }
247 } framebuffer;
248
249 // Returns the number of bytes in the specified depth format
250 static u32 BytesPerDepthPixel(DepthFormat format) {
251 switch (format) {
252 case DepthFormat::D16:
253 return 2;
254 case DepthFormat::D24:
255 return 3;
256 case DepthFormat::D24S8:
257 return 4;
258 default:
259 LOG_CRITICAL(HW_GPU, "Unknown depth format %u", format);
260 UNIMPLEMENTED();
261 }
262 }
263
264 // Returns the number of bits per depth component of the specified depth format
265 static u32 DepthBitsPerPixel(DepthFormat format) {
266 switch (format) {
267 case DepthFormat::D16:
268 return 16;
269 case DepthFormat::D24:
270 case DepthFormat::D24S8:
271 return 24;
272 default:
273 LOG_CRITICAL(HW_GPU, "Unknown depth format %u", format);
274 UNIMPLEMENTED();
275 }
276 }
277
278 INSERT_PADDING_WORDS(0x20);
279};
280
281static_assert(sizeof(FramebufferRegs) == 0x40 * sizeof(u32),
282 "FramebufferRegs struct has incorrect size");
283
284} // namespace Pica
diff --git a/src/video_core/regs_lighting.h b/src/video_core/regs_lighting.h
new file mode 100644
index 000000000..6793405d9
--- /dev/null
+++ b/src/video_core/regs_lighting.h
@@ -0,0 +1,294 @@
1// Copyright 2017 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8
9#include "common/assert.h"
10#include "common/bit_field.h"
11#include "common/common_funcs.h"
12#include "common/common_types.h"
13#include "common/vector_math.h"
14
15namespace Pica {
16
17struct LightingRegs {
18 enum class LightingSampler {
19 Distribution0 = 0,
20 Distribution1 = 1,
21 Fresnel = 3,
22 ReflectBlue = 4,
23 ReflectGreen = 5,
24 ReflectRed = 6,
25 SpotlightAttenuation = 8,
26 DistanceAttenuation = 16,
27 };
28
29 /**
30 * Pica fragment lighting supports using different LUTs for each lighting component: Reflectance
31 * R, G, and B channels, distribution function for specular components 0 and 1, fresnel factor,
32 * and spotlight attenuation. Furthermore, which LUTs are used for each channel (or whether a
33 * channel is enabled at all) is specified by various pre-defined lighting configurations. With
34 * configurations that require more LUTs, more cycles are required on HW to perform lighting
35 * computations.
36 */
37 enum class LightingConfig : u32 {
38 Config0 = 0, ///< Reflect Red, Distribution 0, Spotlight
39 Config1 = 1, ///< Reflect Red, Fresnel, Spotlight
40 Config2 = 2, ///< Reflect Red, Distribution 0/1
41 Config3 = 3, ///< Distribution 0/1, Fresnel
42 Config4 = 4, ///< Reflect Red/Green/Blue, Distribution 0/1, Spotlight
43 Config5 = 5, ///< Reflect Red/Green/Blue, Distribution 0, Fresnel, Spotlight
44 Config6 = 6, ///< Reflect Red, Distribution 0/1, Fresnel, Spotlight
45
46 Config7 = 8, ///< Reflect Red/Green/Blue, Distribution 0/1, Fresnel, Spotlight
47 ///< NOTE: '8' is intentional, '7' does not appear to be a valid configuration
48 };
49
50 /// Selects which lighting components are affected by fresnel
51 enum class LightingFresnelSelector : u32 {
52 None = 0, ///< Fresnel is disabled
53 PrimaryAlpha = 1, ///< Primary (diffuse) lighting alpha is affected by fresnel
54 SecondaryAlpha = 2, ///< Secondary (specular) lighting alpha is affected by fresnel
55 Both =
56 PrimaryAlpha |
57 SecondaryAlpha, ///< Both primary and secondary lighting alphas are affected by fresnel
58 };
59
60 /// Factor used to scale the output of a lighting LUT
61 enum class LightingScale : u32 {
62 Scale1 = 0, ///< Scale is 1x
63 Scale2 = 1, ///< Scale is 2x
64 Scale4 = 2, ///< Scale is 4x
65 Scale8 = 3, ///< Scale is 8x
66
67 Scale1_4 = 6, ///< Scale is 0.25x
68 Scale1_2 = 7, ///< Scale is 0.5x
69 };
70
71 enum class LightingLutInput : u32 {
72 NH = 0, // Cosine of the angle between the normal and half-angle vectors
73 VH = 1, // Cosine of the angle between the view and half-angle vectors
74 NV = 2, // Cosine of the angle between the normal and the view vector
75 LN = 3, // Cosine of the angle between the light and the normal vectors
76 };
77
78 enum class LightingBumpMode : u32 {
79 None = 0,
80 NormalMap = 1,
81 TangentMap = 2,
82 };
83
84 union LightColor {
85 BitField<0, 10, u32> b;
86 BitField<10, 10, u32> g;
87 BitField<20, 10, u32> r;
88
89 Math::Vec3f ToVec3f() const {
90 // These fields are 10 bits wide, however 255 corresponds to 1.0f for each color
91 // component
92 return Math::MakeVec((f32)r / 255.f, (f32)g / 255.f, (f32)b / 255.f);
93 }
94 };
95
96 /// Returns true if the specified lighting sampler is supported by the current Pica lighting
97 /// configuration
98 static bool IsLightingSamplerSupported(LightingConfig config, LightingSampler sampler) {
99 switch (sampler) {
100 case LightingSampler::Distribution0:
101 return (config != LightingConfig::Config1);
102
103 case LightingSampler::Distribution1:
104 return (config != LightingConfig::Config0) && (config != LightingConfig::Config1) &&
105 (config != LightingConfig::Config5);
106
107 case LightingSampler::Fresnel:
108 return (config != LightingConfig::Config0) && (config != LightingConfig::Config2) &&
109 (config != LightingConfig::Config4);
110
111 case LightingSampler::ReflectRed:
112 return (config != LightingConfig::Config3);
113
114 case LightingSampler::ReflectGreen:
115 case LightingSampler::ReflectBlue:
116 return (config == LightingConfig::Config4) || (config == LightingConfig::Config5) ||
117 (config == LightingConfig::Config7);
118 default:
119 UNREACHABLE_MSG("Regs::IsLightingSamplerSupported: Reached "
120 "unreachable section, sampler should be one "
121 "of Distribution0, Distribution1, Fresnel, "
122 "ReflectRed, ReflectGreen or ReflectBlue, instead "
123 "got %i",
124 static_cast<int>(config));
125 }
126 }
127
128 struct LightSrc {
129 LightColor specular_0; // material.specular_0 * light.specular_0
130 LightColor specular_1; // material.specular_1 * light.specular_1
131 LightColor diffuse; // material.diffuse * light.diffuse
132 LightColor ambient; // material.ambient * light.ambient
133
134 // Encoded as 16-bit floating point
135 union {
136 BitField<0, 16, u32> x;
137 BitField<16, 16, u32> y;
138 };
139 union {
140 BitField<0, 16, u32> z;
141 };
142
143 INSERT_PADDING_WORDS(0x3);
144
145 union {
146 BitField<0, 1, u32> directional;
147 BitField<1, 1, u32> two_sided_diffuse; // When disabled, clamp dot-product to 0
148 } config;
149
150 BitField<0, 20, u32> dist_atten_bias;
151 BitField<0, 20, u32> dist_atten_scale;
152
153 INSERT_PADDING_WORDS(0x4);
154 };
155 static_assert(sizeof(LightSrc) == 0x10 * sizeof(u32), "LightSrc structure must be 0x10 words");
156
157 LightSrc light[8];
158 LightColor global_ambient; // Emission + (material.ambient * lighting.ambient)
159 INSERT_PADDING_WORDS(0x1);
160 BitField<0, 3, u32> max_light_index; // Number of enabled lights - 1
161
162 union {
163 BitField<2, 2, LightingFresnelSelector> fresnel_selector;
164 BitField<4, 4, LightingConfig> config;
165 BitField<22, 2, u32> bump_selector; // 0: Texture 0, 1: Texture 1, 2: Texture 2
166 BitField<27, 1, u32> clamp_highlights;
167 BitField<28, 2, LightingBumpMode> bump_mode;
168 BitField<30, 1, u32> disable_bump_renorm;
169 } config0;
170
171 union {
172 BitField<16, 1, u32> disable_lut_d0;
173 BitField<17, 1, u32> disable_lut_d1;
174 BitField<19, 1, u32> disable_lut_fr;
175 BitField<20, 1, u32> disable_lut_rr;
176 BitField<21, 1, u32> disable_lut_rg;
177 BitField<22, 1, u32> disable_lut_rb;
178
179 // Each bit, when set, disables distance attenuation for the corresponding light (bit 24
180 // belongs to light 0, bit 31 to light 7).
181 BitField<24, 1, u32> disable_dist_atten_light_0;
182 BitField<25, 1, u32> disable_dist_atten_light_1;
183 BitField<26, 1, u32> disable_dist_atten_light_2;
184 BitField<27, 1, u32> disable_dist_atten_light_3;
185 BitField<28, 1, u32> disable_dist_atten_light_4;
186 BitField<29, 1, u32> disable_dist_atten_light_5;
187 BitField<30, 1, u32> disable_dist_atten_light_6;
188 BitField<31, 1, u32> disable_dist_atten_light_7;
189 } config1;
190
191 bool IsDistAttenDisabled(unsigned index) const {
192 const unsigned disable[] = {
193 config1.disable_dist_atten_light_0, config1.disable_dist_atten_light_1,
194 config1.disable_dist_atten_light_2, config1.disable_dist_atten_light_3,
195 config1.disable_dist_atten_light_4, config1.disable_dist_atten_light_5,
196 config1.disable_dist_atten_light_6, config1.disable_dist_atten_light_7};
197 return disable[index] != 0;
198 }
199
200 union {
201 BitField<0, 8, u32> index; ///< Index at which to set data in the LUT
202 BitField<8, 5, u32> type; ///< Type of LUT for which to set data
203 } lut_config;
204
205 BitField<0, 1, u32> disable;
206 INSERT_PADDING_WORDS(0x1);
207
208 // When data is written to any of these registers, it gets written to the lookup table of the
209 // selected type at the selected index, specified above in the `lut_config` register. With each
210 // write, `lut_config.index` is incremented. It does not matter which of these registers is
211 // written to, the behavior will be the same.
212 u32 lut_data[8];
213
214 // These are used to specify if absolute (abs) value should be used for each LUT index. When
215 // abs mode is disabled, LUT indexes are in the range of (-1.0, 1.0). Otherwise, they are in
216 // the range of (0.0, 1.0).
217 union {
218 BitField<1, 1, u32> disable_d0;
219 BitField<5, 1, u32> disable_d1;
220 BitField<9, 1, u32> disable_sp;
221 BitField<13, 1, u32> disable_fr;
222 BitField<17, 1, u32> disable_rb;
223 BitField<21, 1, u32> disable_rg;
224 BitField<25, 1, u32> disable_rr;
225 } abs_lut_input;
226
227 union {
228 BitField<0, 3, LightingLutInput> d0;
229 BitField<4, 3, LightingLutInput> d1;
230 BitField<8, 3, LightingLutInput> sp;
231 BitField<12, 3, LightingLutInput> fr;
232 BitField<16, 3, LightingLutInput> rb;
233 BitField<20, 3, LightingLutInput> rg;
234 BitField<24, 3, LightingLutInput> rr;
235 } lut_input;
236
237 union {
238 BitField<0, 3, LightingScale> d0;
239 BitField<4, 3, LightingScale> d1;
240 BitField<8, 3, LightingScale> sp;
241 BitField<12, 3, LightingScale> fr;
242 BitField<16, 3, LightingScale> rb;
243 BitField<20, 3, LightingScale> rg;
244 BitField<24, 3, LightingScale> rr;
245
246 static float GetScale(LightingScale scale) {
247 switch (scale) {
248 case LightingScale::Scale1:
249 return 1.0f;
250 case LightingScale::Scale2:
251 return 2.0f;
252 case LightingScale::Scale4:
253 return 4.0f;
254 case LightingScale::Scale8:
255 return 8.0f;
256 case LightingScale::Scale1_4:
257 return 0.25f;
258 case LightingScale::Scale1_2:
259 return 0.5f;
260 }
261 return 0.0f;
262 }
263 } lut_scale;
264
265 INSERT_PADDING_WORDS(0x6);
266
267 union {
268 // There are 8 light enable "slots", corresponding to the total number of lights supported
269 // by Pica. For N enabled lights (register 0x1c2, 'max_light_index' above, holds N - 1), the
270 // first N slots below will be set to integers within the range of 0-7, corresponding to the
271 // actual light that is enabled for each slot.
272
273 BitField<0, 3, u32> slot_0;
274 BitField<4, 3, u32> slot_1;
275 BitField<8, 3, u32> slot_2;
276 BitField<12, 3, u32> slot_3;
277 BitField<16, 3, u32> slot_4;
278 BitField<20, 3, u32> slot_5;
279 BitField<24, 3, u32> slot_6;
280 BitField<28, 3, u32> slot_7;
281
282 unsigned GetNum(unsigned index) const {
283 const unsigned enable_slots[] = {slot_0, slot_1, slot_2, slot_3,
284 slot_4, slot_5, slot_6, slot_7};
285 return enable_slots[index];
286 }
287 } light_enable;
288
289 INSERT_PADDING_WORDS(0x26);
290};
291
292static_assert(sizeof(LightingRegs) == 0xC0 * sizeof(u32), "LightingRegs struct has incorrect size");
293
294} // namespace Pica
diff --git a/src/video_core/regs_pipeline.h b/src/video_core/regs_pipeline.h
new file mode 100644
index 000000000..0a4ec6e1e
--- /dev/null
+++ b/src/video_core/regs_pipeline.h
@@ -0,0 +1,230 @@
1// Copyright 2017 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8
9#include "common/assert.h"
10#include "common/bit_field.h"
11#include "common/common_funcs.h"
12#include "common/common_types.h"
13
14namespace Pica {
15
16struct PipelineRegs {
17 enum class VertexAttributeFormat : u32 {
18 BYTE = 0,
19 UBYTE = 1,
20 SHORT = 2,
21 FLOAT = 3,
22 };
23
24 struct {
25 BitField<0, 29, u32> base_address;
26
27 PAddr GetPhysicalBaseAddress() const {
28 return base_address * 8;
29 }
30
31 // Descriptor for internal vertex attributes
32 union {
33 BitField<0, 2, VertexAttributeFormat> format0; // size of one element
34 BitField<2, 2, u32> size0; // number of elements minus 1
35 BitField<4, 2, VertexAttributeFormat> format1;
36 BitField<6, 2, u32> size1;
37 BitField<8, 2, VertexAttributeFormat> format2;
38 BitField<10, 2, u32> size2;
39 BitField<12, 2, VertexAttributeFormat> format3;
40 BitField<14, 2, u32> size3;
41 BitField<16, 2, VertexAttributeFormat> format4;
42 BitField<18, 2, u32> size4;
43 BitField<20, 2, VertexAttributeFormat> format5;
44 BitField<22, 2, u32> size5;
45 BitField<24, 2, VertexAttributeFormat> format6;
46 BitField<26, 2, u32> size6;
47 BitField<28, 2, VertexAttributeFormat> format7;
48 BitField<30, 2, u32> size7;
49 };
50
51 union {
52 BitField<0, 2, VertexAttributeFormat> format8;
53 BitField<2, 2, u32> size8;
54 BitField<4, 2, VertexAttributeFormat> format9;
55 BitField<6, 2, u32> size9;
56 BitField<8, 2, VertexAttributeFormat> format10;
57 BitField<10, 2, u32> size10;
58 BitField<12, 2, VertexAttributeFormat> format11;
59 BitField<14, 2, u32> size11;
60
61 BitField<16, 12, u32> attribute_mask;
62
63 // number of total attributes minus 1
64 BitField<28, 4, u32> max_attribute_index;
65 };
66
67 inline VertexAttributeFormat GetFormat(int n) const {
68 VertexAttributeFormat formats[] = {format0, format1, format2, format3,
69 format4, format5, format6, format7,
70 format8, format9, format10, format11};
71 return formats[n];
72 }
73
74 inline int GetNumElements(int n) const {
75 u32 sizes[] = {size0, size1, size2, size3, size4, size5,
76 size6, size7, size8, size9, size10, size11};
77 return (int)sizes[n] + 1;
78 }
79
80 inline int GetElementSizeInBytes(int n) const {
81 return (GetFormat(n) == VertexAttributeFormat::FLOAT)
82 ? 4
83 : (GetFormat(n) == VertexAttributeFormat::SHORT) ? 2 : 1;
84 }
85
86 inline int GetStride(int n) const {
87 return GetNumElements(n) * GetElementSizeInBytes(n);
88 }
89
90 inline bool IsDefaultAttribute(int id) const {
91 return (id >= 12) || (attribute_mask & (1ULL << id)) != 0;
92 }
93
94 inline int GetNumTotalAttributes() const {
95 return (int)max_attribute_index + 1;
96 }
97
98 // Attribute loaders map the source vertex data to input attributes
99 // This allows, for example, loading different attributes from different memory locations
100 struct {
101 // Source attribute data offset from the base address
102 u32 data_offset;
103
104 union {
105 BitField<0, 4, u32> comp0;
106 BitField<4, 4, u32> comp1;
107 BitField<8, 4, u32> comp2;
108 BitField<12, 4, u32> comp3;
109 BitField<16, 4, u32> comp4;
110 BitField<20, 4, u32> comp5;
111 BitField<24, 4, u32> comp6;
112 BitField<28, 4, u32> comp7;
113 };
114
115 union {
116 BitField<0, 4, u32> comp8;
117 BitField<4, 4, u32> comp9;
118 BitField<8, 4, u32> comp10;
119 BitField<12, 4, u32> comp11;
120
121 // bytes for a single vertex in this loader
122 BitField<16, 8, u32> byte_count;
123
124 BitField<28, 4, u32> component_count;
125 };
126
127 inline int GetComponent(int n) const {
128 u32 components[] = {comp0, comp1, comp2, comp3, comp4, comp5,
129 comp6, comp7, comp8, comp9, comp10, comp11};
130 return (int)components[n];
131 }
132 } attribute_loaders[12];
133 } vertex_attributes;
134
135 struct {
136 enum IndexFormat : u32 {
137 BYTE = 0,
138 SHORT = 1,
139 };
140
141 union {
142 BitField<0, 31, u32> offset; // relative to base attribute address
143 BitField<31, 1, IndexFormat> format;
144 };
145 } index_array;
146
147 // Number of vertices to render
148 u32 num_vertices;
149
150 INSERT_PADDING_WORDS(0x1);
151
152 // The index of the first vertex to render
153 u32 vertex_offset;
154
155 INSERT_PADDING_WORDS(0x3);
156
157 // These two trigger rendering of triangles
158 u32 trigger_draw;
159 u32 trigger_draw_indexed;
160
161 INSERT_PADDING_WORDS(0x2);
162
163 // These registers are used to setup the default "fall-back" vertex shader attributes
164 struct {
165 // Index of the current default attribute
166 u32 index;
167
168 // Writing to these registers sets the "current" default attribute.
169 u32 set_value[3];
170 } vs_default_attributes_setup;
171
172 INSERT_PADDING_WORDS(0x2);
173
174 struct {
175 // There are two channels that can be used to configure the next command buffer, which can
176 // be then executed by writing to the "trigger" registers. There are two reasons why a game
177 // might use this feature:
178 // 1) With this, an arbitrary number of additional command buffers may be executed in
179 // sequence without requiring any intervention of the CPU after the initial one is
180 // kicked off.
181 // 2) Games can configure these registers to provide a command list subroutine mechanism.
182
183 BitField<0, 20, u32> size[2]; ///< Size (in bytes / 8) of each channel's command buffer
184 BitField<0, 28, u32> addr[2]; ///< Physical address / 8 of each channel's command buffer
185 u32 trigger[2]; ///< Triggers execution of the channel's command buffer when written to
186
187 unsigned GetSize(unsigned index) const {
188 ASSERT(index < 2);
189 return 8 * size[index];
190 }
191
192 PAddr GetPhysicalAddress(unsigned index) const {
193 ASSERT(index < 2);
194 return (PAddr)(8 * addr[index]);
195 }
196 } command_buffer;
197
198 INSERT_PADDING_WORDS(4);
199
200 /// Number of input attributes to the vertex shader minus 1
201 BitField<0, 4, u32> max_input_attrib_index;
202
203 INSERT_PADDING_WORDS(2);
204
205 enum class GPUMode : u32 {
206 Drawing = 0,
207 Configuring = 1,
208 };
209
210 GPUMode gpu_mode;
211
212 INSERT_PADDING_WORDS(0x18);
213
214 enum class TriangleTopology : u32 {
215 List = 0,
216 Strip = 1,
217 Fan = 2,
218 Shader = 3, // Programmable setup unit implemented in a geometry shader
219 };
220
221 BitField<8, 2, TriangleTopology> triangle_topology;
222
223 u32 restart_primitive;
224
225 INSERT_PADDING_WORDS(0x20);
226};
227
228static_assert(sizeof(PipelineRegs) == 0x80 * sizeof(u32), "PipelineRegs struct has incorrect size");
229
230} // namespace Pica
diff --git a/src/video_core/regs_rasterizer.h b/src/video_core/regs_rasterizer.h
new file mode 100644
index 000000000..a471a3b38
--- /dev/null
+++ b/src/video_core/regs_rasterizer.h
@@ -0,0 +1,129 @@
1// Copyright 2017 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8
9#include "common/bit_field.h"
10#include "common/common_funcs.h"
11#include "common/common_types.h"
12
13namespace Pica {
14
15struct RasterizerRegs {
16 enum class CullMode : u32 {
17 // Select which polygons are considered to be "frontfacing".
18 KeepAll = 0,
19 KeepClockWise = 1,
20 KeepCounterClockWise = 2,
21 // TODO: What does the third value imply?
22 };
23
24 union {
25 BitField<0, 2, CullMode> cull_mode;
26 };
27
28 BitField<0, 24, u32> viewport_size_x;
29
30 INSERT_PADDING_WORDS(0x1);
31
32 BitField<0, 24, u32> viewport_size_y;
33
34 INSERT_PADDING_WORDS(0x9);
35
36 BitField<0, 24, u32> viewport_depth_range; // float24
37 BitField<0, 24, u32> viewport_depth_near_plane; // float24
38
39 BitField<0, 3, u32> vs_output_total;
40
41 union VSOutputAttributes {
42 // Maps components of output vertex attributes to semantics
43 enum Semantic : u32 {
44 POSITION_X = 0,
45 POSITION_Y = 1,
46 POSITION_Z = 2,
47 POSITION_W = 3,
48
49 QUATERNION_X = 4,
50 QUATERNION_Y = 5,
51 QUATERNION_Z = 6,
52 QUATERNION_W = 7,
53
54 COLOR_R = 8,
55 COLOR_G = 9,
56 COLOR_B = 10,
57 COLOR_A = 11,
58
59 TEXCOORD0_U = 12,
60 TEXCOORD0_V = 13,
61 TEXCOORD1_U = 14,
62 TEXCOORD1_V = 15,
63
64 TEXCOORD0_W = 16,
65
66 VIEW_X = 18,
67 VIEW_Y = 19,
68 VIEW_Z = 20,
69
70 TEXCOORD2_U = 22,
71 TEXCOORD2_V = 23,
72
73 INVALID = 31,
74 };
75
76 BitField<0, 5, Semantic> map_x;
77 BitField<8, 5, Semantic> map_y;
78 BitField<16, 5, Semantic> map_z;
79 BitField<24, 5, Semantic> map_w;
80 } vs_output_attributes[7];
81
82 INSERT_PADDING_WORDS(0xe);
83
84 enum class ScissorMode : u32 {
85 Disabled = 0,
86 Exclude = 1, // Exclude pixels inside the scissor box
87
88 Include = 3 // Exclude pixels outside the scissor box
89 };
90
91 struct {
92 BitField<0, 2, ScissorMode> mode;
93
94 union {
95 BitField<0, 16, u32> x1;
96 BitField<16, 16, u32> y1;
97 };
98
99 union {
100 BitField<0, 16, u32> x2;
101 BitField<16, 16, u32> y2;
102 };
103 } scissor_test;
104
105 union {
106 BitField<0, 10, s32> x;
107 BitField<16, 10, s32> y;
108 } viewport_corner;
109
110 INSERT_PADDING_WORDS(0x1);
111
112 // TODO: early depth
113 INSERT_PADDING_WORDS(0x1);
114
115 INSERT_PADDING_WORDS(0x2);
116
117 enum DepthBuffering : u32 {
118 WBuffering = 0,
119 ZBuffering = 1,
120 };
121 BitField<0, 1, DepthBuffering> depthmap_enable;
122
123 INSERT_PADDING_WORDS(0x12);
124};
125
126static_assert(sizeof(RasterizerRegs) == 0x40 * sizeof(u32),
127 "RasterizerRegs struct has incorrect size");
128
129} // namespace Pica
diff --git a/src/video_core/regs_shader.h b/src/video_core/regs_shader.h
new file mode 100644
index 000000000..ddb1ee451
--- /dev/null
+++ b/src/video_core/regs_shader.h
@@ -0,0 +1,104 @@
1// Copyright 2017 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8
9#include "common/bit_field.h"
10#include "common/common_funcs.h"
11#include "common/common_types.h"
12
13namespace Pica {
14
15struct ShaderRegs {
16 BitField<0, 16, u32> bool_uniforms;
17
18 union {
19 BitField<0, 8, u32> x;
20 BitField<8, 8, u32> y;
21 BitField<16, 8, u32> z;
22 BitField<24, 8, u32> w;
23 } int_uniforms[4];
24
25 INSERT_PADDING_WORDS(0x4);
26
27 union {
28 // Number of input attributes to shader unit - 1
29 BitField<0, 4, u32> max_input_attribute_index;
30 };
31
32 // Offset to shader program entry point (in words)
33 BitField<0, 16, u32> main_offset;
34
35 /// Maps input attributes to registers. 4-bits per attribute, specifying a register index
36 u32 input_attribute_to_register_map_low;
37 u32 input_attribute_to_register_map_high;
38
39 unsigned int GetRegisterForAttribute(unsigned int attribute_index) const {
40 u64 map = ((u64)input_attribute_to_register_map_high << 32) |
41 (u64)input_attribute_to_register_map_low;
42 return (map >> (attribute_index * 4)) & 0b1111;
43 }
44
45 BitField<0, 16, u32> output_mask;
46
47 // 0x28E, CODETRANSFER_END
48 INSERT_PADDING_WORDS(0x2);
49
50 struct {
51 enum Format : u32 {
52 FLOAT24 = 0,
53 FLOAT32 = 1,
54 };
55
56 bool IsFloat32() const {
57 return format == FLOAT32;
58 }
59
60 union {
61 // Index of the next uniform to write to
62 // TODO: ctrulib uses 8 bits for this, however that seems to yield lots of invalid
63 // indices
64 // TODO: Maybe the uppermost index is for the geometry shader? Investigate!
65 BitField<0, 7, u32> index;
66
67 BitField<31, 1, Format> format;
68 };
69
70 // Writing to these registers sets the current uniform.
71 u32 set_value[8];
72
73 } uniform_setup;
74
75 INSERT_PADDING_WORDS(0x2);
76
77 struct {
78 // Offset of the next instruction to write code to.
79 // Incremented with each instruction write.
80 u32 offset;
81
82 // Writing to these registers sets the "current" word in the shader program.
83 u32 set_word[8];
84 } program;
85
86 INSERT_PADDING_WORDS(0x1);
87
88 // This register group is used to load an internal table of swizzling patterns,
89 // which are indexed by each shader instruction to specify vector component swizzling.
90 struct {
91 // Offset of the next swizzle pattern to write code to.
92 // Incremented with each instruction write.
93 u32 offset;
94
95 // Writing to these registers sets the current swizzle pattern in the table.
96 u32 set_word[8];
97 } swizzle_patterns;
98
99 INSERT_PADDING_WORDS(0x2);
100};
101
102static_assert(sizeof(ShaderRegs) == 0x30 * sizeof(u32), "ShaderRegs struct has incorrect size");
103
104} // namespace Pica
diff --git a/src/video_core/regs_texturing.h b/src/video_core/regs_texturing.h
new file mode 100644
index 000000000..be8bc6826
--- /dev/null
+++ b/src/video_core/regs_texturing.h
@@ -0,0 +1,328 @@
1// Copyright 2017 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8
9#include "common/assert.h"
10#include "common/bit_field.h"
11#include "common/common_funcs.h"
12#include "common/common_types.h"
13
14namespace Pica {
15
16struct TexturingRegs {
17 struct TextureConfig {
18 enum TextureType : u32 {
19 Texture2D = 0,
20 TextureCube = 1,
21 Shadow2D = 2,
22 Projection2D = 3,
23 ShadowCube = 4,
24 Disabled = 5,
25 };
26
27 enum WrapMode : u32 {
28 ClampToEdge = 0,
29 ClampToBorder = 1,
30 Repeat = 2,
31 MirroredRepeat = 3,
32 };
33
34 enum TextureFilter : u32 {
35 Nearest = 0,
36 Linear = 1,
37 };
38
39 union {
40 u32 raw;
41 BitField<0, 8, u32> r;
42 BitField<8, 8, u32> g;
43 BitField<16, 8, u32> b;
44 BitField<24, 8, u32> a;
45 } border_color;
46
47 union {
48 BitField<0, 16, u32> height;
49 BitField<16, 16, u32> width;
50 };
51
52 union {
53 BitField<1, 1, TextureFilter> mag_filter;
54 BitField<2, 1, TextureFilter> min_filter;
55 BitField<8, 2, WrapMode> wrap_t;
56 BitField<12, 2, WrapMode> wrap_s;
57 BitField<28, 2, TextureType>
58 type; ///< @note Only valid for texture 0 according to 3DBrew.
59 };
60
61 INSERT_PADDING_WORDS(0x1);
62
63 u32 address;
64
65 PAddr GetPhysicalAddress() const {
66 return address * 8;
67 }
68
69 // texture1 and texture2 store the texture format directly after the address
70 // whereas texture0 inserts some additional flags in between.
71 // Hence, we store the format separately so that all other parameters can be described
72 // in a single structure.
73 };
74
75 enum class TextureFormat : u32 {
76 RGBA8 = 0,
77 RGB8 = 1,
78 RGB5A1 = 2,
79 RGB565 = 3,
80 RGBA4 = 4,
81 IA8 = 5,
82 RG8 = 6, ///< @note Also called HILO8 in 3DBrew.
83 I8 = 7,
84 A8 = 8,
85 IA4 = 9,
86 I4 = 10,
87 A4 = 11,
88 ETC1 = 12, // compressed
89 ETC1A4 = 13, // compressed
90 };
91
92 static unsigned NibblesPerPixel(TextureFormat format) {
93 switch (format) {
94 case TextureFormat::RGBA8:
95 return 8;
96
97 case TextureFormat::RGB8:
98 return 6;
99
100 case TextureFormat::RGB5A1:
101 case TextureFormat::RGB565:
102 case TextureFormat::RGBA4:
103 case TextureFormat::IA8:
104 case TextureFormat::RG8:
105 return 4;
106
107 case TextureFormat::I4:
108 case TextureFormat::A4:
109 return 1;
110
111 case TextureFormat::I8:
112 case TextureFormat::A8:
113 case TextureFormat::IA4:
114
115 default: // placeholder for yet unknown formats
116 UNIMPLEMENTED();
117 return 0;
118 }
119 }
120
121 union {
122 BitField<0, 1, u32> texture0_enable;
123 BitField<1, 1, u32> texture1_enable;
124 BitField<2, 1, u32> texture2_enable;
125 };
126 TextureConfig texture0;
127 INSERT_PADDING_WORDS(0x8);
128 BitField<0, 4, TextureFormat> texture0_format;
129 BitField<0, 1, u32> fragment_lighting_enable;
130 INSERT_PADDING_WORDS(0x1);
131 TextureConfig texture1;
132 BitField<0, 4, TextureFormat> texture1_format;
133 INSERT_PADDING_WORDS(0x2);
134 TextureConfig texture2;
135 BitField<0, 4, TextureFormat> texture2_format;
136 INSERT_PADDING_WORDS(0x21);
137
138 struct FullTextureConfig {
139 const bool enabled;
140 const TextureConfig config;
141 const TextureFormat format;
142 };
143 const std::array<FullTextureConfig, 3> GetTextures() const {
144 return {{
145 {texture0_enable.ToBool(), texture0, texture0_format},
146 {texture1_enable.ToBool(), texture1, texture1_format},
147 {texture2_enable.ToBool(), texture2, texture2_format},
148 }};
149 }
150
151 // 0xc0-0xff: Texture Combiner (akin to glTexEnv)
152 struct TevStageConfig {
153 enum class Source : u32 {
154 PrimaryColor = 0x0,
155 PrimaryFragmentColor = 0x1,
156 SecondaryFragmentColor = 0x2,
157
158 Texture0 = 0x3,
159 Texture1 = 0x4,
160 Texture2 = 0x5,
161 Texture3 = 0x6,
162
163 PreviousBuffer = 0xd,
164 Constant = 0xe,
165 Previous = 0xf,
166 };
167
168 enum class ColorModifier : u32 {
169 SourceColor = 0x0,
170 OneMinusSourceColor = 0x1,
171 SourceAlpha = 0x2,
172 OneMinusSourceAlpha = 0x3,
173 SourceRed = 0x4,
174 OneMinusSourceRed = 0x5,
175
176 SourceGreen = 0x8,
177 OneMinusSourceGreen = 0x9,
178
179 SourceBlue = 0xc,
180 OneMinusSourceBlue = 0xd,
181 };
182
183 enum class AlphaModifier : u32 {
184 SourceAlpha = 0x0,
185 OneMinusSourceAlpha = 0x1,
186 SourceRed = 0x2,
187 OneMinusSourceRed = 0x3,
188 SourceGreen = 0x4,
189 OneMinusSourceGreen = 0x5,
190 SourceBlue = 0x6,
191 OneMinusSourceBlue = 0x7,
192 };
193
194 enum class Operation : u32 {
195 Replace = 0,
196 Modulate = 1,
197 Add = 2,
198 AddSigned = 3,
199 Lerp = 4,
200 Subtract = 5,
201 Dot3_RGB = 6,
202
203 MultiplyThenAdd = 8,
204 AddThenMultiply = 9,
205 };
206
207 union {
208 u32 sources_raw;
209 BitField<0, 4, Source> color_source1;
210 BitField<4, 4, Source> color_source2;
211 BitField<8, 4, Source> color_source3;
212 BitField<16, 4, Source> alpha_source1;
213 BitField<20, 4, Source> alpha_source2;
214 BitField<24, 4, Source> alpha_source3;
215 };
216
217 union {
218 u32 modifiers_raw;
219 BitField<0, 4, ColorModifier> color_modifier1;
220 BitField<4, 4, ColorModifier> color_modifier2;
221 BitField<8, 4, ColorModifier> color_modifier3;
222 BitField<12, 3, AlphaModifier> alpha_modifier1;
223 BitField<16, 3, AlphaModifier> alpha_modifier2;
224 BitField<20, 3, AlphaModifier> alpha_modifier3;
225 };
226
227 union {
228 u32 ops_raw;
229 BitField<0, 4, Operation> color_op;
230 BitField<16, 4, Operation> alpha_op;
231 };
232
233 union {
234 u32 const_color;
235 BitField<0, 8, u32> const_r;
236 BitField<8, 8, u32> const_g;
237 BitField<16, 8, u32> const_b;
238 BitField<24, 8, u32> const_a;
239 };
240
241 union {
242 u32 scales_raw;
243 BitField<0, 2, u32> color_scale;
244 BitField<16, 2, u32> alpha_scale;
245 };
246
247 inline unsigned GetColorMultiplier() const {
248 return (color_scale < 3) ? (1 << color_scale) : 1;
249 }
250
251 inline unsigned GetAlphaMultiplier() const {
252 return (alpha_scale < 3) ? (1 << alpha_scale) : 1;
253 }
254 };
255
256 TevStageConfig tev_stage0;
257 INSERT_PADDING_WORDS(0x3);
258 TevStageConfig tev_stage1;
259 INSERT_PADDING_WORDS(0x3);
260 TevStageConfig tev_stage2;
261 INSERT_PADDING_WORDS(0x3);
262 TevStageConfig tev_stage3;
263 INSERT_PADDING_WORDS(0x3);
264
265 enum class FogMode : u32 {
266 None = 0,
267 Fog = 5,
268 Gas = 7,
269 };
270
271 union {
272 BitField<0, 3, FogMode> fog_mode;
273 BitField<16, 1, u32> fog_flip;
274
275 union {
276 // Tev stages 0-3 write their output to the combiner buffer if the corresponding bit in
277 // these masks is set
278 BitField<8, 4, u32> update_mask_rgb;
279 BitField<12, 4, u32> update_mask_a;
280
281 bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
282 return (stage_index < 4) && (update_mask_rgb & (1 << stage_index));
283 }
284
285 bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const {
286 return (stage_index < 4) && (update_mask_a & (1 << stage_index));
287 }
288 } tev_combiner_buffer_input;
289 };
290
291 union {
292 u32 raw;
293 BitField<0, 8, u32> r;
294 BitField<8, 8, u32> g;
295 BitField<16, 8, u32> b;
296 } fog_color;
297
298 INSERT_PADDING_WORDS(0x4);
299
300 BitField<0, 16, u32> fog_lut_offset;
301
302 INSERT_PADDING_WORDS(0x1);
303
304 u32 fog_lut_data[8];
305
306 TevStageConfig tev_stage4;
307 INSERT_PADDING_WORDS(0x3);
308 TevStageConfig tev_stage5;
309
310 union {
311 u32 raw;
312 BitField<0, 8, u32> r;
313 BitField<8, 8, u32> g;
314 BitField<16, 8, u32> b;
315 BitField<24, 8, u32> a;
316 } tev_combiner_buffer_color;
317
318 INSERT_PADDING_WORDS(0x2);
319
320 const std::array<TevStageConfig, 6> GetTevStages() const {
321 return {{tev_stage0, tev_stage1, tev_stage2, tev_stage3, tev_stage4, tev_stage5}};
322 };
323};
324
325static_assert(sizeof(TexturingRegs) == 0x80 * sizeof(u32),
326 "TexturingRegs struct has incorrect size");
327
328} // namespace Pica
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index fd38175b3..f6ece5c4b 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -6,7 +6,7 @@
6#include <memory> 6#include <memory>
7#include "video_core/renderer_base.h" 7#include "video_core/renderer_base.h"
8#include "video_core/renderer_opengl/gl_rasterizer.h" 8#include "video_core/renderer_opengl/gl_rasterizer.h"
9#include "video_core/swrasterizer.h" 9#include "video_core/swrasterizer/swrasterizer.h"
10#include "video_core/video_core.h" 10#include "video_core/video_core.h"
11 11
12void RendererBase::RefreshRasterizerSetting() { 12void RendererBase::RefreshRasterizerSetting() {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 5a306a5c8..de1d5eba7 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -14,8 +14,10 @@
14#include "common/microprofile.h" 14#include "common/microprofile.h"
15#include "common/vector_math.h" 15#include "common/vector_math.h"
16#include "core/hw/gpu.h" 16#include "core/hw/gpu.h"
17#include "video_core/pica.h"
18#include "video_core/pica_state.h" 17#include "video_core/pica_state.h"
18#include "video_core/regs_framebuffer.h"
19#include "video_core/regs_rasterizer.h"
20#include "video_core/regs_texturing.h"
19#include "video_core/renderer_opengl/gl_rasterizer.h" 21#include "video_core/renderer_opengl/gl_rasterizer.h"
20#include "video_core/renderer_opengl/gl_shader_gen.h" 22#include "video_core/renderer_opengl/gl_shader_gen.h"
21#include "video_core/renderer_opengl/gl_shader_util.h" 23#include "video_core/renderer_opengl/gl_shader_util.h"
@@ -26,16 +28,6 @@ MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
26MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); 28MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255));
27MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); 29MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
28 30
29static bool IsPassThroughTevStage(const Pica::Regs::TevStageConfig& stage) {
30 return (stage.color_op == Pica::Regs::TevStageConfig::Operation::Replace &&
31 stage.alpha_op == Pica::Regs::TevStageConfig::Operation::Replace &&
32 stage.color_source1 == Pica::Regs::TevStageConfig::Source::Previous &&
33 stage.alpha_source1 == Pica::Regs::TevStageConfig::Source::Previous &&
34 stage.color_modifier1 == Pica::Regs::TevStageConfig::ColorModifier::SourceColor &&
35 stage.alpha_modifier1 == Pica::Regs::TevStageConfig::AlphaModifier::SourceAlpha &&
36 stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1);
37}
38
39RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { 31RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
40 // Create sampler objects 32 // Create sampler objects
41 for (size_t i = 0; i < texture_samplers.size(); ++i) { 33 for (size_t i = 0; i < texture_samplers.size(); ++i) {
@@ -181,7 +173,7 @@ void RasterizerOpenGL::DrawTriangles() {
181 CachedSurface* depth_surface; 173 CachedSurface* depth_surface;
182 MathUtil::Rectangle<int> rect; 174 MathUtil::Rectangle<int> rect;
183 std::tie(color_surface, depth_surface, rect) = 175 std::tie(color_surface, depth_surface, rect) =
184 res_cache.GetFramebufferSurfaces(regs.framebuffer); 176 res_cache.GetFramebufferSurfaces(regs.framebuffer.framebuffer);
185 177
186 state.draw.draw_framebuffer = framebuffer.handle; 178 state.draw.draw_framebuffer = framebuffer.handle;
187 state.Apply(); 179 state.Apply();
@@ -190,20 +182,24 @@ void RasterizerOpenGL::DrawTriangles() {
190 color_surface != nullptr ? color_surface->texture.handle : 0, 0); 182 color_surface != nullptr ? color_surface->texture.handle : 0, 0);
191 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, 183 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
192 depth_surface != nullptr ? depth_surface->texture.handle : 0, 0); 184 depth_surface != nullptr ? depth_surface->texture.handle : 0, 0);
193 bool has_stencil = regs.framebuffer.depth_format == Pica::Regs::DepthFormat::D24S8; 185 bool has_stencil =
186 regs.framebuffer.framebuffer.depth_format == Pica::FramebufferRegs::DepthFormat::D24S8;
194 glFramebufferTexture2D( 187 glFramebufferTexture2D(
195 GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 188 GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
196 (has_stencil && depth_surface != nullptr) ? depth_surface->texture.handle : 0, 0); 189 (has_stencil && depth_surface != nullptr) ? depth_surface->texture.handle : 0, 0);
197 190
198 // Sync the viewport 191 // Sync the viewport
199 // These registers hold half-width and half-height, so must be multiplied by 2 192 // These registers hold half-width and half-height, so must be multiplied by 2
200 GLsizei viewport_width = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_x).ToFloat32() * 2; 193 GLsizei viewport_width =
201 GLsizei viewport_height = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_y).ToFloat32() * 2; 194 (GLsizei)Pica::float24::FromRaw(regs.rasterizer.viewport_size_x).ToFloat32() * 2;
195 GLsizei viewport_height =
196 (GLsizei)Pica::float24::FromRaw(regs.rasterizer.viewport_size_y).ToFloat32() * 2;
202 197
203 glViewport((GLint)(rect.left + regs.viewport_corner.x * color_surface->res_scale_width), 198 glViewport(
204 (GLint)(rect.bottom + regs.viewport_corner.y * color_surface->res_scale_height), 199 (GLint)(rect.left + regs.rasterizer.viewport_corner.x * color_surface->res_scale_width),
205 (GLsizei)(viewport_width * color_surface->res_scale_width), 200 (GLint)(rect.bottom + regs.rasterizer.viewport_corner.y * color_surface->res_scale_height),
206 (GLsizei)(viewport_height * color_surface->res_scale_height)); 201 (GLsizei)(viewport_width * color_surface->res_scale_width),
202 (GLsizei)(viewport_height * color_surface->res_scale_height));
207 203
208 if (uniform_block_data.data.framebuffer_scale[0] != color_surface->res_scale_width || 204 if (uniform_block_data.data.framebuffer_scale[0] != color_surface->res_scale_width ||
209 uniform_block_data.data.framebuffer_scale[1] != color_surface->res_scale_height) { 205 uniform_block_data.data.framebuffer_scale[1] != color_surface->res_scale_height) {
@@ -215,16 +211,16 @@ void RasterizerOpenGL::DrawTriangles() {
215 211
216 // Scissor checks are window-, not viewport-relative, which means that if the cached texture 212 // Scissor checks are window-, not viewport-relative, which means that if the cached texture
217 // sub-rect changes, the scissor bounds also need to be updated. 213 // sub-rect changes, the scissor bounds also need to be updated.
218 GLint scissor_x1 = 214 GLint scissor_x1 = static_cast<GLint>(
219 static_cast<GLint>(rect.left + regs.scissor_test.x1 * color_surface->res_scale_width); 215 rect.left + regs.rasterizer.scissor_test.x1 * color_surface->res_scale_width);
220 GLint scissor_y1 = 216 GLint scissor_y1 = static_cast<GLint>(
221 static_cast<GLint>(rect.bottom + regs.scissor_test.y1 * color_surface->res_scale_height); 217 rect.bottom + regs.rasterizer.scissor_test.y1 * color_surface->res_scale_height);
222 // x2, y2 have +1 added to cover the entire pixel area, otherwise you might get cracks when 218 // x2, y2 have +1 added to cover the entire pixel area, otherwise you might get cracks when
223 // scaling or doing multisampling. 219 // scaling or doing multisampling.
224 GLint scissor_x2 = 220 GLint scissor_x2 = static_cast<GLint>(
225 static_cast<GLint>(rect.left + (regs.scissor_test.x2 + 1) * color_surface->res_scale_width); 221 rect.left + (regs.rasterizer.scissor_test.x2 + 1) * color_surface->res_scale_width);
226 GLint scissor_y2 = static_cast<GLint>( 222 GLint scissor_y2 = static_cast<GLint>(
227 rect.bottom + (regs.scissor_test.y2 + 1) * color_surface->res_scale_height); 223 rect.bottom + (regs.rasterizer.scissor_test.y2 + 1) * color_surface->res_scale_height);
228 224
229 if (uniform_block_data.data.scissor_x1 != scissor_x1 || 225 if (uniform_block_data.data.scissor_x1 != scissor_x1 ||
230 uniform_block_data.data.scissor_x2 != scissor_x2 || 226 uniform_block_data.data.scissor_x2 != scissor_x2 ||
@@ -239,7 +235,7 @@ void RasterizerOpenGL::DrawTriangles() {
239 } 235 }
240 236
241 // Sync and bind the texture surfaces 237 // Sync and bind the texture surfaces
242 const auto pica_textures = regs.GetTextures(); 238 const auto pica_textures = regs.texturing.GetTextures();
243 for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) { 239 for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) {
244 const auto& texture = pica_textures[texture_index]; 240 const auto& texture = pica_textures[texture_index];
245 241
@@ -316,69 +312,69 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
316 312
317 switch (id) { 313 switch (id) {
318 // Culling 314 // Culling
319 case PICA_REG_INDEX(cull_mode): 315 case PICA_REG_INDEX(rasterizer.cull_mode):
320 SyncCullMode(); 316 SyncCullMode();
321 break; 317 break;
322 318
323 // Depth modifiers 319 // Depth modifiers
324 case PICA_REG_INDEX(viewport_depth_range): 320 case PICA_REG_INDEX(rasterizer.viewport_depth_range):
325 SyncDepthScale(); 321 SyncDepthScale();
326 break; 322 break;
327 case PICA_REG_INDEX(viewport_depth_near_plane): 323 case PICA_REG_INDEX(rasterizer.viewport_depth_near_plane):
328 SyncDepthOffset(); 324 SyncDepthOffset();
329 break; 325 break;
330 326
331 // Depth buffering 327 // Depth buffering
332 case PICA_REG_INDEX(depthmap_enable): 328 case PICA_REG_INDEX(rasterizer.depthmap_enable):
333 shader_dirty = true; 329 shader_dirty = true;
334 break; 330 break;
335 331
336 // Blending 332 // Blending
337 case PICA_REG_INDEX(output_merger.alphablend_enable): 333 case PICA_REG_INDEX(framebuffer.output_merger.alphablend_enable):
338 SyncBlendEnabled(); 334 SyncBlendEnabled();
339 break; 335 break;
340 case PICA_REG_INDEX(output_merger.alpha_blending): 336 case PICA_REG_INDEX(framebuffer.output_merger.alpha_blending):
341 SyncBlendFuncs(); 337 SyncBlendFuncs();
342 break; 338 break;
343 case PICA_REG_INDEX(output_merger.blend_const): 339 case PICA_REG_INDEX(framebuffer.output_merger.blend_const):
344 SyncBlendColor(); 340 SyncBlendColor();
345 break; 341 break;
346 342
347 // Fog state 343 // Fog state
348 case PICA_REG_INDEX(fog_color): 344 case PICA_REG_INDEX(texturing.fog_color):
349 SyncFogColor(); 345 SyncFogColor();
350 break; 346 break;
351 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[0], 0xe8): 347 case PICA_REG_INDEX_WORKAROUND(texturing.fog_lut_data[0], 0xe8):
352 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[1], 0xe9): 348 case PICA_REG_INDEX_WORKAROUND(texturing.fog_lut_data[1], 0xe9):
353 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[2], 0xea): 349 case PICA_REG_INDEX_WORKAROUND(texturing.fog_lut_data[2], 0xea):
354 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[3], 0xeb): 350 case PICA_REG_INDEX_WORKAROUND(texturing.fog_lut_data[3], 0xeb):
355 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[4], 0xec): 351 case PICA_REG_INDEX_WORKAROUND(texturing.fog_lut_data[4], 0xec):
356 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[5], 0xed): 352 case PICA_REG_INDEX_WORKAROUND(texturing.fog_lut_data[5], 0xed):
357 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[6], 0xee): 353 case PICA_REG_INDEX_WORKAROUND(texturing.fog_lut_data[6], 0xee):
358 case PICA_REG_INDEX_WORKAROUND(fog_lut_data[7], 0xef): 354 case PICA_REG_INDEX_WORKAROUND(texturing.fog_lut_data[7], 0xef):
359 uniform_block_data.fog_lut_dirty = true; 355 uniform_block_data.fog_lut_dirty = true;
360 break; 356 break;
361 357
362 // Alpha test 358 // Alpha test
363 case PICA_REG_INDEX(output_merger.alpha_test): 359 case PICA_REG_INDEX(framebuffer.output_merger.alpha_test):
364 SyncAlphaTest(); 360 SyncAlphaTest();
365 shader_dirty = true; 361 shader_dirty = true;
366 break; 362 break;
367 363
368 // Sync GL stencil test + stencil write mask 364 // Sync GL stencil test + stencil write mask
369 // (Pica stencil test function register also contains a stencil write mask) 365 // (Pica stencil test function register also contains a stencil write mask)
370 case PICA_REG_INDEX(output_merger.stencil_test.raw_func): 366 case PICA_REG_INDEX(framebuffer.output_merger.stencil_test.raw_func):
371 SyncStencilTest(); 367 SyncStencilTest();
372 SyncStencilWriteMask(); 368 SyncStencilWriteMask();
373 break; 369 break;
374 case PICA_REG_INDEX(output_merger.stencil_test.raw_op): 370 case PICA_REG_INDEX(framebuffer.output_merger.stencil_test.raw_op):
375 case PICA_REG_INDEX(framebuffer.depth_format): 371 case PICA_REG_INDEX(framebuffer.framebuffer.depth_format):
376 SyncStencilTest(); 372 SyncStencilTest();
377 break; 373 break;
378 374
379 // Sync GL depth test + depth and color write mask 375 // Sync GL depth test + depth and color write mask
380 // (Pica depth test function register also contains a depth and color write mask) 376 // (Pica depth test function register also contains a depth and color write mask)
381 case PICA_REG_INDEX(output_merger.depth_test_enable): 377 case PICA_REG_INDEX(framebuffer.output_merger.depth_test_enable):
382 SyncDepthTest(); 378 SyncDepthTest();
383 SyncDepthWriteMask(); 379 SyncDepthWriteMask();
384 SyncColorWriteMask(); 380 SyncColorWriteMask();
@@ -386,88 +382,88 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
386 382
387 // Sync GL depth and stencil write mask 383 // Sync GL depth and stencil write mask
388 // (This is a dedicated combined depth / stencil write-enable register) 384 // (This is a dedicated combined depth / stencil write-enable register)
389 case PICA_REG_INDEX(framebuffer.allow_depth_stencil_write): 385 case PICA_REG_INDEX(framebuffer.framebuffer.allow_depth_stencil_write):
390 SyncDepthWriteMask(); 386 SyncDepthWriteMask();
391 SyncStencilWriteMask(); 387 SyncStencilWriteMask();
392 break; 388 break;
393 389
394 // Sync GL color write mask 390 // Sync GL color write mask
395 // (This is a dedicated color write-enable register) 391 // (This is a dedicated color write-enable register)
396 case PICA_REG_INDEX(framebuffer.allow_color_write): 392 case PICA_REG_INDEX(framebuffer.framebuffer.allow_color_write):
397 SyncColorWriteMask(); 393 SyncColorWriteMask();
398 break; 394 break;
399 395
400 // Scissor test 396 // Scissor test
401 case PICA_REG_INDEX(scissor_test.mode): 397 case PICA_REG_INDEX(rasterizer.scissor_test.mode):
402 shader_dirty = true; 398 shader_dirty = true;
403 break; 399 break;
404 400
405 // Logic op 401 // Logic op
406 case PICA_REG_INDEX(output_merger.logic_op): 402 case PICA_REG_INDEX(framebuffer.output_merger.logic_op):
407 SyncLogicOp(); 403 SyncLogicOp();
408 break; 404 break;
409 405
410 // Texture 0 type 406 // Texture 0 type
411 case PICA_REG_INDEX(texture0.type): 407 case PICA_REG_INDEX(texturing.texture0.type):
412 shader_dirty = true; 408 shader_dirty = true;
413 break; 409 break;
414 410
415 // TEV stages 411 // TEV stages
416 // (This also syncs fog_mode and fog_flip which are part of tev_combiner_buffer_input) 412 // (This also syncs fog_mode and fog_flip which are part of tev_combiner_buffer_input)
417 case PICA_REG_INDEX(tev_stage0.color_source1): 413 case PICA_REG_INDEX(texturing.tev_stage0.color_source1):
418 case PICA_REG_INDEX(tev_stage0.color_modifier1): 414 case PICA_REG_INDEX(texturing.tev_stage0.color_modifier1):
419 case PICA_REG_INDEX(tev_stage0.color_op): 415 case PICA_REG_INDEX(texturing.tev_stage0.color_op):
420 case PICA_REG_INDEX(tev_stage0.color_scale): 416 case PICA_REG_INDEX(texturing.tev_stage0.color_scale):
421 case PICA_REG_INDEX(tev_stage1.color_source1): 417 case PICA_REG_INDEX(texturing.tev_stage1.color_source1):
422 case PICA_REG_INDEX(tev_stage1.color_modifier1): 418 case PICA_REG_INDEX(texturing.tev_stage1.color_modifier1):
423 case PICA_REG_INDEX(tev_stage1.color_op): 419 case PICA_REG_INDEX(texturing.tev_stage1.color_op):
424 case PICA_REG_INDEX(tev_stage1.color_scale): 420 case PICA_REG_INDEX(texturing.tev_stage1.color_scale):
425 case PICA_REG_INDEX(tev_stage2.color_source1): 421 case PICA_REG_INDEX(texturing.tev_stage2.color_source1):
426 case PICA_REG_INDEX(tev_stage2.color_modifier1): 422 case PICA_REG_INDEX(texturing.tev_stage2.color_modifier1):
427 case PICA_REG_INDEX(tev_stage2.color_op): 423 case PICA_REG_INDEX(texturing.tev_stage2.color_op):
428 case PICA_REG_INDEX(tev_stage2.color_scale): 424 case PICA_REG_INDEX(texturing.tev_stage2.color_scale):
429 case PICA_REG_INDEX(tev_stage3.color_source1): 425 case PICA_REG_INDEX(texturing.tev_stage3.color_source1):
430 case PICA_REG_INDEX(tev_stage3.color_modifier1): 426 case PICA_REG_INDEX(texturing.tev_stage3.color_modifier1):
431 case PICA_REG_INDEX(tev_stage3.color_op): 427 case PICA_REG_INDEX(texturing.tev_stage3.color_op):
432 case PICA_REG_INDEX(tev_stage3.color_scale): 428 case PICA_REG_INDEX(texturing.tev_stage3.color_scale):
433 case PICA_REG_INDEX(tev_stage4.color_source1): 429 case PICA_REG_INDEX(texturing.tev_stage4.color_source1):
434 case PICA_REG_INDEX(tev_stage4.color_modifier1): 430 case PICA_REG_INDEX(texturing.tev_stage4.color_modifier1):
435 case PICA_REG_INDEX(tev_stage4.color_op): 431 case PICA_REG_INDEX(texturing.tev_stage4.color_op):
436 case PICA_REG_INDEX(tev_stage4.color_scale): 432 case PICA_REG_INDEX(texturing.tev_stage4.color_scale):
437 case PICA_REG_INDEX(tev_stage5.color_source1): 433 case PICA_REG_INDEX(texturing.tev_stage5.color_source1):
438 case PICA_REG_INDEX(tev_stage5.color_modifier1): 434 case PICA_REG_INDEX(texturing.tev_stage5.color_modifier1):
439 case PICA_REG_INDEX(tev_stage5.color_op): 435 case PICA_REG_INDEX(texturing.tev_stage5.color_op):
440 case PICA_REG_INDEX(tev_stage5.color_scale): 436 case PICA_REG_INDEX(texturing.tev_stage5.color_scale):
441 case PICA_REG_INDEX(tev_combiner_buffer_input): 437 case PICA_REG_INDEX(texturing.tev_combiner_buffer_input):
442 shader_dirty = true; 438 shader_dirty = true;
443 break; 439 break;
444 case PICA_REG_INDEX(tev_stage0.const_r): 440 case PICA_REG_INDEX(texturing.tev_stage0.const_r):
445 SyncTevConstColor(0, regs.tev_stage0); 441 SyncTevConstColor(0, regs.texturing.tev_stage0);
446 break; 442 break;
447 case PICA_REG_INDEX(tev_stage1.const_r): 443 case PICA_REG_INDEX(texturing.tev_stage1.const_r):
448 SyncTevConstColor(1, regs.tev_stage1); 444 SyncTevConstColor(1, regs.texturing.tev_stage1);
449 break; 445 break;
450 case PICA_REG_INDEX(tev_stage2.const_r): 446 case PICA_REG_INDEX(texturing.tev_stage2.const_r):
451 SyncTevConstColor(2, regs.tev_stage2); 447 SyncTevConstColor(2, regs.texturing.tev_stage2);
452 break; 448 break;
453 case PICA_REG_INDEX(tev_stage3.const_r): 449 case PICA_REG_INDEX(texturing.tev_stage3.const_r):
454 SyncTevConstColor(3, regs.tev_stage3); 450 SyncTevConstColor(3, regs.texturing.tev_stage3);
455 break; 451 break;
456 case PICA_REG_INDEX(tev_stage4.const_r): 452 case PICA_REG_INDEX(texturing.tev_stage4.const_r):
457 SyncTevConstColor(4, regs.tev_stage4); 453 SyncTevConstColor(4, regs.texturing.tev_stage4);
458 break; 454 break;
459 case PICA_REG_INDEX(tev_stage5.const_r): 455 case PICA_REG_INDEX(texturing.tev_stage5.const_r):
460 SyncTevConstColor(5, regs.tev_stage5); 456 SyncTevConstColor(5, regs.texturing.tev_stage5);
461 break; 457 break;
462 458
463 // TEV combiner buffer color 459 // TEV combiner buffer color
464 case PICA_REG_INDEX(tev_combiner_buffer_color): 460 case PICA_REG_INDEX(texturing.tev_combiner_buffer_color):
465 SyncCombinerColor(); 461 SyncCombinerColor();
466 break; 462 break;
467 463
468 // Fragment lighting switches 464 // Fragment lighting switches
469 case PICA_REG_INDEX(lighting.disable): 465 case PICA_REG_INDEX(lighting.disable):
470 case PICA_REG_INDEX(lighting.num_lights): 466 case PICA_REG_INDEX(lighting.max_light_index):
471 case PICA_REG_INDEX(lighting.config0): 467 case PICA_REG_INDEX(lighting.config0):
472 case PICA_REG_INDEX(lighting.config1): 468 case PICA_REG_INDEX(lighting.config1):
473 case PICA_REG_INDEX(lighting.abs_lut_input): 469 case PICA_REG_INDEX(lighting.abs_lut_input):
@@ -716,8 +712,6 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(PAddr addr, u32 size) {
716 712
717bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { 713bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) {
718 MICROPROFILE_SCOPE(OpenGL_Blits); 714 MICROPROFILE_SCOPE(OpenGL_Blits);
719 using PixelFormat = CachedSurface::PixelFormat;
720 using SurfaceType = CachedSurface::SurfaceType;
721 715
722 CachedSurface src_params; 716 CachedSurface src_params;
723 src_params.addr = config.GetPhysicalInputAddress(); 717 src_params.addr = config.GetPhysicalInputAddress();
@@ -748,7 +742,8 @@ bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransfe
748 742
749 // Adjust the source rectangle to take into account parts of the input lines being cropped 743 // Adjust the source rectangle to take into account parts of the input lines being cropped
750 if (config.input_width > config.output_width) { 744 if (config.input_width > config.output_width) {
751 src_rect.right -= (config.input_width - config.output_width) * src_surface->res_scale_width; 745 src_rect.right -= static_cast<int>((config.input_width - config.output_width) *
746 src_surface->res_scale_width);
752 } 747 }
753 748
754 // Require destination surface to have same resolution scale as source to preserve scaling 749 // Require destination surface to have same resolution scale as source to preserve scaling
@@ -977,7 +972,9 @@ void RasterizerOpenGL::SamplerInfo::Create() {
977 // Other attributes have correct defaults 972 // Other attributes have correct defaults
978} 973}
979 974
980void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Pica::Regs::TextureConfig& config) { 975void RasterizerOpenGL::SamplerInfo::SyncWithConfig(
976 const Pica::TexturingRegs::TextureConfig& config) {
977
981 GLuint s = sampler.handle; 978 GLuint s = sampler.handle;
982 979
983 if (mag_filter != config.mag_filter) { 980 if (mag_filter != config.mag_filter) {
@@ -1074,67 +1071,69 @@ void RasterizerOpenGL::SetShader() {
1074 1071
1075 current_shader = shader_cache.emplace(config, std::move(shader)).first->second.get(); 1072 current_shader = shader_cache.emplace(config, std::move(shader)).first->second.get();
1076 1073
1077 unsigned int block_index = 1074 GLuint block_index = glGetUniformBlockIndex(current_shader->shader.handle, "shader_data");
1078 glGetUniformBlockIndex(current_shader->shader.handle, "shader_data"); 1075 if (block_index != GL_INVALID_INDEX) {
1079 GLint block_size; 1076 GLint block_size;
1080 glGetActiveUniformBlockiv(current_shader->shader.handle, block_index, 1077 glGetActiveUniformBlockiv(current_shader->shader.handle, block_index,
1081 GL_UNIFORM_BLOCK_DATA_SIZE, &block_size); 1078 GL_UNIFORM_BLOCK_DATA_SIZE, &block_size);
1082 ASSERT_MSG(block_size == sizeof(UniformData), 1079 ASSERT_MSG(block_size == sizeof(UniformData),
1083 "Uniform block size did not match! Got %d, expected %zu", 1080 "Uniform block size did not match! Got %d, expected %zu",
1084 static_cast<int>(block_size), sizeof(UniformData)); 1081 static_cast<int>(block_size), sizeof(UniformData));
1085 glUniformBlockBinding(current_shader->shader.handle, block_index, 0); 1082 glUniformBlockBinding(current_shader->shader.handle, block_index, 0);
1086 1083
1087 // Update uniforms 1084 // Update uniforms
1088 SyncDepthScale(); 1085 SyncDepthScale();
1089 SyncDepthOffset(); 1086 SyncDepthOffset();
1090 SyncAlphaTest(); 1087 SyncAlphaTest();
1091 SyncCombinerColor(); 1088 SyncCombinerColor();
1092 auto& tev_stages = Pica::g_state.regs.GetTevStages(); 1089 auto& tev_stages = Pica::g_state.regs.texturing.GetTevStages();
1093 for (int index = 0; index < tev_stages.size(); ++index) 1090 for (int index = 0; index < tev_stages.size(); ++index)
1094 SyncTevConstColor(index, tev_stages[index]); 1091 SyncTevConstColor(index, tev_stages[index]);
1092
1093 SyncGlobalAmbient();
1094 for (int light_index = 0; light_index < 8; light_index++) {
1095 SyncLightSpecular0(light_index);
1096 SyncLightSpecular1(light_index);
1097 SyncLightDiffuse(light_index);
1098 SyncLightAmbient(light_index);
1099 SyncLightPosition(light_index);
1100 SyncLightDistanceAttenuationBias(light_index);
1101 SyncLightDistanceAttenuationScale(light_index);
1102 }
1095 1103
1096 SyncGlobalAmbient(); 1104 SyncFogColor();
1097 for (int light_index = 0; light_index < 8; light_index++) {
1098 SyncLightSpecular0(light_index);
1099 SyncLightSpecular1(light_index);
1100 SyncLightDiffuse(light_index);
1101 SyncLightAmbient(light_index);
1102 SyncLightPosition(light_index);
1103 SyncLightDistanceAttenuationBias(light_index);
1104 SyncLightDistanceAttenuationScale(light_index);
1105 } 1105 }
1106
1107 SyncFogColor();
1108 } 1106 }
1109} 1107}
1110 1108
1111void RasterizerOpenGL::SyncCullMode() { 1109void RasterizerOpenGL::SyncCullMode() {
1112 const auto& regs = Pica::g_state.regs; 1110 const auto& regs = Pica::g_state.regs;
1113 1111
1114 switch (regs.cull_mode) { 1112 switch (regs.rasterizer.cull_mode) {
1115 case Pica::Regs::CullMode::KeepAll: 1113 case Pica::RasterizerRegs::CullMode::KeepAll:
1116 state.cull.enabled = false; 1114 state.cull.enabled = false;
1117 break; 1115 break;
1118 1116
1119 case Pica::Regs::CullMode::KeepClockWise: 1117 case Pica::RasterizerRegs::CullMode::KeepClockWise:
1120 state.cull.enabled = true; 1118 state.cull.enabled = true;
1121 state.cull.front_face = GL_CW; 1119 state.cull.front_face = GL_CW;
1122 break; 1120 break;
1123 1121
1124 case Pica::Regs::CullMode::KeepCounterClockWise: 1122 case Pica::RasterizerRegs::CullMode::KeepCounterClockWise:
1125 state.cull.enabled = true; 1123 state.cull.enabled = true;
1126 state.cull.front_face = GL_CCW; 1124 state.cull.front_face = GL_CCW;
1127 break; 1125 break;
1128 1126
1129 default: 1127 default:
1130 LOG_CRITICAL(Render_OpenGL, "Unknown cull mode %d", regs.cull_mode.Value()); 1128 LOG_CRITICAL(Render_OpenGL, "Unknown cull mode %d", regs.rasterizer.cull_mode.Value());
1131 UNIMPLEMENTED(); 1129 UNIMPLEMENTED();
1132 break; 1130 break;
1133 } 1131 }
1134} 1132}
1135 1133
1136void RasterizerOpenGL::SyncDepthScale() { 1134void RasterizerOpenGL::SyncDepthScale() {
1137 float depth_scale = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_range).ToFloat32(); 1135 float depth_scale =
1136 Pica::float24::FromRaw(Pica::g_state.regs.rasterizer.viewport_depth_range).ToFloat32();
1138 if (depth_scale != uniform_block_data.data.depth_scale) { 1137 if (depth_scale != uniform_block_data.data.depth_scale) {
1139 uniform_block_data.data.depth_scale = depth_scale; 1138 uniform_block_data.data.depth_scale = depth_scale;
1140 uniform_block_data.dirty = true; 1139 uniform_block_data.dirty = true;
@@ -1143,7 +1142,7 @@ void RasterizerOpenGL::SyncDepthScale() {
1143 1142
1144void RasterizerOpenGL::SyncDepthOffset() { 1143void RasterizerOpenGL::SyncDepthOffset() {
1145 float depth_offset = 1144 float depth_offset =
1146 Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_near_plane).ToFloat32(); 1145 Pica::float24::FromRaw(Pica::g_state.regs.rasterizer.viewport_depth_near_plane).ToFloat32();
1147 if (depth_offset != uniform_block_data.data.depth_offset) { 1146 if (depth_offset != uniform_block_data.data.depth_offset) {
1148 uniform_block_data.data.depth_offset = depth_offset; 1147 uniform_block_data.data.depth_offset = depth_offset;
1149 uniform_block_data.dirty = true; 1148 uniform_block_data.dirty = true;
@@ -1151,25 +1150,28 @@ void RasterizerOpenGL::SyncDepthOffset() {
1151} 1150}
1152 1151
1153void RasterizerOpenGL::SyncBlendEnabled() { 1152void RasterizerOpenGL::SyncBlendEnabled() {
1154 state.blend.enabled = (Pica::g_state.regs.output_merger.alphablend_enable == 1); 1153 state.blend.enabled = (Pica::g_state.regs.framebuffer.output_merger.alphablend_enable == 1);
1155} 1154}
1156 1155
1157void RasterizerOpenGL::SyncBlendFuncs() { 1156void RasterizerOpenGL::SyncBlendFuncs() {
1158 const auto& regs = Pica::g_state.regs; 1157 const auto& regs = Pica::g_state.regs;
1159 state.blend.rgb_equation = 1158 state.blend.rgb_equation =
1160 PicaToGL::BlendEquation(regs.output_merger.alpha_blending.blend_equation_rgb); 1159 PicaToGL::BlendEquation(regs.framebuffer.output_merger.alpha_blending.blend_equation_rgb);
1161 state.blend.a_equation = 1160 state.blend.a_equation =
1162 PicaToGL::BlendEquation(regs.output_merger.alpha_blending.blend_equation_a); 1161 PicaToGL::BlendEquation(regs.framebuffer.output_merger.alpha_blending.blend_equation_a);
1163 state.blend.src_rgb_func = 1162 state.blend.src_rgb_func =
1164 PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_rgb); 1163 PicaToGL::BlendFunc(regs.framebuffer.output_merger.alpha_blending.factor_source_rgb);
1165 state.blend.dst_rgb_func = 1164 state.blend.dst_rgb_func =
1166 PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_dest_rgb); 1165 PicaToGL::BlendFunc(regs.framebuffer.output_merger.alpha_blending.factor_dest_rgb);
1167 state.blend.src_a_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_a); 1166 state.blend.src_a_func =
1168 state.blend.dst_a_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_dest_a); 1167 PicaToGL::BlendFunc(regs.framebuffer.output_merger.alpha_blending.factor_source_a);
1168 state.blend.dst_a_func =
1169 PicaToGL::BlendFunc(regs.framebuffer.output_merger.alpha_blending.factor_dest_a);
1169} 1170}
1170 1171
1171void RasterizerOpenGL::SyncBlendColor() { 1172void RasterizerOpenGL::SyncBlendColor() {
1172 auto blend_color = PicaToGL::ColorRGBA8(Pica::g_state.regs.output_merger.blend_const.raw); 1173 auto blend_color =
1174 PicaToGL::ColorRGBA8(Pica::g_state.regs.framebuffer.output_merger.blend_const.raw);
1173 state.blend.color.red = blend_color[0]; 1175 state.blend.color.red = blend_color[0];
1174 state.blend.color.green = blend_color[1]; 1176 state.blend.color.green = blend_color[1];
1175 state.blend.color.blue = blend_color[2]; 1177 state.blend.color.blue = blend_color[2];
@@ -1179,8 +1181,8 @@ void RasterizerOpenGL::SyncBlendColor() {
1179void RasterizerOpenGL::SyncFogColor() { 1181void RasterizerOpenGL::SyncFogColor() {
1180 const auto& regs = Pica::g_state.regs; 1182 const auto& regs = Pica::g_state.regs;
1181 uniform_block_data.data.fog_color = { 1183 uniform_block_data.data.fog_color = {
1182 regs.fog_color.r.Value() / 255.0f, regs.fog_color.g.Value() / 255.0f, 1184 regs.texturing.fog_color.r.Value() / 255.0f, regs.texturing.fog_color.g.Value() / 255.0f,
1183 regs.fog_color.b.Value() / 255.0f, 1185 regs.texturing.fog_color.b.Value() / 255.0f,
1184 }; 1186 };
1185 uniform_block_data.dirty = true; 1187 uniform_block_data.dirty = true;
1186} 1188}
@@ -1201,70 +1203,78 @@ void RasterizerOpenGL::SyncFogLUT() {
1201 1203
1202void RasterizerOpenGL::SyncAlphaTest() { 1204void RasterizerOpenGL::SyncAlphaTest() {
1203 const auto& regs = Pica::g_state.regs; 1205 const auto& regs = Pica::g_state.regs;
1204 if (regs.output_merger.alpha_test.ref != uniform_block_data.data.alphatest_ref) { 1206 if (regs.framebuffer.output_merger.alpha_test.ref != uniform_block_data.data.alphatest_ref) {
1205 uniform_block_data.data.alphatest_ref = regs.output_merger.alpha_test.ref; 1207 uniform_block_data.data.alphatest_ref = regs.framebuffer.output_merger.alpha_test.ref;
1206 uniform_block_data.dirty = true; 1208 uniform_block_data.dirty = true;
1207 } 1209 }
1208} 1210}
1209 1211
1210void RasterizerOpenGL::SyncLogicOp() { 1212void RasterizerOpenGL::SyncLogicOp() {
1211 state.logic_op = PicaToGL::LogicOp(Pica::g_state.regs.output_merger.logic_op); 1213 state.logic_op = PicaToGL::LogicOp(Pica::g_state.regs.framebuffer.output_merger.logic_op);
1212} 1214}
1213 1215
1214void RasterizerOpenGL::SyncColorWriteMask() { 1216void RasterizerOpenGL::SyncColorWriteMask() {
1215 const auto& regs = Pica::g_state.regs; 1217 const auto& regs = Pica::g_state.regs;
1216 1218
1217 auto IsColorWriteEnabled = [&](u32 value) { 1219 auto IsColorWriteEnabled = [&](u32 value) {
1218 return (regs.framebuffer.allow_color_write != 0 && value != 0) ? GL_TRUE : GL_FALSE; 1220 return (regs.framebuffer.framebuffer.allow_color_write != 0 && value != 0) ? GL_TRUE
1221 : GL_FALSE;
1219 }; 1222 };
1220 1223
1221 state.color_mask.red_enabled = IsColorWriteEnabled(regs.output_merger.red_enable); 1224 state.color_mask.red_enabled = IsColorWriteEnabled(regs.framebuffer.output_merger.red_enable);
1222 state.color_mask.green_enabled = IsColorWriteEnabled(regs.output_merger.green_enable); 1225 state.color_mask.green_enabled =
1223 state.color_mask.blue_enabled = IsColorWriteEnabled(regs.output_merger.blue_enable); 1226 IsColorWriteEnabled(regs.framebuffer.output_merger.green_enable);
1224 state.color_mask.alpha_enabled = IsColorWriteEnabled(regs.output_merger.alpha_enable); 1227 state.color_mask.blue_enabled = IsColorWriteEnabled(regs.framebuffer.output_merger.blue_enable);
1228 state.color_mask.alpha_enabled =
1229 IsColorWriteEnabled(regs.framebuffer.output_merger.alpha_enable);
1225} 1230}
1226 1231
1227void RasterizerOpenGL::SyncStencilWriteMask() { 1232void RasterizerOpenGL::SyncStencilWriteMask() {
1228 const auto& regs = Pica::g_state.regs; 1233 const auto& regs = Pica::g_state.regs;
1229 state.stencil.write_mask = (regs.framebuffer.allow_depth_stencil_write != 0) 1234 state.stencil.write_mask =
1230 ? static_cast<GLuint>(regs.output_merger.stencil_test.write_mask) 1235 (regs.framebuffer.framebuffer.allow_depth_stencil_write != 0)
1231 : 0; 1236 ? static_cast<GLuint>(regs.framebuffer.output_merger.stencil_test.write_mask)
1237 : 0;
1232} 1238}
1233 1239
1234void RasterizerOpenGL::SyncDepthWriteMask() { 1240void RasterizerOpenGL::SyncDepthWriteMask() {
1235 const auto& regs = Pica::g_state.regs; 1241 const auto& regs = Pica::g_state.regs;
1236 state.depth.write_mask = 1242 state.depth.write_mask = (regs.framebuffer.framebuffer.allow_depth_stencil_write != 0 &&
1237 (regs.framebuffer.allow_depth_stencil_write != 0 && regs.output_merger.depth_write_enable) 1243 regs.framebuffer.output_merger.depth_write_enable)
1238 ? GL_TRUE 1244 ? GL_TRUE
1239 : GL_FALSE; 1245 : GL_FALSE;
1240} 1246}
1241 1247
1242void RasterizerOpenGL::SyncStencilTest() { 1248void RasterizerOpenGL::SyncStencilTest() {
1243 const auto& regs = Pica::g_state.regs; 1249 const auto& regs = Pica::g_state.regs;
1244 state.stencil.test_enabled = regs.output_merger.stencil_test.enable && 1250 state.stencil.test_enabled =
1245 regs.framebuffer.depth_format == Pica::Regs::DepthFormat::D24S8; 1251 regs.framebuffer.output_merger.stencil_test.enable &&
1246 state.stencil.test_func = PicaToGL::CompareFunc(regs.output_merger.stencil_test.func); 1252 regs.framebuffer.framebuffer.depth_format == Pica::FramebufferRegs::DepthFormat::D24S8;
1247 state.stencil.test_ref = regs.output_merger.stencil_test.reference_value; 1253 state.stencil.test_func =
1248 state.stencil.test_mask = regs.output_merger.stencil_test.input_mask; 1254 PicaToGL::CompareFunc(regs.framebuffer.output_merger.stencil_test.func);
1255 state.stencil.test_ref = regs.framebuffer.output_merger.stencil_test.reference_value;
1256 state.stencil.test_mask = regs.framebuffer.output_merger.stencil_test.input_mask;
1249 state.stencil.action_stencil_fail = 1257 state.stencil.action_stencil_fail =
1250 PicaToGL::StencilOp(regs.output_merger.stencil_test.action_stencil_fail); 1258 PicaToGL::StencilOp(regs.framebuffer.output_merger.stencil_test.action_stencil_fail);
1251 state.stencil.action_depth_fail = 1259 state.stencil.action_depth_fail =
1252 PicaToGL::StencilOp(regs.output_merger.stencil_test.action_depth_fail); 1260 PicaToGL::StencilOp(regs.framebuffer.output_merger.stencil_test.action_depth_fail);
1253 state.stencil.action_depth_pass = 1261 state.stencil.action_depth_pass =
1254 PicaToGL::StencilOp(regs.output_merger.stencil_test.action_depth_pass); 1262 PicaToGL::StencilOp(regs.framebuffer.output_merger.stencil_test.action_depth_pass);
1255} 1263}
1256 1264
1257void RasterizerOpenGL::SyncDepthTest() { 1265void RasterizerOpenGL::SyncDepthTest() {
1258 const auto& regs = Pica::g_state.regs; 1266 const auto& regs = Pica::g_state.regs;
1259 state.depth.test_enabled = 1267 state.depth.test_enabled = regs.framebuffer.output_merger.depth_test_enable == 1 ||
1260 regs.output_merger.depth_test_enable == 1 || regs.output_merger.depth_write_enable == 1; 1268 regs.framebuffer.output_merger.depth_write_enable == 1;
1261 state.depth.test_func = regs.output_merger.depth_test_enable == 1 1269 state.depth.test_func =
1262 ? PicaToGL::CompareFunc(regs.output_merger.depth_test_func) 1270 regs.framebuffer.output_merger.depth_test_enable == 1
1263 : GL_ALWAYS; 1271 ? PicaToGL::CompareFunc(regs.framebuffer.output_merger.depth_test_func)
1272 : GL_ALWAYS;
1264} 1273}
1265 1274
1266void RasterizerOpenGL::SyncCombinerColor() { 1275void RasterizerOpenGL::SyncCombinerColor() {
1267 auto combiner_color = PicaToGL::ColorRGBA8(Pica::g_state.regs.tev_combiner_buffer_color.raw); 1276 auto combiner_color =
1277 PicaToGL::ColorRGBA8(Pica::g_state.regs.texturing.tev_combiner_buffer_color.raw);
1268 if (combiner_color != uniform_block_data.data.tev_combiner_buffer_color) { 1278 if (combiner_color != uniform_block_data.data.tev_combiner_buffer_color) {
1269 uniform_block_data.data.tev_combiner_buffer_color = combiner_color; 1279 uniform_block_data.data.tev_combiner_buffer_color = combiner_color;
1270 uniform_block_data.dirty = true; 1280 uniform_block_data.dirty = true;
@@ -1272,7 +1282,7 @@ void RasterizerOpenGL::SyncCombinerColor() {
1272} 1282}
1273 1283
1274void RasterizerOpenGL::SyncTevConstColor(int stage_index, 1284void RasterizerOpenGL::SyncTevConstColor(int stage_index,
1275 const Pica::Regs::TevStageConfig& tev_stage) { 1285 const Pica::TexturingRegs::TevStageConfig& tev_stage) {
1276 auto const_color = PicaToGL::ColorRGBA8(tev_stage.const_color); 1286 auto const_color = PicaToGL::ColorRGBA8(tev_stage.const_color);
1277 if (const_color != uniform_block_data.data.const_color[stage_index]) { 1287 if (const_color != uniform_block_data.data.const_color[stage_index]) {
1278 uniform_block_data.data.const_color[stage_index] = const_color; 1288 uniform_block_data.data.const_color[stage_index] = const_color;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index e1a9cb361..ecf737438 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -16,10 +16,13 @@
16#include "common/hash.h" 16#include "common/hash.h"
17#include "common/vector_math.h" 17#include "common/vector_math.h"
18#include "core/hw/gpu.h" 18#include "core/hw/gpu.h"
19#include "video_core/pica.h"
20#include "video_core/pica_state.h" 19#include "video_core/pica_state.h"
21#include "video_core/pica_types.h" 20#include "video_core/pica_types.h"
22#include "video_core/rasterizer_interface.h" 21#include "video_core/rasterizer_interface.h"
22#include "video_core/regs_framebuffer.h"
23#include "video_core/regs_lighting.h"
24#include "video_core/regs_rasterizer.h"
25#include "video_core/regs_texturing.h"
23#include "video_core/renderer_opengl/gl_rasterizer_cache.h" 26#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
24#include "video_core/renderer_opengl/gl_resource_manager.h" 27#include "video_core/renderer_opengl/gl_resource_manager.h"
25#include "video_core/renderer_opengl/gl_state.h" 28#include "video_core/renderer_opengl/gl_state.h"
@@ -52,20 +55,20 @@ union PicaShaderConfig {
52 55
53 const auto& regs = Pica::g_state.regs; 56 const auto& regs = Pica::g_state.regs;
54 57
55 state.scissor_test_mode = regs.scissor_test.mode; 58 state.scissor_test_mode = regs.rasterizer.scissor_test.mode;
56 59
57 state.depthmap_enable = regs.depthmap_enable; 60 state.depthmap_enable = regs.rasterizer.depthmap_enable;
58 61
59 state.alpha_test_func = regs.output_merger.alpha_test.enable 62 state.alpha_test_func = regs.framebuffer.output_merger.alpha_test.enable
60 ? regs.output_merger.alpha_test.func.Value() 63 ? regs.framebuffer.output_merger.alpha_test.func.Value()
61 : Pica::Regs::CompareFunc::Always; 64 : Pica::FramebufferRegs::CompareFunc::Always;
62 65
63 state.texture0_type = regs.texture0.type; 66 state.texture0_type = regs.texturing.texture0.type;
64 67
65 // Copy relevant tev stages fields. 68 // Copy relevant tev stages fields.
66 // We don't sync const_color here because of the high variance, it is a 69 // We don't sync const_color here because of the high variance, it is a
67 // shader uniform instead. 70 // shader uniform instead.
68 const auto& tev_stages = regs.GetTevStages(); 71 const auto& tev_stages = regs.texturing.GetTevStages();
69 DEBUG_ASSERT(state.tev_stages.size() == tev_stages.size()); 72 DEBUG_ASSERT(state.tev_stages.size() == tev_stages.size());
70 for (size_t i = 0; i < tev_stages.size(); i++) { 73 for (size_t i = 0; i < tev_stages.size(); i++) {
71 const auto& tev_stage = tev_stages[i]; 74 const auto& tev_stage = tev_stages[i];
@@ -75,16 +78,17 @@ union PicaShaderConfig {
75 state.tev_stages[i].scales_raw = tev_stage.scales_raw; 78 state.tev_stages[i].scales_raw = tev_stage.scales_raw;
76 } 79 }
77 80
78 state.fog_mode = regs.fog_mode; 81 state.fog_mode = regs.texturing.fog_mode;
79 state.fog_flip = regs.fog_flip; 82 state.fog_flip = regs.texturing.fog_flip != 0;
80 83
81 state.combiner_buffer_input = regs.tev_combiner_buffer_input.update_mask_rgb.Value() | 84 state.combiner_buffer_input =
82 regs.tev_combiner_buffer_input.update_mask_a.Value() << 4; 85 regs.texturing.tev_combiner_buffer_input.update_mask_rgb.Value() |
86 regs.texturing.tev_combiner_buffer_input.update_mask_a.Value() << 4;
83 87
84 // Fragment lighting 88 // Fragment lighting
85 89
86 state.lighting.enable = !regs.lighting.disable; 90 state.lighting.enable = !regs.lighting.disable;
87 state.lighting.src_num = regs.lighting.num_lights + 1; 91 state.lighting.src_num = regs.lighting.max_light_index + 1;
88 92
89 for (unsigned light_index = 0; light_index < state.lighting.src_num; ++light_index) { 93 for (unsigned light_index = 0; light_index < state.lighting.src_num; ++light_index) {
90 unsigned num = regs.lighting.light_enable.GetNum(light_index); 94 unsigned num = regs.lighting.light_enable.GetNum(light_index);
@@ -159,8 +163,8 @@ union PicaShaderConfig {
159 u32 modifiers_raw; 163 u32 modifiers_raw;
160 u32 ops_raw; 164 u32 ops_raw;
161 u32 scales_raw; 165 u32 scales_raw;
162 explicit operator Pica::Regs::TevStageConfig() const noexcept { 166 explicit operator Pica::TexturingRegs::TevStageConfig() const noexcept {
163 Pica::Regs::TevStageConfig stage; 167 Pica::TexturingRegs::TevStageConfig stage;
164 stage.sources_raw = sources_raw; 168 stage.sources_raw = sources_raw;
165 stage.modifiers_raw = modifiers_raw; 169 stage.modifiers_raw = modifiers_raw;
166 stage.ops_raw = ops_raw; 170 stage.ops_raw = ops_raw;
@@ -171,14 +175,14 @@ union PicaShaderConfig {
171 }; 175 };
172 176
173 struct State { 177 struct State {
174 Pica::Regs::CompareFunc alpha_test_func; 178 Pica::FramebufferRegs::CompareFunc alpha_test_func;
175 Pica::Regs::ScissorMode scissor_test_mode; 179 Pica::RasterizerRegs::ScissorMode scissor_test_mode;
176 Pica::Regs::TextureConfig::TextureType texture0_type; 180 Pica::TexturingRegs::TextureConfig::TextureType texture0_type;
177 std::array<TevStageConfigRaw, 6> tev_stages; 181 std::array<TevStageConfigRaw, 6> tev_stages;
178 u8 combiner_buffer_input; 182 u8 combiner_buffer_input;
179 183
180 Pica::Regs::DepthBuffering depthmap_enable; 184 Pica::RasterizerRegs::DepthBuffering depthmap_enable;
181 Pica::Regs::FogMode fog_mode; 185 Pica::TexturingRegs::FogMode fog_mode;
182 bool fog_flip; 186 bool fog_flip;
183 187
184 struct { 188 struct {
@@ -191,18 +195,18 @@ union PicaShaderConfig {
191 195
192 bool enable; 196 bool enable;
193 unsigned src_num; 197 unsigned src_num;
194 Pica::Regs::LightingBumpMode bump_mode; 198 Pica::LightingRegs::LightingBumpMode bump_mode;
195 unsigned bump_selector; 199 unsigned bump_selector;
196 bool bump_renorm; 200 bool bump_renorm;
197 bool clamp_highlights; 201 bool clamp_highlights;
198 202
199 Pica::Regs::LightingConfig config; 203 Pica::LightingRegs::LightingConfig config;
200 Pica::Regs::LightingFresnelSelector fresnel_selector; 204 Pica::LightingRegs::LightingFresnelSelector fresnel_selector;
201 205
202 struct { 206 struct {
203 bool enable; 207 bool enable;
204 bool abs_input; 208 bool abs_input;
205 Pica::Regs::LightingLutInput type; 209 Pica::LightingRegs::LightingLutInput type;
206 float scale; 210 float scale;
207 } lut_d0, lut_d1, lut_fr, lut_rr, lut_rg, lut_rb; 211 } lut_d0, lut_d1, lut_fr, lut_rr, lut_rg, lut_rb;
208 } lighting; 212 } lighting;
@@ -251,7 +255,7 @@ public:
251 255
252private: 256private:
253 struct SamplerInfo { 257 struct SamplerInfo {
254 using TextureConfig = Pica::Regs::TextureConfig; 258 using TextureConfig = Pica::TexturingRegs::TextureConfig;
255 259
256 OGLSampler sampler; 260 OGLSampler sampler;
257 261
@@ -398,7 +402,7 @@ private:
398 void SyncCombinerColor(); 402 void SyncCombinerColor();
399 403
400 /// Syncs the TEV constant color to match the PICA register 404 /// Syncs the TEV constant color to match the PICA register
401 void SyncTevConstColor(int tev_index, const Pica::Regs::TevStageConfig& tev_stage); 405 void SyncTevConstColor(int tev_index, const Pica::TexturingRegs::TevStageConfig& tev_stage);
402 406
403 /// Syncs the lighting global ambient color to match the PICA register 407 /// Syncs the lighting global ambient color to match the PICA register
404 void SyncGlobalAmbient(); 408 void SyncGlobalAmbient();
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index ef3b06a7b..0818a87b3 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -17,10 +17,10 @@
17#include "common/vector_math.h" 17#include "common/vector_math.h"
18#include "core/frontend/emu_window.h" 18#include "core/frontend/emu_window.h"
19#include "core/memory.h" 19#include "core/memory.h"
20#include "video_core/debug_utils/debug_utils.h"
21#include "video_core/pica_state.h" 20#include "video_core/pica_state.h"
22#include "video_core/renderer_opengl/gl_rasterizer_cache.h" 21#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
23#include "video_core/renderer_opengl/gl_state.h" 22#include "video_core/renderer_opengl/gl_state.h"
23#include "video_core/texture/texture_decode.h"
24#include "video_core/utils.h" 24#include "video_core/utils.h"
25#include "video_core/video_core.h" 25#include "video_core/video_core.h"
26 26
@@ -172,7 +172,6 @@ bool RasterizerCacheOpenGL::TryBlitSurfaces(CachedSurface* src_surface,
172 const MathUtil::Rectangle<int>& src_rect, 172 const MathUtil::Rectangle<int>& src_rect,
173 CachedSurface* dst_surface, 173 CachedSurface* dst_surface,
174 const MathUtil::Rectangle<int>& dst_rect) { 174 const MathUtil::Rectangle<int>& dst_rect) {
175 using SurfaceType = CachedSurface::SurfaceType;
176 175
177 if (!CachedSurface::CheckFormatsBlittable(src_surface->pixel_format, 176 if (!CachedSurface::CheckFormatsBlittable(src_surface->pixel_format,
178 dst_surface->pixel_format)) { 177 dst_surface->pixel_format)) {
@@ -340,17 +339,16 @@ CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bo
340 339
341 std::vector<Math::Vec4<u8>> tex_buffer(params.width * params.height); 340 std::vector<Math::Vec4<u8>> tex_buffer(params.width * params.height);
342 341
343 Pica::DebugUtils::TextureInfo tex_info; 342 Pica::Texture::TextureInfo tex_info;
344 tex_info.width = params.width; 343 tex_info.width = params.width;
345 tex_info.height = params.height; 344 tex_info.height = params.height;
346 tex_info.stride = 345 tex_info.format = (Pica::TexturingRegs::TextureFormat)params.pixel_format;
347 params.width * CachedSurface::GetFormatBpp(params.pixel_format) / 8; 346 tex_info.SetDefaultStride();
348 tex_info.format = (Pica::Regs::TextureFormat)params.pixel_format;
349 tex_info.physical_address = params.addr; 347 tex_info.physical_address = params.addr;
350 348
351 for (unsigned y = 0; y < params.height; ++y) { 349 for (unsigned y = 0; y < params.height; ++y) {
352 for (unsigned x = 0; x < params.width; ++x) { 350 for (unsigned x = 0; x < params.width; ++x) {
353 tex_buffer[x + params.width * y] = Pica::DebugUtils::LookupTexture( 351 tex_buffer[x + params.width * y] = Pica::Texture::LookupTexture(
354 texture_src_data, x, params.height - 1 - y, tex_info); 352 texture_src_data, x, params.height - 1 - y, tex_info);
355 } 353 }
356 } 354 }
@@ -512,9 +510,10 @@ CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params
512} 510}
513 511
514CachedSurface* RasterizerCacheOpenGL::GetTextureSurface( 512CachedSurface* RasterizerCacheOpenGL::GetTextureSurface(
515 const Pica::Regs::FullTextureConfig& config) { 513 const Pica::TexturingRegs::FullTextureConfig& config) {
516 Pica::DebugUtils::TextureInfo info = 514
517 Pica::DebugUtils::TextureInfo::FromPicaRegister(config.config, config.format); 515 Pica::Texture::TextureInfo info =
516 Pica::Texture::TextureInfo::FromPicaRegister(config.config, config.format);
518 517
519 CachedSurface params; 518 CachedSurface params;
520 params.addr = info.physical_address; 519 params.addr = info.physical_address;
@@ -526,7 +525,9 @@ CachedSurface* RasterizerCacheOpenGL::GetTextureSurface(
526} 525}
527 526
528std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> 527std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>>
529RasterizerCacheOpenGL::GetFramebufferSurfaces(const Pica::Regs::FramebufferConfig& config) { 528RasterizerCacheOpenGL::GetFramebufferSurfaces(
529 const Pica::FramebufferRegs::FramebufferConfig& config) {
530
530 const auto& regs = Pica::g_state.regs; 531 const auto& regs = Pica::g_state.regs;
531 532
532 // Make sur that framebuffers don't overlap if both color and depth are being used 533 // Make sur that framebuffers don't overlap if both color and depth are being used
@@ -538,11 +539,12 @@ RasterizerCacheOpenGL::GetFramebufferSurfaces(const Pica::Regs::FramebufferConfi
538 config.GetColorBufferPhysicalAddress(), 539 config.GetColorBufferPhysicalAddress(),
539 fb_area * GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(config.color_format.Value())), 540 fb_area * GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(config.color_format.Value())),
540 config.GetDepthBufferPhysicalAddress(), 541 config.GetDepthBufferPhysicalAddress(),
541 fb_area * Pica::Regs::BytesPerDepthPixel(config.depth_format)); 542 fb_area * Pica::FramebufferRegs::BytesPerDepthPixel(config.depth_format));
542 bool using_color_fb = config.GetColorBufferPhysicalAddress() != 0; 543 bool using_color_fb = config.GetColorBufferPhysicalAddress() != 0;
543 bool using_depth_fb = config.GetDepthBufferPhysicalAddress() != 0 && 544 bool using_depth_fb =
544 (regs.output_merger.depth_test_enable || 545 config.GetDepthBufferPhysicalAddress() != 0 &&
545 regs.output_merger.depth_write_enable || !framebuffers_overlap); 546 (regs.framebuffer.output_merger.depth_test_enable ||
547 regs.framebuffer.output_merger.depth_write_enable || !framebuffers_overlap);
546 548
547 if (framebuffers_overlap && using_color_fb && using_depth_fb) { 549 if (framebuffers_overlap && using_color_fb && using_depth_fb) {
548 LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; " 550 LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; "
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index b50e8292b..aea20c693 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -8,13 +8,21 @@
8#include <memory> 8#include <memory>
9#include <set> 9#include <set>
10#include <tuple> 10#include <tuple>
11#ifdef __GNUC__
12#pragma GCC diagnostic push
13#pragma GCC diagnostic ignored "-Wunused-local-typedef"
14#endif
11#include <boost/icl/interval_map.hpp> 15#include <boost/icl/interval_map.hpp>
16#ifdef __GNUC__
17#pragma GCC diagnostic pop
18#endif
12#include <glad/glad.h> 19#include <glad/glad.h>
13#include "common/assert.h" 20#include "common/assert.h"
14#include "common/common_funcs.h" 21#include "common/common_funcs.h"
15#include "common/common_types.h" 22#include "common/common_types.h"
16#include "core/hw/gpu.h" 23#include "core/hw/gpu.h"
17#include "video_core/pica.h" 24#include "video_core/regs_framebuffer.h"
25#include "video_core/regs_texturing.h"
18#include "video_core/renderer_opengl/gl_resource_manager.h" 26#include "video_core/renderer_opengl/gl_resource_manager.h"
19 27
20namespace MathUtil { 28namespace MathUtil {
@@ -89,15 +97,15 @@ struct CachedSurface {
89 return bpp_table[(unsigned int)format]; 97 return bpp_table[(unsigned int)format];
90 } 98 }
91 99
92 static PixelFormat PixelFormatFromTextureFormat(Pica::Regs::TextureFormat format) { 100 static PixelFormat PixelFormatFromTextureFormat(Pica::TexturingRegs::TextureFormat format) {
93 return ((unsigned int)format < 14) ? (PixelFormat)format : PixelFormat::Invalid; 101 return ((unsigned int)format < 14) ? (PixelFormat)format : PixelFormat::Invalid;
94 } 102 }
95 103
96 static PixelFormat PixelFormatFromColorFormat(Pica::Regs::ColorFormat format) { 104 static PixelFormat PixelFormatFromColorFormat(Pica::FramebufferRegs::ColorFormat format) {
97 return ((unsigned int)format < 5) ? (PixelFormat)format : PixelFormat::Invalid; 105 return ((unsigned int)format < 5) ? (PixelFormat)format : PixelFormat::Invalid;
98 } 106 }
99 107
100 static PixelFormat PixelFormatFromDepthFormat(Pica::Regs::DepthFormat format) { 108 static PixelFormat PixelFormatFromDepthFormat(Pica::FramebufferRegs::DepthFormat format) {
101 return ((unsigned int)format < 4) ? (PixelFormat)((unsigned int)format + 14) 109 return ((unsigned int)format < 4) ? (PixelFormat)((unsigned int)format + 14)
102 : PixelFormat::Invalid; 110 : PixelFormat::Invalid;
103 } 111 }
@@ -205,12 +213,12 @@ public:
205 bool load_if_create, MathUtil::Rectangle<int>& out_rect); 213 bool load_if_create, MathUtil::Rectangle<int>& out_rect);
206 214
207 /// Gets a surface based on the texture configuration 215 /// Gets a surface based on the texture configuration
208 CachedSurface* GetTextureSurface(const Pica::Regs::FullTextureConfig& config); 216 CachedSurface* GetTextureSurface(const Pica::TexturingRegs::FullTextureConfig& config);
209 217
210 /// Gets the color and depth surfaces and rect (resolution scaled) based on the framebuffer 218 /// Gets the color and depth surfaces and rect (resolution scaled) based on the framebuffer
211 /// configuration 219 /// configuration
212 std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> GetFramebufferSurfaces( 220 std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> GetFramebufferSurfaces(
213 const Pica::Regs::FramebufferConfig& config); 221 const Pica::FramebufferRegs::FramebufferConfig& config);
214 222
215 /// Attempt to get a surface that exactly matches the fill region and format 223 /// Attempt to get a surface that exactly matches the fill region and format
216 CachedSurface* TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config); 224 CachedSurface* TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config);
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 4c4f98ac9..7abdeba05 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -7,13 +7,19 @@
7#include "common/assert.h" 7#include "common/assert.h"
8#include "common/bit_field.h" 8#include "common/bit_field.h"
9#include "common/logging/log.h" 9#include "common/logging/log.h"
10#include "video_core/pica.h" 10#include "video_core/regs_framebuffer.h"
11#include "video_core/regs_lighting.h"
12#include "video_core/regs_rasterizer.h"
13#include "video_core/regs_texturing.h"
11#include "video_core/renderer_opengl/gl_rasterizer.h" 14#include "video_core/renderer_opengl/gl_rasterizer.h"
12#include "video_core/renderer_opengl/gl_shader_gen.h" 15#include "video_core/renderer_opengl/gl_shader_gen.h"
13#include "video_core/renderer_opengl/gl_shader_util.h" 16#include "video_core/renderer_opengl/gl_shader_util.h"
14 17
15using Pica::Regs; 18using Pica::FramebufferRegs;
16using TevStageConfig = Regs::TevStageConfig; 19using Pica::LightingRegs;
20using Pica::RasterizerRegs;
21using Pica::TexturingRegs;
22using TevStageConfig = TexturingRegs::TevStageConfig;
17 23
18namespace GLShader { 24namespace GLShader {
19 25
@@ -46,10 +52,10 @@ static void AppendSource(std::string& out, const PicaShaderConfig& config,
46 case Source::Texture0: 52 case Source::Texture0:
47 // Only unit 0 respects the texturing type (according to 3DBrew) 53 // Only unit 0 respects the texturing type (according to 3DBrew)
48 switch (state.texture0_type) { 54 switch (state.texture0_type) {
49 case Pica::Regs::TextureConfig::Texture2D: 55 case TexturingRegs::TextureConfig::Texture2D:
50 out += "texture(tex[0], texcoord[0])"; 56 out += "texture(tex[0], texcoord[0])";
51 break; 57 break;
52 case Pica::Regs::TextureConfig::Projection2D: 58 case TexturingRegs::TextureConfig::Projection2D:
53 out += "textureProj(tex[0], vec3(texcoord[0], texcoord0_w))"; 59 out += "textureProj(tex[0], vec3(texcoord[0], texcoord0_w))";
54 break; 60 break;
55 default: 61 default:
@@ -276,8 +282,8 @@ static void AppendAlphaCombiner(std::string& out, TevStageConfig::Operation oper
276} 282}
277 283
278/// Writes the if-statement condition used to evaluate alpha testing 284/// Writes the if-statement condition used to evaluate alpha testing
279static void AppendAlphaTestCondition(std::string& out, Regs::CompareFunc func) { 285static void AppendAlphaTestCondition(std::string& out, FramebufferRegs::CompareFunc func) {
280 using CompareFunc = Regs::CompareFunc; 286 using CompareFunc = FramebufferRegs::CompareFunc;
281 switch (func) { 287 switch (func) {
282 case CompareFunc::Never: 288 case CompareFunc::Never:
283 out += "true"; 289 out += "true";
@@ -307,7 +313,7 @@ static void AppendAlphaTestCondition(std::string& out, Regs::CompareFunc func) {
307/// Writes the code to emulate the specified TEV stage 313/// Writes the code to emulate the specified TEV stage
308static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsigned index) { 314static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsigned index) {
309 const auto stage = 315 const auto stage =
310 static_cast<const Pica::Regs::TevStageConfig>(config.state.tev_stages[index]); 316 static_cast<const TexturingRegs::TevStageConfig>(config.state.tev_stages[index]);
311 if (!IsPassThroughTevStage(stage)) { 317 if (!IsPassThroughTevStage(stage)) {
312 std::string index_name = std::to_string(index); 318 std::string index_name = std::to_string(index);
313 319
@@ -364,7 +370,7 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
364 "vec3 refl_value = vec3(0.0);\n"; 370 "vec3 refl_value = vec3(0.0);\n";
365 371
366 // Compute fragment normals 372 // Compute fragment normals
367 if (lighting.bump_mode == Pica::Regs::LightingBumpMode::NormalMap) { 373 if (lighting.bump_mode == LightingRegs::LightingBumpMode::NormalMap) {
368 // Bump mapping is enabled using a normal map, read perturbation vector from the selected 374 // Bump mapping is enabled using a normal map, read perturbation vector from the selected
369 // texture 375 // texture
370 std::string bump_selector = std::to_string(lighting.bump_selector); 376 std::string bump_selector = std::to_string(lighting.bump_selector);
@@ -378,7 +384,7 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
378 "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))"; 384 "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))";
379 out += "surface_normal.z = sqrt(max(" + val + ", 0.0));\n"; 385 out += "surface_normal.z = sqrt(max(" + val + ", 0.0));\n";
380 } 386 }
381 } else if (lighting.bump_mode == Pica::Regs::LightingBumpMode::TangentMap) { 387 } else if (lighting.bump_mode == LightingRegs::LightingBumpMode::TangentMap) {
382 // Bump mapping is enabled using a tangent map 388 // Bump mapping is enabled using a tangent map
383 LOG_CRITICAL(HW_GPU, "unimplemented bump mapping mode (tangent mapping)"); 389 LOG_CRITICAL(HW_GPU, "unimplemented bump mapping mode (tangent mapping)");
384 UNIMPLEMENTED(); 390 UNIMPLEMENTED();
@@ -392,23 +398,24 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
392 out += "vec3 normal = normalize(quaternion_rotate(normquat, surface_normal));\n"; 398 out += "vec3 normal = normalize(quaternion_rotate(normquat, surface_normal));\n";
393 399
394 // Gets the index into the specified lookup table for specular lighting 400 // Gets the index into the specified lookup table for specular lighting
395 auto GetLutIndex = [&lighting](unsigned light_num, Regs::LightingLutInput input, bool abs) { 401 auto GetLutIndex = [&lighting](unsigned light_num, LightingRegs::LightingLutInput input,
402 bool abs) {
396 const std::string half_angle = "normalize(normalize(view) + light_vector)"; 403 const std::string half_angle = "normalize(normalize(view) + light_vector)";
397 std::string index; 404 std::string index;
398 switch (input) { 405 switch (input) {
399 case Regs::LightingLutInput::NH: 406 case LightingRegs::LightingLutInput::NH:
400 index = "dot(normal, " + half_angle + ")"; 407 index = "dot(normal, " + half_angle + ")";
401 break; 408 break;
402 409
403 case Regs::LightingLutInput::VH: 410 case LightingRegs::LightingLutInput::VH:
404 index = std::string("dot(normalize(view), " + half_angle + ")"); 411 index = std::string("dot(normalize(view), " + half_angle + ")");
405 break; 412 break;
406 413
407 case Regs::LightingLutInput::NV: 414 case LightingRegs::LightingLutInput::NV:
408 index = std::string("dot(normal, normalize(view))"); 415 index = std::string("dot(normal, normalize(view))");
409 break; 416 break;
410 417
411 case Regs::LightingLutInput::LN: 418 case LightingRegs::LightingLutInput::LN:
412 index = std::string("dot(light_vector, normal)"); 419 index = std::string("dot(light_vector, normal)");
413 break; 420 break;
414 421
@@ -432,7 +439,7 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
432 }; 439 };
433 440
434 // Gets the lighting lookup table value given the specified sampler and index 441 // Gets the lighting lookup table value given the specified sampler and index
435 auto GetLutValue = [](Regs::LightingSampler sampler, std::string lut_index) { 442 auto GetLutValue = [](LightingRegs::LightingSampler sampler, std::string lut_index) {
436 return std::string("texture(lut[" + std::to_string((unsigned)sampler / 4) + "], " + 443 return std::string("texture(lut[" + std::to_string((unsigned)sampler / 4) + "], " +
437 lut_index + ")[" + std::to_string((unsigned)sampler & 3) + "]"); 444 lut_index + ")[" + std::to_string((unsigned)sampler & 3) + "]");
438 }; 445 };
@@ -461,8 +468,8 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
461 light_src + ".position) + " + light_src + ".dist_atten_bias)"; 468 light_src + ".position) + " + light_src + ".dist_atten_bias)";
462 index = "(OFFSET_256 + SCALE_256 * clamp(" + index + ", 0.0, 1.0))"; 469 index = "(OFFSET_256 + SCALE_256 * clamp(" + index + ", 0.0, 1.0))";
463 const unsigned lut_num = 470 const unsigned lut_num =
464 ((unsigned)Regs::LightingSampler::DistanceAttenuation + light_config.num); 471 ((unsigned)LightingRegs::LightingSampler::DistanceAttenuation + light_config.num);
465 dist_atten = GetLutValue((Regs::LightingSampler)lut_num, index); 472 dist_atten = GetLutValue((LightingRegs::LightingSampler)lut_num, index);
466 } 473 }
467 474
468 // If enabled, clamp specular component if lighting result is negative 475 // If enabled, clamp specular component if lighting result is negative
@@ -472,24 +479,24 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
472 // Specular 0 component 479 // Specular 0 component
473 std::string d0_lut_value = "1.0"; 480 std::string d0_lut_value = "1.0";
474 if (lighting.lut_d0.enable && 481 if (lighting.lut_d0.enable &&
475 Pica::Regs::IsLightingSamplerSupported(lighting.config, 482 LightingRegs::IsLightingSamplerSupported(
476 Pica::Regs::LightingSampler::Distribution0)) { 483 lighting.config, LightingRegs::LightingSampler::Distribution0)) {
477 // Lookup specular "distribution 0" LUT value 484 // Lookup specular "distribution 0" LUT value
478 std::string index = 485 std::string index =
479 GetLutIndex(light_config.num, lighting.lut_d0.type, lighting.lut_d0.abs_input); 486 GetLutIndex(light_config.num, lighting.lut_d0.type, lighting.lut_d0.abs_input);
480 d0_lut_value = "(" + std::to_string(lighting.lut_d0.scale) + " * " + 487 d0_lut_value = "(" + std::to_string(lighting.lut_d0.scale) + " * " +
481 GetLutValue(Regs::LightingSampler::Distribution0, index) + ")"; 488 GetLutValue(LightingRegs::LightingSampler::Distribution0, index) + ")";
482 } 489 }
483 std::string specular_0 = "(" + d0_lut_value + " * " + light_src + ".specular_0)"; 490 std::string specular_0 = "(" + d0_lut_value + " * " + light_src + ".specular_0)";
484 491
485 // If enabled, lookup ReflectRed value, otherwise, 1.0 is used 492 // If enabled, lookup ReflectRed value, otherwise, 1.0 is used
486 if (lighting.lut_rr.enable && 493 if (lighting.lut_rr.enable &&
487 Pica::Regs::IsLightingSamplerSupported(lighting.config, 494 LightingRegs::IsLightingSamplerSupported(lighting.config,
488 Pica::Regs::LightingSampler::ReflectRed)) { 495 LightingRegs::LightingSampler::ReflectRed)) {
489 std::string index = 496 std::string index =
490 GetLutIndex(light_config.num, lighting.lut_rr.type, lighting.lut_rr.abs_input); 497 GetLutIndex(light_config.num, lighting.lut_rr.type, lighting.lut_rr.abs_input);
491 std::string value = "(" + std::to_string(lighting.lut_rr.scale) + " * " + 498 std::string value = "(" + std::to_string(lighting.lut_rr.scale) + " * " +
492 GetLutValue(Regs::LightingSampler::ReflectRed, index) + ")"; 499 GetLutValue(LightingRegs::LightingSampler::ReflectRed, index) + ")";
493 out += "refl_value.r = " + value + ";\n"; 500 out += "refl_value.r = " + value + ";\n";
494 } else { 501 } else {
495 out += "refl_value.r = 1.0;\n"; 502 out += "refl_value.r = 1.0;\n";
@@ -497,12 +504,13 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
497 504
498 // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used 505 // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used
499 if (lighting.lut_rg.enable && 506 if (lighting.lut_rg.enable &&
500 Pica::Regs::IsLightingSamplerSupported(lighting.config, 507 LightingRegs::IsLightingSamplerSupported(lighting.config,
501 Pica::Regs::LightingSampler::ReflectGreen)) { 508 LightingRegs::LightingSampler::ReflectGreen)) {
502 std::string index = 509 std::string index =
503 GetLutIndex(light_config.num, lighting.lut_rg.type, lighting.lut_rg.abs_input); 510 GetLutIndex(light_config.num, lighting.lut_rg.type, lighting.lut_rg.abs_input);
504 std::string value = "(" + std::to_string(lighting.lut_rg.scale) + " * " + 511 std::string value = "(" + std::to_string(lighting.lut_rg.scale) + " * " +
505 GetLutValue(Regs::LightingSampler::ReflectGreen, index) + ")"; 512 GetLutValue(LightingRegs::LightingSampler::ReflectGreen, index) +
513 ")";
506 out += "refl_value.g = " + value + ";\n"; 514 out += "refl_value.g = " + value + ";\n";
507 } else { 515 } else {
508 out += "refl_value.g = refl_value.r;\n"; 516 out += "refl_value.g = refl_value.r;\n";
@@ -510,12 +518,13 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
510 518
511 // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used 519 // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used
512 if (lighting.lut_rb.enable && 520 if (lighting.lut_rb.enable &&
513 Pica::Regs::IsLightingSamplerSupported(lighting.config, 521 LightingRegs::IsLightingSamplerSupported(lighting.config,
514 Pica::Regs::LightingSampler::ReflectBlue)) { 522 LightingRegs::LightingSampler::ReflectBlue)) {
515 std::string index = 523 std::string index =
516 GetLutIndex(light_config.num, lighting.lut_rb.type, lighting.lut_rb.abs_input); 524 GetLutIndex(light_config.num, lighting.lut_rb.type, lighting.lut_rb.abs_input);
517 std::string value = "(" + std::to_string(lighting.lut_rb.scale) + " * " + 525 std::string value = "(" + std::to_string(lighting.lut_rb.scale) + " * " +
518 GetLutValue(Regs::LightingSampler::ReflectBlue, index) + ")"; 526 GetLutValue(LightingRegs::LightingSampler::ReflectBlue, index) +
527 ")";
519 out += "refl_value.b = " + value + ";\n"; 528 out += "refl_value.b = " + value + ";\n";
520 } else { 529 } else {
521 out += "refl_value.b = refl_value.r;\n"; 530 out += "refl_value.b = refl_value.r;\n";
@@ -524,35 +533,39 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
524 // Specular 1 component 533 // Specular 1 component
525 std::string d1_lut_value = "1.0"; 534 std::string d1_lut_value = "1.0";
526 if (lighting.lut_d1.enable && 535 if (lighting.lut_d1.enable &&
527 Pica::Regs::IsLightingSamplerSupported(lighting.config, 536 LightingRegs::IsLightingSamplerSupported(
528 Pica::Regs::LightingSampler::Distribution1)) { 537 lighting.config, LightingRegs::LightingSampler::Distribution1)) {
529 // Lookup specular "distribution 1" LUT value 538 // Lookup specular "distribution 1" LUT value
530 std::string index = 539 std::string index =
531 GetLutIndex(light_config.num, lighting.lut_d1.type, lighting.lut_d1.abs_input); 540 GetLutIndex(light_config.num, lighting.lut_d1.type, lighting.lut_d1.abs_input);
532 d1_lut_value = "(" + std::to_string(lighting.lut_d1.scale) + " * " + 541 d1_lut_value = "(" + std::to_string(lighting.lut_d1.scale) + " * " +
533 GetLutValue(Regs::LightingSampler::Distribution1, index) + ")"; 542 GetLutValue(LightingRegs::LightingSampler::Distribution1, index) + ")";
534 } 543 }
535 std::string specular_1 = 544 std::string specular_1 =
536 "(" + d1_lut_value + " * refl_value * " + light_src + ".specular_1)"; 545 "(" + d1_lut_value + " * refl_value * " + light_src + ".specular_1)";
537 546
538 // Fresnel 547 // Fresnel
539 if (lighting.lut_fr.enable && Pica::Regs::IsLightingSamplerSupported( 548 if (lighting.lut_fr.enable &&
540 lighting.config, Pica::Regs::LightingSampler::Fresnel)) { 549 LightingRegs::IsLightingSamplerSupported(lighting.config,
550 LightingRegs::LightingSampler::Fresnel)) {
541 // Lookup fresnel LUT value 551 // Lookup fresnel LUT value
542 std::string index = 552 std::string index =
543 GetLutIndex(light_config.num, lighting.lut_fr.type, lighting.lut_fr.abs_input); 553 GetLutIndex(light_config.num, lighting.lut_fr.type, lighting.lut_fr.abs_input);
544 std::string value = "(" + std::to_string(lighting.lut_fr.scale) + " * " + 554 std::string value = "(" + std::to_string(lighting.lut_fr.scale) + " * " +
545 GetLutValue(Regs::LightingSampler::Fresnel, index) + ")"; 555 GetLutValue(LightingRegs::LightingSampler::Fresnel, index) + ")";
546 556
547 // Enabled for difffuse lighting alpha component 557 // Enabled for difffuse lighting alpha component
548 if (lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::PrimaryAlpha || 558 if (lighting.fresnel_selector == LightingRegs::LightingFresnelSelector::PrimaryAlpha ||
549 lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both) 559 lighting.fresnel_selector == LightingRegs::LightingFresnelSelector::Both) {
550 out += "diffuse_sum.a *= " + value + ";\n"; 560 out += "diffuse_sum.a *= " + value + ";\n";
561 }
551 562
552 // Enabled for the specular lighting alpha component 563 // Enabled for the specular lighting alpha component
553 if (lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::SecondaryAlpha || 564 if (lighting.fresnel_selector ==
554 lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both) 565 LightingRegs::LightingFresnelSelector::SecondaryAlpha ||
566 lighting.fresnel_selector == LightingRegs::LightingFresnelSelector::Both) {
555 out += "specular_sum.a *= " + value + ";\n"; 567 out += "specular_sum.a *= " + value + ";\n";
568 }
556 } 569 }
557 570
558 // Compute primary fragment color (diffuse lighting) function 571 // Compute primary fragment color (diffuse lighting) function
@@ -633,16 +646,16 @@ vec4 secondary_fragment_color = vec4(0.0);
633)"; 646)";
634 647
635 // Do not do any sort of processing if it's obvious we're not going to pass the alpha test 648 // Do not do any sort of processing if it's obvious we're not going to pass the alpha test
636 if (state.alpha_test_func == Regs::CompareFunc::Never) { 649 if (state.alpha_test_func == FramebufferRegs::CompareFunc::Never) {
637 out += "discard; }"; 650 out += "discard; }";
638 return out; 651 return out;
639 } 652 }
640 653
641 // Append the scissor test 654 // Append the scissor test
642 if (state.scissor_test_mode != Regs::ScissorMode::Disabled) { 655 if (state.scissor_test_mode != RasterizerRegs::ScissorMode::Disabled) {
643 out += "if ("; 656 out += "if (";
644 // Negate the condition if we have to keep only the pixels outside the scissor box 657 // Negate the condition if we have to keep only the pixels outside the scissor box
645 if (state.scissor_test_mode == Regs::ScissorMode::Include) 658 if (state.scissor_test_mode == RasterizerRegs::ScissorMode::Include)
646 out += "!"; 659 out += "!";
647 out += "(gl_FragCoord.x >= scissor_x1 && " 660 out += "(gl_FragCoord.x >= scissor_x1 && "
648 "gl_FragCoord.y >= scissor_y1 && " 661 "gl_FragCoord.y >= scissor_y1 && "
@@ -652,7 +665,7 @@ vec4 secondary_fragment_color = vec4(0.0);
652 665
653 out += "float z_over_w = 1.0 - gl_FragCoord.z * 2.0;\n"; 666 out += "float z_over_w = 1.0 - gl_FragCoord.z * 2.0;\n";
654 out += "float depth = z_over_w * depth_scale + depth_offset;\n"; 667 out += "float depth = z_over_w * depth_scale + depth_offset;\n";
655 if (state.depthmap_enable == Pica::Regs::DepthBuffering::WBuffering) { 668 if (state.depthmap_enable == RasterizerRegs::DepthBuffering::WBuffering) {
656 out += "depth /= gl_FragCoord.w;\n"; 669 out += "depth /= gl_FragCoord.w;\n";
657 } 670 }
658 671
@@ -666,14 +679,14 @@ vec4 secondary_fragment_color = vec4(0.0);
666 for (size_t index = 0; index < state.tev_stages.size(); ++index) 679 for (size_t index = 0; index < state.tev_stages.size(); ++index)
667 WriteTevStage(out, config, (unsigned)index); 680 WriteTevStage(out, config, (unsigned)index);
668 681
669 if (state.alpha_test_func != Regs::CompareFunc::Always) { 682 if (state.alpha_test_func != FramebufferRegs::CompareFunc::Always) {
670 out += "if ("; 683 out += "if (";
671 AppendAlphaTestCondition(out, state.alpha_test_func); 684 AppendAlphaTestCondition(out, state.alpha_test_func);
672 out += ") discard;\n"; 685 out += ") discard;\n";
673 } 686 }
674 687
675 // Append fog combiner 688 // Append fog combiner
676 if (state.fog_mode == Regs::FogMode::Fog) { 689 if (state.fog_mode == TexturingRegs::FogMode::Fog) {
677 // Get index into fog LUT 690 // Get index into fog LUT
678 if (state.fog_flip) { 691 if (state.fog_flip) {
679 out += "float fog_index = (1.0 - depth) * 128.0;\n"; 692 out += "float fog_index = (1.0 - depth) * 128.0;\n";
diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h
index cc49867c8..93d7b0b71 100644
--- a/src/video_core/renderer_opengl/pica_to_gl.h
+++ b/src/video_core/renderer_opengl/pica_to_gl.h
@@ -12,7 +12,9 @@
12#include "common/common_funcs.h" 12#include "common/common_funcs.h"
13#include "common/common_types.h" 13#include "common/common_types.h"
14#include "common/logging/log.h" 14#include "common/logging/log.h"
15#include "video_core/pica.h" 15#include "video_core/regs_framebuffer.h"
16#include "video_core/regs_lighting.h"
17#include "video_core/regs_texturing.h"
16 18
17using GLvec2 = std::array<GLfloat, 2>; 19using GLvec2 = std::array<GLfloat, 2>;
18using GLvec3 = std::array<GLfloat, 3>; 20using GLvec3 = std::array<GLfloat, 3>;
@@ -20,7 +22,7 @@ using GLvec4 = std::array<GLfloat, 4>;
20 22
21namespace PicaToGL { 23namespace PicaToGL {
22 24
23inline GLenum TextureFilterMode(Pica::Regs::TextureConfig::TextureFilter mode) { 25inline GLenum TextureFilterMode(Pica::TexturingRegs::TextureConfig::TextureFilter mode) {
24 static const GLenum filter_mode_table[] = { 26 static const GLenum filter_mode_table[] = {
25 GL_NEAREST, // TextureFilter::Nearest 27 GL_NEAREST, // TextureFilter::Nearest
26 GL_LINEAR, // TextureFilter::Linear 28 GL_LINEAR, // TextureFilter::Linear
@@ -47,7 +49,7 @@ inline GLenum TextureFilterMode(Pica::Regs::TextureConfig::TextureFilter mode) {
47 return gl_mode; 49 return gl_mode;
48} 50}
49 51
50inline GLenum WrapMode(Pica::Regs::TextureConfig::WrapMode mode) { 52inline GLenum WrapMode(Pica::TexturingRegs::TextureConfig::WrapMode mode) {
51 static const GLenum wrap_mode_table[] = { 53 static const GLenum wrap_mode_table[] = {
52 GL_CLAMP_TO_EDGE, // WrapMode::ClampToEdge 54 GL_CLAMP_TO_EDGE, // WrapMode::ClampToEdge
53 GL_CLAMP_TO_BORDER, // WrapMode::ClampToBorder 55 GL_CLAMP_TO_BORDER, // WrapMode::ClampToBorder
@@ -76,7 +78,7 @@ inline GLenum WrapMode(Pica::Regs::TextureConfig::WrapMode mode) {
76 return gl_mode; 78 return gl_mode;
77} 79}
78 80
79inline GLenum BlendEquation(Pica::Regs::BlendEquation equation) { 81inline GLenum BlendEquation(Pica::FramebufferRegs::BlendEquation equation) {
80 static const GLenum blend_equation_table[] = { 82 static const GLenum blend_equation_table[] = {
81 GL_FUNC_ADD, // BlendEquation::Add 83 GL_FUNC_ADD, // BlendEquation::Add
82 GL_FUNC_SUBTRACT, // BlendEquation::Subtract 84 GL_FUNC_SUBTRACT, // BlendEquation::Subtract
@@ -96,7 +98,7 @@ inline GLenum BlendEquation(Pica::Regs::BlendEquation equation) {
96 return blend_equation_table[(unsigned)equation]; 98 return blend_equation_table[(unsigned)equation];
97} 99}
98 100
99inline GLenum BlendFunc(Pica::Regs::BlendFactor factor) { 101inline GLenum BlendFunc(Pica::FramebufferRegs::BlendFactor factor) {
100 static const GLenum blend_func_table[] = { 102 static const GLenum blend_func_table[] = {
101 GL_ZERO, // BlendFactor::Zero 103 GL_ZERO, // BlendFactor::Zero
102 GL_ONE, // BlendFactor::One 104 GL_ONE, // BlendFactor::One
@@ -126,7 +128,7 @@ inline GLenum BlendFunc(Pica::Regs::BlendFactor factor) {
126 return blend_func_table[(unsigned)factor]; 128 return blend_func_table[(unsigned)factor];
127} 129}
128 130
129inline GLenum LogicOp(Pica::Regs::LogicOp op) { 131inline GLenum LogicOp(Pica::FramebufferRegs::LogicOp op) {
130 static const GLenum logic_op_table[] = { 132 static const GLenum logic_op_table[] = {
131 GL_CLEAR, // Clear 133 GL_CLEAR, // Clear
132 GL_AND, // And 134 GL_AND, // And
@@ -157,7 +159,7 @@ inline GLenum LogicOp(Pica::Regs::LogicOp op) {
157 return logic_op_table[(unsigned)op]; 159 return logic_op_table[(unsigned)op];
158} 160}
159 161
160inline GLenum CompareFunc(Pica::Regs::CompareFunc func) { 162inline GLenum CompareFunc(Pica::FramebufferRegs::CompareFunc func) {
161 static const GLenum compare_func_table[] = { 163 static const GLenum compare_func_table[] = {
162 GL_NEVER, // CompareFunc::Never 164 GL_NEVER, // CompareFunc::Never
163 GL_ALWAYS, // CompareFunc::Always 165 GL_ALWAYS, // CompareFunc::Always
@@ -180,7 +182,7 @@ inline GLenum CompareFunc(Pica::Regs::CompareFunc func) {
180 return compare_func_table[(unsigned)func]; 182 return compare_func_table[(unsigned)func];
181} 183}
182 184
183inline GLenum StencilOp(Pica::Regs::StencilAction action) { 185inline GLenum StencilOp(Pica::FramebufferRegs::StencilAction action) {
184 static const GLenum stencil_op_table[] = { 186 static const GLenum stencil_op_table[] = {
185 GL_KEEP, // StencilAction::Keep 187 GL_KEEP, // StencilAction::Keep
186 GL_ZERO, // StencilAction::Zero 188 GL_ZERO, // StencilAction::Zero
@@ -210,7 +212,7 @@ inline GLvec4 ColorRGBA8(const u32 color) {
210 }}; 212 }};
211} 213}
212 214
213inline std::array<GLfloat, 3> LightColor(const Pica::Regs::LightColor& color) { 215inline std::array<GLfloat, 3> LightColor(const Pica::LightingRegs::LightColor& color) {
214 return {{ 216 return {{
215 color.r / 255.0f, color.g / 255.0f, color.b / 255.0f, 217 color.r / 255.0f, color.g / 255.0f, color.b / 255.0f,
216 }}; 218 }};
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 2aa90e5c1..e19375466 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -10,8 +10,8 @@
10#include "common/assert.h" 10#include "common/assert.h"
11#include "common/bit_field.h" 11#include "common/bit_field.h"
12#include "common/logging/log.h" 12#include "common/logging/log.h"
13#include "common/profiler_reporting.h" 13#include "core/core.h"
14#include "common/synchronized_wrapper.h" 14#include "core/core_timing.h"
15#include "core/frontend/emu_window.h" 15#include "core/frontend/emu_window.h"
16#include "core/hw/gpu.h" 16#include "core/hw/gpu.h"
17#include "core/hw/hw.h" 17#include "core/hw/hw.h"
@@ -145,21 +145,16 @@ void RendererOpenGL::SwapBuffers() {
145 145
146 DrawScreens(); 146 DrawScreens();
147 147
148 auto& profiler = Common::Profiling::GetProfilingManager(); 148 Core::System::GetInstance().perf_stats.EndSystemFrame();
149 profiler.FinishFrame();
150 {
151 auto aggregator = Common::Profiling::GetTimingResultsAggregator();
152 aggregator->AddFrame(profiler.GetPreviousFrameResults());
153 }
154 149
155 // Swap buffers 150 // Swap buffers
156 render_window->PollEvents(); 151 render_window->PollEvents();
157 render_window->SwapBuffers(); 152 render_window->SwapBuffers();
158 153
159 prev_state.Apply(); 154 Core::System::GetInstance().frame_limiter.DoFrameLimiting(CoreTiming::GetGlobalTimeUs());
160 155 Core::System::GetInstance().perf_stats.BeginSystemFrame();
161 profiler.BeginFrame();
162 156
157 prev_state.Apply();
163 RefreshRasterizerSetting(); 158 RefreshRasterizerSetting();
164 159
165 if (Pica::g_debug_context && Pica::g_debug_context->recorder) { 160 if (Pica::g_debug_context && Pica::g_debug_context->recorder) {
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index a4aa3c9e0..67ed19ba8 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -2,18 +2,14 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <atomic>
6#include <cmath> 5#include <cmath>
7#include <cstring> 6#include <cstring>
8#include <unordered_map> 7#include "common/bit_set.h"
9#include <utility>
10#include <boost/range/algorithm/fill.hpp>
11#include "common/bit_field.h"
12#include "common/hash.h"
13#include "common/logging/log.h" 8#include "common/logging/log.h"
14#include "common/microprofile.h" 9#include "common/microprofile.h"
15#include "video_core/pica.h"
16#include "video_core/pica_state.h" 10#include "video_core/pica_state.h"
11#include "video_core/regs_rasterizer.h"
12#include "video_core/regs_shader.h"
17#include "video_core/shader/shader.h" 13#include "video_core/shader/shader.h"
18#include "video_core/shader/shader_interpreter.h" 14#include "video_core/shader/shader_interpreter.h"
19#ifdef ARCHITECTURE_x86_64 15#ifdef ARCHITECTURE_x86_64
@@ -25,37 +21,31 @@ namespace Pica {
25 21
26namespace Shader { 22namespace Shader {
27 23
28OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) const { 24OutputVertex OutputVertex::FromAttributeBuffer(const RasterizerRegs& regs, AttributeBuffer& input) {
29 // Setup output data 25 // Setup output data
30 OutputVertex ret; 26 union {
31 // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to 27 OutputVertex ret{};
32 // figure out what those circumstances are and enable the remaining outputs then. 28 std::array<float24, 24> vertex_slots;
33 unsigned index = 0; 29 };
34 for (unsigned i = 0; i < 7; ++i) { 30 static_assert(sizeof(vertex_slots) == sizeof(ret), "Struct and array have different sizes.");
35 31
36 if (index >= g_state.regs.vs_output_total) 32 unsigned int num_attributes = regs.vs_output_total;
37 break; 33 ASSERT(num_attributes <= 7);
34 for (unsigned int i = 0; i < num_attributes; ++i) {
35 const auto& output_register_map = regs.vs_output_attributes[i];
38 36
39 if ((config.output_mask & (1 << i)) == 0) 37 RasterizerRegs::VSOutputAttributes::Semantic semantics[4] = {
40 continue; 38 output_register_map.map_x, output_register_map.map_y, output_register_map.map_z,
41 39 output_register_map.map_w};
42 const auto& output_register_map = g_state.regs.vs_output_attributes[index];
43
44 u32 semantics[4] = {output_register_map.map_x, output_register_map.map_y,
45 output_register_map.map_z, output_register_map.map_w};
46 40
47 for (unsigned comp = 0; comp < 4; ++comp) { 41 for (unsigned comp = 0; comp < 4; ++comp) {
48 float24* out = ((float24*)&ret) + semantics[comp]; 42 RasterizerRegs::VSOutputAttributes::Semantic semantic = semantics[comp];
49 if (semantics[comp] != Regs::VSOutputAttributes::INVALID) { 43 if (semantic < vertex_slots.size()) {
50 *out = value[i][comp]; 44 vertex_slots[semantic] = input.attr[i][comp];
51 } else { 45 } else if (semantic != RasterizerRegs::VSOutputAttributes::INVALID) {
52 // Zero output so that attributes which aren't output won't have denormals in them, 46 LOG_ERROR(HW_GPU, "Invalid/unknown semantic id: %u", (unsigned int)semantic);
53 // which would slow us down later.
54 memset(out, 0, sizeof(*out));
55 } 47 }
56 } 48 }
57
58 index++;
59 } 49 }
60 50
61 // The hardware takes the absolute and saturates vertex colors like this, *before* doing 51 // The hardware takes the absolute and saturates vertex colors like this, *before* doing
@@ -76,84 +66,47 @@ OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) const {
76 return ret; 66 return ret;
77} 67}
78 68
79#ifdef ARCHITECTURE_x86_64 69void UnitState::LoadInput(const ShaderRegs& config, const AttributeBuffer& input) {
80static std::unordered_map<u64, std::unique_ptr<JitShader>> shader_map; 70 const unsigned max_attribute = config.max_input_attribute_index;
81static const JitShader* jit_shader;
82#endif // ARCHITECTURE_x86_64
83 71
84void ClearCache() { 72 for (unsigned attr = 0; attr <= max_attribute; ++attr) {
85#ifdef ARCHITECTURE_x86_64 73 unsigned reg = config.GetRegisterForAttribute(attr);
86 shader_map.clear(); 74 registers.input[reg] = input.attr[attr];
87#endif // ARCHITECTURE_x86_64 75 }
88} 76}
89 77
90void ShaderSetup::Setup() { 78void UnitState::WriteOutput(const ShaderRegs& config, AttributeBuffer& output) {
91#ifdef ARCHITECTURE_x86_64 79 unsigned int output_i = 0;
92 if (VideoCore::g_shader_jit_enabled) { 80 for (unsigned int reg : Common::BitSet<u32>(config.output_mask)) {
93 u64 cache_key = 81 output.attr[output_i++] = registers.output[reg];
94 Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^
95 Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data));
96
97 auto iter = shader_map.find(cache_key);
98 if (iter != shader_map.end()) {
99 jit_shader = iter->second.get();
100 } else {
101 auto shader = std::make_unique<JitShader>();
102 shader->Compile();
103 jit_shader = shader.get();
104 shader_map[cache_key] = std::move(shader);
105 }
106 } 82 }
107#endif // ARCHITECTURE_x86_64
108} 83}
109 84
110MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); 85MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240));
111 86
112void ShaderSetup::Run(UnitState& state, const InputVertex& input, int num_attributes) { 87#ifdef ARCHITECTURE_x86_64
113 auto& config = g_state.regs.vs; 88static std::unique_ptr<JitX64Engine> jit_engine;
114 auto& setup = g_state.vs; 89#endif // ARCHITECTURE_x86_64
115 90static InterpreterEngine interpreter_engine;
116 MICROPROFILE_SCOPE(GPU_Shader);
117
118 // Setup input register table
119 const auto& attribute_register_map = config.input_register_map;
120
121 for (unsigned i = 0; i < num_attributes; i++)
122 state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i];
123
124 state.conditional_code[0] = false;
125 state.conditional_code[1] = false;
126 91
92ShaderEngine* GetEngine() {
127#ifdef ARCHITECTURE_x86_64 93#ifdef ARCHITECTURE_x86_64
94 // TODO(yuriks): Re-initialize on each change rather than being persistent
128 if (VideoCore::g_shader_jit_enabled) { 95 if (VideoCore::g_shader_jit_enabled) {
129 jit_shader->Run(setup, state, config.main_offset); 96 if (jit_engine == nullptr) {
130 } else { 97 jit_engine = std::make_unique<JitX64Engine>();
131 DebugData<false> dummy_debug_data; 98 }
132 RunInterpreter(setup, state, dummy_debug_data, config.main_offset); 99 return jit_engine.get();
133 } 100 }
134#else
135 DebugData<false> dummy_debug_data;
136 RunInterpreter(setup, state, dummy_debug_data, config.main_offset);
137#endif // ARCHITECTURE_x86_64 101#endif // ARCHITECTURE_x86_64
138}
139
140DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes,
141 const Regs::ShaderConfig& config,
142 const ShaderSetup& setup) {
143 UnitState state;
144 DebugData<true> debug_data;
145
146 // Setup input register table
147 boost::fill(state.registers.input, Math::Vec4<float24>::AssignToAll(float24::Zero()));
148 const auto& attribute_register_map = config.input_register_map;
149 for (unsigned i = 0; i < num_attributes; i++)
150 state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i];
151 102
152 state.conditional_code[0] = false; 103 return &interpreter_engine;
153 state.conditional_code[1] = false; 104}
154 105
155 RunInterpreter(setup, state, debug_data, config.main_offset); 106void Shutdown() {
156 return debug_data; 107#ifdef ARCHITECTURE_x86_64
108 jit_engine = nullptr;
109#endif // ARCHITECTURE_x86_64
157} 110}
158 111
159} // namespace Shader 112} // namespace Shader
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index 2b07759b9..38ea717ab 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -6,16 +6,15 @@
6 6
7#include <array> 7#include <array>
8#include <cstddef> 8#include <cstddef>
9#include <memory>
10#include <type_traits> 9#include <type_traits>
11#include <nihstro/shader_bytecode.h> 10#include <nihstro/shader_bytecode.h>
12#include "common/assert.h" 11#include "common/assert.h"
13#include "common/common_funcs.h" 12#include "common/common_funcs.h"
14#include "common/common_types.h" 13#include "common/common_types.h"
15#include "common/vector_math.h" 14#include "common/vector_math.h"
16#include "video_core/pica.h"
17#include "video_core/pica_types.h" 15#include "video_core/pica_types.h"
18#include "video_core/shader/debug_data.h" 16#include "video_core/regs_rasterizer.h"
17#include "video_core/regs_shader.h"
19 18
20using nihstro::RegisterType; 19using nihstro::RegisterType;
21using nihstro::SourceRegister; 20using nihstro::SourceRegister;
@@ -25,14 +24,11 @@ namespace Pica {
25 24
26namespace Shader { 25namespace Shader {
27 26
28struct InputVertex { 27struct AttributeBuffer {
29 alignas(16) Math::Vec4<float24> attr[16]; 28 alignas(16) Math::Vec4<float24> attr[16];
30}; 29};
31 30
32struct OutputVertex { 31struct OutputVertex {
33 OutputVertex() = default;
34
35 // VS output attributes
36 Math::Vec4<float24> pos; 32 Math::Vec4<float24> pos;
37 Math::Vec4<float24> quat; 33 Math::Vec4<float24> quat;
38 Math::Vec4<float24> color; 34 Math::Vec4<float24> color;
@@ -44,49 +40,22 @@ struct OutputVertex {
44 INSERT_PADDING_WORDS(1); 40 INSERT_PADDING_WORDS(1);
45 Math::Vec2<float24> tc2; 41 Math::Vec2<float24> tc2;
46 42
47 // Padding for optimal alignment 43 static OutputVertex FromAttributeBuffer(const RasterizerRegs& regs, AttributeBuffer& output);
48 INSERT_PADDING_WORDS(4);
49
50 // Attributes used to store intermediate results
51
52 // position after perspective divide
53 Math::Vec3<float24> screenpos;
54 INSERT_PADDING_WORDS(1);
55
56 // Linear interpolation
57 // factor: 0=this, 1=vtx
58 void Lerp(float24 factor, const OutputVertex& vtx) {
59 pos = pos * factor + vtx.pos * (float24::FromFloat32(1) - factor);
60
61 // TODO: Should perform perspective correct interpolation here...
62 tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor);
63 tc1 = tc1 * factor + vtx.tc1 * (float24::FromFloat32(1) - factor);
64 tc2 = tc2 * factor + vtx.tc2 * (float24::FromFloat32(1) - factor);
65
66 screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor);
67
68 color = color * factor + vtx.color * (float24::FromFloat32(1) - factor);
69 }
70
71 // Linear interpolation
72 // factor: 0=v0, 1=v1
73 static OutputVertex Lerp(float24 factor, const OutputVertex& v0, const OutputVertex& v1) {
74 OutputVertex ret = v0;
75 ret.Lerp(factor, v1);
76 return ret;
77 }
78}; 44};
45#define ASSERT_POS(var, pos) \
46 static_assert(offsetof(OutputVertex, var) == pos * sizeof(float24), "Semantic at wrong " \
47 "offset.")
48ASSERT_POS(pos, RasterizerRegs::VSOutputAttributes::POSITION_X);
49ASSERT_POS(quat, RasterizerRegs::VSOutputAttributes::QUATERNION_X);
50ASSERT_POS(color, RasterizerRegs::VSOutputAttributes::COLOR_R);
51ASSERT_POS(tc0, RasterizerRegs::VSOutputAttributes::TEXCOORD0_U);
52ASSERT_POS(tc1, RasterizerRegs::VSOutputAttributes::TEXCOORD1_U);
53ASSERT_POS(tc0_w, RasterizerRegs::VSOutputAttributes::TEXCOORD0_W);
54ASSERT_POS(view, RasterizerRegs::VSOutputAttributes::VIEW_X);
55ASSERT_POS(tc2, RasterizerRegs::VSOutputAttributes::TEXCOORD2_U);
56#undef ASSERT_POS
79static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); 57static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD");
80static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); 58static_assert(sizeof(OutputVertex) == 24 * sizeof(float), "OutputVertex has invalid size");
81
82struct OutputRegisters {
83 OutputRegisters() = default;
84
85 alignas(16) Math::Vec4<float24> value[16];
86
87 OutputVertex ToVertex(const Regs::ShaderConfig& config) const;
88};
89static_assert(std::is_pod<OutputRegisters>::value, "Structure is not POD");
90 59
91/** 60/**
92 * This structure contains the state information that needs to be unique for a shader unit. The 3DS 61 * This structure contains the state information that needs to be unique for a shader unit. The 3DS
@@ -100,11 +69,10 @@ struct UnitState {
100 // required to be 16-byte aligned. 69 // required to be 16-byte aligned.
101 alignas(16) Math::Vec4<float24> input[16]; 70 alignas(16) Math::Vec4<float24> input[16];
102 alignas(16) Math::Vec4<float24> temporary[16]; 71 alignas(16) Math::Vec4<float24> temporary[16];
72 alignas(16) Math::Vec4<float24> output[16];
103 } registers; 73 } registers;
104 static_assert(std::is_pod<Registers>::value, "Structure is not POD"); 74 static_assert(std::is_pod<Registers>::value, "Structure is not POD");
105 75
106 OutputRegisters output_registers;
107
108 bool conditional_code[2]; 76 bool conditional_code[2];
109 77
110 // Two Address registers and one loop counter 78 // Two Address registers and one loop counter
@@ -130,7 +98,7 @@ struct UnitState {
130 static size_t OutputOffset(const DestRegister& reg) { 98 static size_t OutputOffset(const DestRegister& reg) {
131 switch (reg.GetRegisterType()) { 99 switch (reg.GetRegisterType()) {
132 case RegisterType::Output: 100 case RegisterType::Output:
133 return offsetof(UnitState, output_registers.value) + 101 return offsetof(UnitState, registers.output) +
134 reg.GetIndex() * sizeof(Math::Vec4<float24>); 102 reg.GetIndex() * sizeof(Math::Vec4<float24>);
135 103
136 case RegisterType::Temporary: 104 case RegisterType::Temporary:
@@ -142,13 +110,19 @@ struct UnitState {
142 return 0; 110 return 0;
143 } 111 }
144 } 112 }
145};
146 113
147/// Clears the shader cache 114 /**
148void ClearCache(); 115 * Loads the unit state with an input vertex.
116 *
117 * @param config Shader configuration registers corresponding to the unit.
118 * @param input Attribute buffer to load into the input registers.
119 */
120 void LoadInput(const ShaderRegs& config, const AttributeBuffer& input);
149 121
150struct ShaderSetup { 122 void WriteOutput(const ShaderRegs& config, AttributeBuffer& output);
123};
151 124
125struct ShaderSetup {
152 struct { 126 struct {
153 // The float uniforms are accessed by the shader JIT using SSE instructions, and are 127 // The float uniforms are accessed by the shader JIT using SSE instructions, and are
154 // therefore required to be 16-byte aligned. 128 // therefore required to be 16-byte aligned.
@@ -173,32 +147,37 @@ struct ShaderSetup {
173 std::array<u32, 1024> program_code; 147 std::array<u32, 1024> program_code;
174 std::array<u32, 1024> swizzle_data; 148 std::array<u32, 1024> swizzle_data;
175 149
150 /// Data private to ShaderEngines
151 struct EngineData {
152 unsigned int entry_point;
153 /// Used by the JIT, points to a compiled shader object.
154 const void* cached_shader = nullptr;
155 } engine_data;
156};
157
158class ShaderEngine {
159public:
160 virtual ~ShaderEngine() = default;
161
176 /** 162 /**
177 * Performs any shader unit setup that only needs to happen once per shader (as opposed to once 163 * Performs any shader unit setup that only needs to happen once per shader (as opposed to once
178 * per vertex, which would happen within the `Run` function). 164 * per vertex, which would happen within the `Run` function).
179 */ 165 */
180 void Setup(); 166 virtual void SetupBatch(ShaderSetup& setup, unsigned int entry_point) = 0;
181 167
182 /** 168 /**
183 * Runs the currently setup shader 169 * Runs the currently setup shader.
184 * @param state Shader unit state, must be setup per shader and per shader unit 170 *
185 * @param input Input vertex into the shader 171 * @param setup Shader engine state, must be setup with SetupBatch on each shader change.
186 * @param num_attributes The number of vertex shader attributes 172 * @param state Shader unit state, must be setup with input data before each shader invocation.
187 */ 173 */
188 void Run(UnitState& state, const InputVertex& input, int num_attributes); 174 virtual void Run(const ShaderSetup& setup, UnitState& state) const = 0;
189
190 /**
191 * Produce debug information based on the given shader and input vertex
192 * @param input Input vertex into the shader
193 * @param num_attributes The number of vertex shader attributes
194 * @param config Configuration object for the shader pipeline
195 * @param setup Setup object for the shader pipeline
196 * @return Debug information for this shader with regards to the given vertex
197 */
198 DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes,
199 const Regs::ShaderConfig& config, const ShaderSetup& setup);
200}; 175};
201 176
177// TODO(yuriks): Remove and make it non-global state somewhere
178ShaderEngine* GetEngine();
179void Shutdown();
180
202} // namespace Shader 181} // namespace Shader
203 182
204} // namespace Pica 183} // namespace Pica
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp
index 20fb9754b..f4d1c46c5 100644
--- a/src/video_core/shader/shader_interpreter.cpp
+++ b/src/video_core/shader/shader_interpreter.cpp
@@ -7,10 +7,12 @@
7#include <cmath> 7#include <cmath>
8#include <numeric> 8#include <numeric>
9#include <boost/container/static_vector.hpp> 9#include <boost/container/static_vector.hpp>
10#include <boost/range/algorithm/fill.hpp>
10#include <nihstro/shader_bytecode.h> 11#include <nihstro/shader_bytecode.h>
11#include "common/assert.h" 12#include "common/assert.h"
12#include "common/common_types.h" 13#include "common/common_types.h"
13#include "common/logging/log.h" 14#include "common/logging/log.h"
15#include "common/microprofile.h"
14#include "common/vector_math.h" 16#include "common/vector_math.h"
15#include "video_core/pica_state.h" 17#include "video_core/pica_state.h"
16#include "video_core/pica_types.h" 18#include "video_core/pica_types.h"
@@ -37,12 +39,15 @@ struct CallStackElement {
37}; 39};
38 40
39template <bool Debug> 41template <bool Debug>
40void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug>& debug_data, 42static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug>& debug_data,
41 unsigned offset) { 43 unsigned offset) {
42 // TODO: Is there a maximal size for this? 44 // TODO: Is there a maximal size for this?
43 boost::container::static_vector<CallStackElement, 16> call_stack; 45 boost::container::static_vector<CallStackElement, 16> call_stack;
44 u32 program_counter = offset; 46 u32 program_counter = offset;
45 47
48 state.conditional_code[0] = false;
49 state.conditional_code[1] = false;
50
46 auto call = [&program_counter, &call_stack](u32 offset, u32 num_instructions, u32 return_offset, 51 auto call = [&program_counter, &call_stack](u32 offset, u32 num_instructions, u32 return_offset,
47 u8 repeat_count, u8 loop_increment) { 52 u8 repeat_count, u8 loop_increment) {
48 // -1 to make sure when incrementing the PC we end up at the correct offset 53 // -1 to make sure when incrementing the PC we end up at the correct offset
@@ -73,9 +78,9 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug>
73 } 78 }
74 }; 79 };
75 80
76 const auto& uniforms = g_state.vs.uniforms; 81 const auto& uniforms = setup.uniforms;
77 const auto& swizzle_data = g_state.vs.swizzle_data; 82 const auto& swizzle_data = setup.swizzle_data;
78 const auto& program_code = g_state.vs.program_code; 83 const auto& program_code = setup.program_code;
79 84
80 // Placeholder for invalid inputs 85 // Placeholder for invalid inputs
81 static float24 dummy_vec4_float24[4]; 86 static float24 dummy_vec4_float24[4];
@@ -170,7 +175,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug>
170 175
171 float24* dest = 176 float24* dest =
172 (instr.common.dest.Value() < 0x10) 177 (instr.common.dest.Value() < 0x10)
173 ? &state.output_registers.value[instr.common.dest.Value().GetIndex()][0] 178 ? &state.registers.output[instr.common.dest.Value().GetIndex()][0]
174 : (instr.common.dest.Value() < 0x20) 179 : (instr.common.dest.Value() < 0x20)
175 ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0] 180 ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0]
176 : dummy_vec4_float24; 181 : dummy_vec4_float24;
@@ -513,7 +518,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug>
513 518
514 float24* dest = 519 float24* dest =
515 (instr.mad.dest.Value() < 0x10) 520 (instr.mad.dest.Value() < 0x10)
516 ? &state.output_registers.value[instr.mad.dest.Value().GetIndex()][0] 521 ? &state.registers.output[instr.mad.dest.Value().GetIndex()][0]
517 : (instr.mad.dest.Value() < 0x20) 522 : (instr.mad.dest.Value() < 0x20)
518 ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] 523 ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0]
519 : dummy_vec4_float24; 524 : dummy_vec4_float24;
@@ -647,9 +652,33 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug>
647 } 652 }
648} 653}
649 654
650// Explicit instantiation 655void InterpreterEngine::SetupBatch(ShaderSetup& setup, unsigned int entry_point) {
651template void RunInterpreter(const ShaderSetup&, UnitState&, DebugData<false>&, unsigned offset); 656 ASSERT(entry_point < 1024);
652template void RunInterpreter(const ShaderSetup&, UnitState&, DebugData<true>&, unsigned offset); 657 setup.engine_data.entry_point = entry_point;
658}
659
660MICROPROFILE_DECLARE(GPU_Shader);
661
662void InterpreterEngine::Run(const ShaderSetup& setup, UnitState& state) const {
663
664 MICROPROFILE_SCOPE(GPU_Shader);
665
666 DebugData<false> dummy_debug_data;
667 RunInterpreter(setup, state, dummy_debug_data, setup.engine_data.entry_point);
668}
669
670DebugData<true> InterpreterEngine::ProduceDebugInfo(const ShaderSetup& setup,
671 const AttributeBuffer& input,
672 const ShaderRegs& config) const {
673 UnitState state;
674 DebugData<true> debug_data;
675
676 // Setup input register table
677 boost::fill(state.registers.input, Math::Vec4<float24>::AssignToAll(float24::Zero()));
678 state.LoadInput(config, input);
679 RunInterpreter(setup, state, debug_data, setup.engine_data.entry_point);
680 return debug_data;
681}
653 682
654} // namespace 683} // namespace
655 684
diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h
index d31dcd7a6..50fd7c69d 100644
--- a/src/video_core/shader/shader_interpreter.h
+++ b/src/video_core/shader/shader_interpreter.h
@@ -4,18 +4,28 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "video_core/shader/debug_data.h"
8#include "video_core/shader/shader.h"
9
7namespace Pica { 10namespace Pica {
8 11
9namespace Shader { 12namespace Shader {
10 13
11struct UnitState; 14class InterpreterEngine final : public ShaderEngine {
12 15public:
13template <bool Debug> 16 void SetupBatch(ShaderSetup& setup, unsigned int entry_point) override;
14struct DebugData; 17 void Run(const ShaderSetup& setup, UnitState& state) const override;
15 18
16template <bool Debug> 19 /**
17void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug>& debug_data, 20 * Produce debug information based on the given shader and input vertex
18 unsigned offset); 21 * @param setup Shader engine state
22 * @param input Input vertex into the shader
23 * @param config Configuration object for the shader pipeline
24 * @return Debug information for this shader with regards to the given vertex
25 */
26 DebugData<true> ProduceDebugInfo(const ShaderSetup& setup, const AttributeBuffer& input,
27 const ShaderRegs& config) const;
28};
19 29
20} // namespace 30} // namespace
21 31
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index c588b778b..0ee0dd9ef 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -1,888 +1,48 @@
1// Copyright 2015 Citra Emulator Project 1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm> 5#include "common/hash.h"
6#include <cmath> 6#include "common/microprofile.h"
7#include <cstdint>
8#include <nihstro/shader_bytecode.h>
9#include <smmintrin.h>
10#include <xmmintrin.h>
11#include "common/assert.h"
12#include "common/logging/log.h"
13#include "common/vector_math.h"
14#include "common/x64/cpu_detect.h"
15#include "common/x64/xbyak_abi.h"
16#include "common/x64/xbyak_util.h"
17#include "video_core/pica_state.h"
18#include "video_core/pica_types.h"
19#include "video_core/shader/shader.h" 7#include "video_core/shader/shader.h"
20#include "video_core/shader/shader_jit_x64.h" 8#include "video_core/shader/shader_jit_x64.h"
21 9#include "video_core/shader/shader_jit_x64_compiler.h"
22using namespace Common::X64;
23using namespace Xbyak::util;
24using Xbyak::Label;
25using Xbyak::Reg32;
26using Xbyak::Reg64;
27using Xbyak::Xmm;
28 10
29namespace Pica { 11namespace Pica {
30
31namespace Shader { 12namespace Shader {
32 13
33typedef void (JitShader::*JitFunction)(Instruction instr); 14JitX64Engine::JitX64Engine() = default;
34 15JitX64Engine::~JitX64Engine() = default;
35const JitFunction instr_table[64] = {
36 &JitShader::Compile_ADD, // add
37 &JitShader::Compile_DP3, // dp3
38 &JitShader::Compile_DP4, // dp4
39 &JitShader::Compile_DPH, // dph
40 nullptr, // unknown
41 &JitShader::Compile_EX2, // ex2
42 &JitShader::Compile_LG2, // lg2
43 nullptr, // unknown
44 &JitShader::Compile_MUL, // mul
45 &JitShader::Compile_SGE, // sge
46 &JitShader::Compile_SLT, // slt
47 &JitShader::Compile_FLR, // flr
48 &JitShader::Compile_MAX, // max
49 &JitShader::Compile_MIN, // min
50 &JitShader::Compile_RCP, // rcp
51 &JitShader::Compile_RSQ, // rsq
52 nullptr, // unknown
53 nullptr, // unknown
54 &JitShader::Compile_MOVA, // mova
55 &JitShader::Compile_MOV, // mov
56 nullptr, // unknown
57 nullptr, // unknown
58 nullptr, // unknown
59 nullptr, // unknown
60 &JitShader::Compile_DPH, // dphi
61 nullptr, // unknown
62 &JitShader::Compile_SGE, // sgei
63 &JitShader::Compile_SLT, // slti
64 nullptr, // unknown
65 nullptr, // unknown
66 nullptr, // unknown
67 nullptr, // unknown
68 nullptr, // unknown
69 &JitShader::Compile_NOP, // nop
70 &JitShader::Compile_END, // end
71 nullptr, // break
72 &JitShader::Compile_CALL, // call
73 &JitShader::Compile_CALLC, // callc
74 &JitShader::Compile_CALLU, // callu
75 &JitShader::Compile_IF, // ifu
76 &JitShader::Compile_IF, // ifc
77 &JitShader::Compile_LOOP, // loop
78 nullptr, // emit
79 nullptr, // sete
80 &JitShader::Compile_JMP, // jmpc
81 &JitShader::Compile_JMP, // jmpu
82 &JitShader::Compile_CMP, // cmp
83 &JitShader::Compile_CMP, // cmp
84 &JitShader::Compile_MAD, // madi
85 &JitShader::Compile_MAD, // madi
86 &JitShader::Compile_MAD, // madi
87 &JitShader::Compile_MAD, // madi
88 &JitShader::Compile_MAD, // madi
89 &JitShader::Compile_MAD, // madi
90 &JitShader::Compile_MAD, // madi
91 &JitShader::Compile_MAD, // madi
92 &JitShader::Compile_MAD, // mad
93 &JitShader::Compile_MAD, // mad
94 &JitShader::Compile_MAD, // mad
95 &JitShader::Compile_MAD, // mad
96 &JitShader::Compile_MAD, // mad
97 &JitShader::Compile_MAD, // mad
98 &JitShader::Compile_MAD, // mad
99 &JitShader::Compile_MAD, // mad
100};
101
102// The following is used to alias some commonly used registers. Generally, RAX-RDX and XMM0-XMM3 can
103// be used as scratch registers within a compiler function. The other registers have designated
104// purposes, as documented below:
105 16
106/// Pointer to the uniform memory 17void JitX64Engine::SetupBatch(ShaderSetup& setup, unsigned int entry_point) {
107static const Reg64 SETUP = r9; 18 ASSERT(entry_point < 1024);
108/// The two 32-bit VS address offset registers set by the MOVA instruction 19 setup.engine_data.entry_point = entry_point;
109static const Reg64 ADDROFFS_REG_0 = r10;
110static const Reg64 ADDROFFS_REG_1 = r11;
111/// VS loop count register (Multiplied by 16)
112static const Reg32 LOOPCOUNT_REG = r12d;
113/// Current VS loop iteration number (we could probably use LOOPCOUNT_REG, but this quicker)
114static const Reg32 LOOPCOUNT = esi;
115/// Number to increment LOOPCOUNT_REG by on each loop iteration (Multiplied by 16)
116static const Reg32 LOOPINC = edi;
117/// Result of the previous CMP instruction for the X-component comparison
118static const Reg64 COND0 = r13;
119/// Result of the previous CMP instruction for the Y-component comparison
120static const Reg64 COND1 = r14;
121/// Pointer to the UnitState instance for the current VS unit
122static const Reg64 STATE = r15;
123/// SIMD scratch register
124static const Xmm SCRATCH = xmm0;
125/// Loaded with the first swizzled source register, otherwise can be used as a scratch register
126static const Xmm SRC1 = xmm1;
127/// Loaded with the second swizzled source register, otherwise can be used as a scratch register
128static const Xmm SRC2 = xmm2;
129/// Loaded with the third swizzled source register, otherwise can be used as a scratch register
130static const Xmm SRC3 = xmm3;
131/// Additional scratch register
132static const Xmm SCRATCH2 = xmm4;
133/// Constant vector of [1.0f, 1.0f, 1.0f, 1.0f], used to efficiently set a vector to one
134static const Xmm ONE = xmm14;
135/// Constant vector of [-0.f, -0.f, -0.f, -0.f], used to efficiently negate a vector with XOR
136static const Xmm NEGBIT = xmm15;
137 20
138// State registers that must not be modified by external functions calls 21 u64 code_hash = Common::ComputeHash64(&setup.program_code, sizeof(setup.program_code));
139// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed 22 u64 swizzle_hash = Common::ComputeHash64(&setup.swizzle_data, sizeof(setup.swizzle_data));
140static const BitSet32 persistent_regs = BuildRegSet({
141 // Pointers to register blocks
142 SETUP, STATE,
143 // Cached registers
144 ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1,
145 // Constants
146 ONE, NEGBIT,
147});
148 23
149/// Raw constant for the source register selector that indicates no swizzling is performed 24 u64 cache_key = code_hash ^ swizzle_hash;
150static const u8 NO_SRC_REG_SWIZZLE = 0x1b; 25 auto iter = cache.find(cache_key);
151/// Raw constant for the destination register enable mask that indicates all components are enabled 26 if (iter != cache.end()) {
152static const u8 NO_DEST_REG_MASK = 0xf; 27 setup.engine_data.cached_shader = iter->second.get();
153
154/**
155 * Get the vertex shader instruction for a given offset in the current shader program
156 * @param offset Offset in the current shader program of the instruction
157 * @return Instruction at the specified offset
158 */
159static Instruction GetVertexShaderInstruction(size_t offset) {
160 return {g_state.vs.program_code[offset]};
161}
162
163static void LogCritical(const char* msg) {
164 LOG_CRITICAL(HW_GPU, "%s", msg);
165}
166
167void JitShader::Compile_Assert(bool condition, const char* msg) {
168 if (!condition) {
169 mov(ABI_PARAM1, reinterpret_cast<size_t>(msg));
170 CallFarFunction(*this, LogCritical);
171 }
172}
173
174/**
175 * Loads and swizzles a source register into the specified XMM register.
176 * @param instr VS instruction, used for determining how to load the source register
177 * @param src_num Number indicating which source register to load (1 = src1, 2 = src2, 3 = src3)
178 * @param src_reg SourceRegister object corresponding to the source register to load
179 * @param dest Destination XMM register to store the loaded, swizzled source register
180 */
181void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg,
182 Xmm dest) {
183 Reg64 src_ptr;
184 size_t src_offset;
185
186 if (src_reg.GetRegisterType() == RegisterType::FloatUniform) {
187 src_ptr = SETUP;
188 src_offset = ShaderSetup::GetFloatUniformOffset(src_reg.GetIndex());
189 } else { 28 } else {
190 src_ptr = STATE; 29 auto shader = std::make_unique<JitShader>();
191 src_offset = UnitState::InputOffset(src_reg); 30 shader->Compile(&setup.program_code, &setup.swizzle_data);
192 } 31 setup.engine_data.cached_shader = shader.get();
193 32 cache.emplace_hint(iter, cache_key, std::move(shader));
194 int src_offset_disp = (int)src_offset;
195 ASSERT_MSG(src_offset == src_offset_disp, "Source register offset too large for int type");
196
197 unsigned operand_desc_id;
198
199 const bool is_inverted =
200 (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed));
201
202 unsigned address_register_index;
203 unsigned offset_src;
204
205 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD ||
206 instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) {
207 operand_desc_id = instr.mad.operand_desc_id;
208 offset_src = is_inverted ? 3 : 2;
209 address_register_index = instr.mad.address_register_index;
210 } else {
211 operand_desc_id = instr.common.operand_desc_id;
212 offset_src = is_inverted ? 2 : 1;
213 address_register_index = instr.common.address_register_index;
214 }
215
216 if (src_num == offset_src && address_register_index != 0) {
217 switch (address_register_index) {
218 case 1: // address offset 1
219 movaps(dest, xword[src_ptr + ADDROFFS_REG_0 + src_offset_disp]);
220 break;
221 case 2: // address offset 2
222 movaps(dest, xword[src_ptr + ADDROFFS_REG_1 + src_offset_disp]);
223 break;
224 case 3: // address offset 3
225 movaps(dest, xword[src_ptr + LOOPCOUNT_REG.cvt64() + src_offset_disp]);
226 break;
227 default:
228 UNREACHABLE();
229 break;
230 }
231 } else {
232 // Load the source
233 movaps(dest, xword[src_ptr + src_offset_disp]);
234 }
235
236 SwizzlePattern swiz = {g_state.vs.swizzle_data[operand_desc_id]};
237
238 // Generate instructions for source register swizzling as needed
239 u8 sel = swiz.GetRawSelector(src_num);
240 if (sel != NO_SRC_REG_SWIZZLE) {
241 // Selector component order needs to be reversed for the SHUFPS instruction
242 sel = ((sel & 0xc0) >> 6) | ((sel & 3) << 6) | ((sel & 0xc) << 2) | ((sel & 0x30) >> 2);
243
244 // Shuffle inputs for swizzle
245 shufps(dest, dest, sel);
246 }
247
248 // If the source register should be negated, flip the negative bit using XOR
249 const bool negate[] = {swiz.negate_src1, swiz.negate_src2, swiz.negate_src3};
250 if (negate[src_num - 1]) {
251 xorps(dest, NEGBIT);
252 } 33 }
253} 34}
254 35
255void JitShader::Compile_DestEnable(Instruction instr, Xmm src) { 36MICROPROFILE_DECLARE(GPU_Shader);
256 DestRegister dest;
257 unsigned operand_desc_id;
258 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD ||
259 instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) {
260 operand_desc_id = instr.mad.operand_desc_id;
261 dest = instr.mad.dest.Value();
262 } else {
263 operand_desc_id = instr.common.operand_desc_id;
264 dest = instr.common.dest.Value();
265 }
266
267 SwizzlePattern swiz = {g_state.vs.swizzle_data[operand_desc_id]};
268
269 size_t dest_offset_disp = UnitState::OutputOffset(dest);
270
271 // If all components are enabled, write the result to the destination register
272 if (swiz.dest_mask == NO_DEST_REG_MASK) {
273 // Store dest back to memory
274 movaps(xword[STATE + dest_offset_disp], src);
275
276 } else {
277 // Not all components are enabled, so mask the result when storing to the destination
278 // register...
279 movaps(SCRATCH, xword[STATE + dest_offset_disp]);
280
281 if (Common::GetCPUCaps().sse4_1) {
282 u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) |
283 ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1);
284 blendps(SCRATCH, src, mask);
285 } else {
286 movaps(SCRATCH2, src);
287 unpckhps(SCRATCH2, SCRATCH); // Unpack X/Y components of source and destination
288 unpcklps(SCRATCH, src); // Unpack Z/W components of source and destination
289
290 // Compute selector to selectively copy source components to destination for SHUFPS
291 // instruction
292 u8 sel = ((swiz.DestComponentEnabled(0) ? 1 : 0) << 0) |
293 ((swiz.DestComponentEnabled(1) ? 3 : 2) << 2) |
294 ((swiz.DestComponentEnabled(2) ? 0 : 1) << 4) |
295 ((swiz.DestComponentEnabled(3) ? 2 : 3) << 6);
296 shufps(SCRATCH, SCRATCH2, sel);
297 }
298
299 // Store dest back to memory
300 movaps(xword[STATE + dest_offset_disp], SCRATCH);
301 }
302}
303
304void JitShader::Compile_SanitizedMul(Xmm src1, Xmm src2, Xmm scratch) {
305 movaps(scratch, src1);
306 cmpordps(scratch, src2);
307
308 mulps(src1, src2);
309 37
310 movaps(src2, src1); 38void JitX64Engine::Run(const ShaderSetup& setup, UnitState& state) const {
311 cmpunordps(src2, src2); 39 ASSERT(setup.engine_data.cached_shader != nullptr);
312 40
313 xorps(scratch, src2); 41 MICROPROFILE_SCOPE(GPU_Shader);
314 andps(src1, scratch);
315}
316
317void JitShader::Compile_EvaluateCondition(Instruction instr) {
318 // Note: NXOR is used below to check for equality
319 switch (instr.flow_control.op) {
320 case Instruction::FlowControlType::Or:
321 mov(eax, COND0);
322 mov(ebx, COND1);
323 xor(eax, (instr.flow_control.refx.Value() ^ 1));
324 xor(ebx, (instr.flow_control.refy.Value() ^ 1));
325 or (eax, ebx);
326 break;
327
328 case Instruction::FlowControlType::And:
329 mov(eax, COND0);
330 mov(ebx, COND1);
331 xor(eax, (instr.flow_control.refx.Value() ^ 1));
332 xor(ebx, (instr.flow_control.refy.Value() ^ 1));
333 and(eax, ebx);
334 break;
335
336 case Instruction::FlowControlType::JustX:
337 mov(eax, COND0);
338 xor(eax, (instr.flow_control.refx.Value() ^ 1));
339 break;
340
341 case Instruction::FlowControlType::JustY:
342 mov(eax, COND1);
343 xor(eax, (instr.flow_control.refy.Value() ^ 1));
344 break;
345 }
346}
347 42
348void JitShader::Compile_UniformCondition(Instruction instr) { 43 const JitShader* shader = static_cast<const JitShader*>(setup.engine_data.cached_shader);
349 size_t offset = ShaderSetup::GetBoolUniformOffset(instr.flow_control.bool_uniform_id); 44 shader->Run(setup, state, setup.engine_data.entry_point);
350 cmp(byte[SETUP + offset], 0);
351} 45}
352 46
353BitSet32 JitShader::PersistentCallerSavedRegs() {
354 return persistent_regs & ABI_ALL_CALLER_SAVED;
355}
356
357void JitShader::Compile_ADD(Instruction instr) {
358 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
359 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
360 addps(SRC1, SRC2);
361 Compile_DestEnable(instr, SRC1);
362}
363
364void JitShader::Compile_DP3(Instruction instr) {
365 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
366 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
367
368 Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
369
370 movaps(SRC2, SRC1);
371 shufps(SRC2, SRC2, _MM_SHUFFLE(1, 1, 1, 1));
372
373 movaps(SRC3, SRC1);
374 shufps(SRC3, SRC3, _MM_SHUFFLE(2, 2, 2, 2));
375
376 shufps(SRC1, SRC1, _MM_SHUFFLE(0, 0, 0, 0));
377 addps(SRC1, SRC2);
378 addps(SRC1, SRC3);
379
380 Compile_DestEnable(instr, SRC1);
381}
382
383void JitShader::Compile_DP4(Instruction instr) {
384 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
385 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
386
387 Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
388
389 movaps(SRC2, SRC1);
390 shufps(SRC1, SRC1, _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY
391 addps(SRC1, SRC2);
392
393 movaps(SRC2, SRC1);
394 shufps(SRC1, SRC1, _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
395 addps(SRC1, SRC2);
396
397 Compile_DestEnable(instr, SRC1);
398}
399
400void JitShader::Compile_DPH(Instruction instr) {
401 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::DPHI) {
402 Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
403 Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
404 } else {
405 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
406 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
407 }
408
409 if (Common::GetCPUCaps().sse4_1) {
410 // Set 4th component to 1.0
411 blendps(SRC1, ONE, 0b1000);
412 } else {
413 // Set 4th component to 1.0
414 movaps(SCRATCH, SRC1);
415 unpckhps(SCRATCH, ONE); // XYZW, 1111 -> Z1__
416 unpcklpd(SRC1, SCRATCH); // XYZW, Z1__ -> XYZ1
417 }
418
419 Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
420
421 movaps(SRC2, SRC1);
422 shufps(SRC1, SRC1, _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY
423 addps(SRC1, SRC2);
424
425 movaps(SRC2, SRC1);
426 shufps(SRC1, SRC1, _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
427 addps(SRC1, SRC2);
428
429 Compile_DestEnable(instr, SRC1);
430}
431
432void JitShader::Compile_EX2(Instruction instr) {
433 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
434 movss(xmm0, SRC1); // ABI_PARAM1
435
436 ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
437 CallFarFunction(*this, exp2f);
438 ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
439
440 shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); // ABI_RETURN
441 movaps(SRC1, xmm0);
442 Compile_DestEnable(instr, SRC1);
443}
444
445void JitShader::Compile_LG2(Instruction instr) {
446 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
447 movss(xmm0, SRC1); // ABI_PARAM1
448
449 ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
450 CallFarFunction(*this, log2f);
451 ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
452
453 shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); // ABI_RETURN
454 movaps(SRC1, xmm0);
455 Compile_DestEnable(instr, SRC1);
456}
457
458void JitShader::Compile_MUL(Instruction instr) {
459 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
460 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
461 Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
462 Compile_DestEnable(instr, SRC1);
463}
464
465void JitShader::Compile_SGE(Instruction instr) {
466 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SGEI) {
467 Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
468 Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
469 } else {
470 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
471 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
472 }
473
474 cmpleps(SRC2, SRC1);
475 andps(SRC2, ONE);
476
477 Compile_DestEnable(instr, SRC2);
478}
479
480void JitShader::Compile_SLT(Instruction instr) {
481 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SLTI) {
482 Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
483 Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
484 } else {
485 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
486 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
487 }
488
489 cmpltps(SRC1, SRC2);
490 andps(SRC1, ONE);
491
492 Compile_DestEnable(instr, SRC1);
493}
494
495void JitShader::Compile_FLR(Instruction instr) {
496 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
497
498 if (Common::GetCPUCaps().sse4_1) {
499 roundps(SRC1, SRC1, _MM_FROUND_FLOOR);
500 } else {
501 cvttps2dq(SRC1, SRC1);
502 cvtdq2ps(SRC1, SRC1);
503 }
504
505 Compile_DestEnable(instr, SRC1);
506}
507
508void JitShader::Compile_MAX(Instruction instr) {
509 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
510 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
511 // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned.
512 maxps(SRC1, SRC2);
513 Compile_DestEnable(instr, SRC1);
514}
515
516void JitShader::Compile_MIN(Instruction instr) {
517 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
518 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
519 // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned.
520 minps(SRC1, SRC2);
521 Compile_DestEnable(instr, SRC1);
522}
523
524void JitShader::Compile_MOVA(Instruction instr) {
525 SwizzlePattern swiz = {g_state.vs.swizzle_data[instr.common.operand_desc_id]};
526
527 if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) {
528 return; // NoOp
529 }
530
531 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
532
533 // Convert floats to integers using truncation (only care about X and Y components)
534 cvttps2dq(SRC1, SRC1);
535
536 // Get result
537 movq(rax, SRC1);
538
539 // Handle destination enable
540 if (swiz.DestComponentEnabled(0) && swiz.DestComponentEnabled(1)) {
541 // Move and sign-extend low 32 bits
542 movsxd(ADDROFFS_REG_0, eax);
543
544 // Move and sign-extend high 32 bits
545 shr(rax, 32);
546 movsxd(ADDROFFS_REG_1, eax);
547
548 // Multiply by 16 to be used as an offset later
549 shl(ADDROFFS_REG_0, 4);
550 shl(ADDROFFS_REG_1, 4);
551 } else {
552 if (swiz.DestComponentEnabled(0)) {
553 // Move and sign-extend low 32 bits
554 movsxd(ADDROFFS_REG_0, eax);
555
556 // Multiply by 16 to be used as an offset later
557 shl(ADDROFFS_REG_0, 4);
558 } else if (swiz.DestComponentEnabled(1)) {
559 // Move and sign-extend high 32 bits
560 shr(rax, 32);
561 movsxd(ADDROFFS_REG_1, eax);
562
563 // Multiply by 16 to be used as an offset later
564 shl(ADDROFFS_REG_1, 4);
565 }
566 }
567}
568
569void JitShader::Compile_MOV(Instruction instr) {
570 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
571 Compile_DestEnable(instr, SRC1);
572}
573
574void JitShader::Compile_RCP(Instruction instr) {
575 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
576
577 // TODO(bunnei): RCPSS is a pretty rough approximation, this might cause problems if Pica
578 // performs this operation more accurately. This should be checked on hardware.
579 rcpss(SRC1, SRC1);
580 shufps(SRC1, SRC1, _MM_SHUFFLE(0, 0, 0, 0)); // XYWZ -> XXXX
581
582 Compile_DestEnable(instr, SRC1);
583}
584
585void JitShader::Compile_RSQ(Instruction instr) {
586 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
587
588 // TODO(bunnei): RSQRTSS is a pretty rough approximation, this might cause problems if Pica
589 // performs this operation more accurately. This should be checked on hardware.
590 rsqrtss(SRC1, SRC1);
591 shufps(SRC1, SRC1, _MM_SHUFFLE(0, 0, 0, 0)); // XYWZ -> XXXX
592
593 Compile_DestEnable(instr, SRC1);
594}
595
596void JitShader::Compile_NOP(Instruction instr) {}
597
598void JitShader::Compile_END(Instruction instr) {
599 ABI_PopRegistersAndAdjustStack(*this, ABI_ALL_CALLEE_SAVED, 8);
600 ret();
601}
602
603void JitShader::Compile_CALL(Instruction instr) {
604 // Push offset of the return
605 push(qword, (instr.flow_control.dest_offset + instr.flow_control.num_instructions));
606
607 // Call the subroutine
608 call(instruction_labels[instr.flow_control.dest_offset]);
609
610 // Skip over the return offset that's on the stack
611 add(rsp, 8);
612}
613
614void JitShader::Compile_CALLC(Instruction instr) {
615 Compile_EvaluateCondition(instr);
616 Label b;
617 jz(b);
618 Compile_CALL(instr);
619 L(b);
620}
621
622void JitShader::Compile_CALLU(Instruction instr) {
623 Compile_UniformCondition(instr);
624 Label b;
625 jz(b);
626 Compile_CALL(instr);
627 L(b);
628}
629
630void JitShader::Compile_CMP(Instruction instr) {
631 using Op = Instruction::Common::CompareOpType::Op;
632 Op op_x = instr.common.compare_op.x;
633 Op op_y = instr.common.compare_op.y;
634
635 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
636 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
637
638 // SSE doesn't have greater-than (GT) or greater-equal (GE) comparison operators. You need to
639 // emulate them by swapping the lhs and rhs and using LT and LE. NLT and NLE can't be used here
640 // because they don't match when used with NaNs.
641 static const u8 cmp[] = {CMP_EQ, CMP_NEQ, CMP_LT, CMP_LE, CMP_LT, CMP_LE};
642
643 bool invert_op_x = (op_x == Op::GreaterThan || op_x == Op::GreaterEqual);
644 Xmm lhs_x = invert_op_x ? SRC2 : SRC1;
645 Xmm rhs_x = invert_op_x ? SRC1 : SRC2;
646
647 if (op_x == op_y) {
648 // Compare X-component and Y-component together
649 cmpps(lhs_x, rhs_x, cmp[op_x]);
650 movq(COND0, lhs_x);
651
652 mov(COND1, COND0);
653 } else {
654 bool invert_op_y = (op_y == Op::GreaterThan || op_y == Op::GreaterEqual);
655 Xmm lhs_y = invert_op_y ? SRC2 : SRC1;
656 Xmm rhs_y = invert_op_y ? SRC1 : SRC2;
657
658 // Compare X-component
659 movaps(SCRATCH, lhs_x);
660 cmpss(SCRATCH, rhs_x, cmp[op_x]);
661
662 // Compare Y-component
663 cmpps(lhs_y, rhs_y, cmp[op_y]);
664
665 movq(COND0, SCRATCH);
666 movq(COND1, lhs_y);
667 }
668
669 shr(COND0.cvt32(), 31); // ignores upper 32 bits in source
670 shr(COND1, 63);
671}
672
673void JitShader::Compile_MAD(Instruction instr) {
674 Compile_SwizzleSrc(instr, 1, instr.mad.src1, SRC1);
675
676 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) {
677 Compile_SwizzleSrc(instr, 2, instr.mad.src2i, SRC2);
678 Compile_SwizzleSrc(instr, 3, instr.mad.src3i, SRC3);
679 } else {
680 Compile_SwizzleSrc(instr, 2, instr.mad.src2, SRC2);
681 Compile_SwizzleSrc(instr, 3, instr.mad.src3, SRC3);
682 }
683
684 Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
685 addps(SRC1, SRC3);
686
687 Compile_DestEnable(instr, SRC1);
688}
689
690void JitShader::Compile_IF(Instruction instr) {
691 Compile_Assert(instr.flow_control.dest_offset >= program_counter,
692 "Backwards if-statements not supported");
693 Label l_else, l_endif;
694
695 // Evaluate the "IF" condition
696 if (instr.opcode.Value() == OpCode::Id::IFU) {
697 Compile_UniformCondition(instr);
698 } else if (instr.opcode.Value() == OpCode::Id::IFC) {
699 Compile_EvaluateCondition(instr);
700 }
701 jz(l_else, T_NEAR);
702
703 // Compile the code that corresponds to the condition evaluating as true
704 Compile_Block(instr.flow_control.dest_offset);
705
706 // If there isn't an "ELSE" condition, we are done here
707 if (instr.flow_control.num_instructions == 0) {
708 L(l_else);
709 return;
710 }
711
712 jmp(l_endif, T_NEAR);
713
714 L(l_else);
715 // This code corresponds to the "ELSE" condition
716 // Comple the code that corresponds to the condition evaluating as false
717 Compile_Block(instr.flow_control.dest_offset + instr.flow_control.num_instructions);
718
719 L(l_endif);
720}
721
722void JitShader::Compile_LOOP(Instruction instr) {
723 Compile_Assert(instr.flow_control.dest_offset >= program_counter,
724 "Backwards loops not supported");
725 Compile_Assert(!looping, "Nested loops not supported");
726
727 looping = true;
728
729 // This decodes the fields from the integer uniform at index instr.flow_control.int_uniform_id.
730 // The Y (LOOPCOUNT_REG) and Z (LOOPINC) component are kept multiplied by 16 (Left shifted by
731 // 4 bits) to be used as an offset into the 16-byte vector registers later
732 size_t offset = ShaderSetup::GetIntUniformOffset(instr.flow_control.int_uniform_id);
733 mov(LOOPCOUNT, dword[SETUP + offset]);
734 mov(LOOPCOUNT_REG, LOOPCOUNT);
735 shr(LOOPCOUNT_REG, 4);
736 and(LOOPCOUNT_REG, 0xFF0); // Y-component is the start
737 mov(LOOPINC, LOOPCOUNT);
738 shr(LOOPINC, 12);
739 and(LOOPINC, 0xFF0); // Z-component is the incrementer
740 movzx(LOOPCOUNT, LOOPCOUNT.cvt8()); // X-component is iteration count
741 add(LOOPCOUNT, 1); // Iteration count is X-component + 1
742
743 Label l_loop_start;
744 L(l_loop_start);
745
746 Compile_Block(instr.flow_control.dest_offset + 1);
747
748 add(LOOPCOUNT_REG, LOOPINC); // Increment LOOPCOUNT_REG by Z-component
749 sub(LOOPCOUNT, 1); // Increment loop count by 1
750 jnz(l_loop_start); // Loop if not equal
751
752 looping = false;
753}
754
755void JitShader::Compile_JMP(Instruction instr) {
756 if (instr.opcode.Value() == OpCode::Id::JMPC)
757 Compile_EvaluateCondition(instr);
758 else if (instr.opcode.Value() == OpCode::Id::JMPU)
759 Compile_UniformCondition(instr);
760 else
761 UNREACHABLE();
762
763 bool inverted_condition =
764 (instr.opcode.Value() == OpCode::Id::JMPU) && (instr.flow_control.num_instructions & 1);
765
766 Label& b = instruction_labels[instr.flow_control.dest_offset];
767 if (inverted_condition) {
768 jz(b, T_NEAR);
769 } else {
770 jnz(b, T_NEAR);
771 }
772}
773
774void JitShader::Compile_Block(unsigned end) {
775 while (program_counter < end) {
776 Compile_NextInstr();
777 }
778}
779
780void JitShader::Compile_Return() {
781 // Peek return offset on the stack and check if we're at that offset
782 mov(rax, qword[rsp + 8]);
783 cmp(eax, (program_counter));
784
785 // If so, jump back to before CALL
786 Label b;
787 jnz(b);
788 ret();
789 L(b);
790}
791
792void JitShader::Compile_NextInstr() {
793 if (std::binary_search(return_offsets.begin(), return_offsets.end(), program_counter)) {
794 Compile_Return();
795 }
796
797 L(instruction_labels[program_counter]);
798
799 Instruction instr = GetVertexShaderInstruction(program_counter++);
800
801 OpCode::Id opcode = instr.opcode.Value();
802 auto instr_func = instr_table[static_cast<unsigned>(opcode)];
803
804 if (instr_func) {
805 // JIT the instruction!
806 ((*this).*instr_func)(instr);
807 } else {
808 // Unhandled instruction
809 LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (0x%08x)",
810 instr.opcode.Value().EffectiveOpCode(), instr.hex);
811 }
812}
813
814void JitShader::FindReturnOffsets() {
815 return_offsets.clear();
816
817 for (size_t offset = 0; offset < g_state.vs.program_code.size(); ++offset) {
818 Instruction instr = GetVertexShaderInstruction(offset);
819
820 switch (instr.opcode.Value()) {
821 case OpCode::Id::CALL:
822 case OpCode::Id::CALLC:
823 case OpCode::Id::CALLU:
824 return_offsets.push_back(instr.flow_control.dest_offset +
825 instr.flow_control.num_instructions);
826 break;
827 default:
828 break;
829 }
830 }
831
832 // Sort for efficient binary search later
833 std::sort(return_offsets.begin(), return_offsets.end());
834}
835
836void JitShader::Compile() {
837 // Reset flow control state
838 program = (CompiledShader*)getCurr();
839 program_counter = 0;
840 looping = false;
841 instruction_labels.fill(Xbyak::Label());
842
843 // Find all `CALL` instructions and identify return locations
844 FindReturnOffsets();
845
846 // The stack pointer is 8 modulo 16 at the entry of a procedure
847 ABI_PushRegistersAndAdjustStack(*this, ABI_ALL_CALLEE_SAVED, 8);
848
849 mov(SETUP, ABI_PARAM1);
850 mov(STATE, ABI_PARAM2);
851
852 // Zero address/loop registers
853 xor(ADDROFFS_REG_0.cvt32(), ADDROFFS_REG_0.cvt32());
854 xor(ADDROFFS_REG_1.cvt32(), ADDROFFS_REG_1.cvt32());
855 xor(LOOPCOUNT_REG, LOOPCOUNT_REG);
856
857 // Used to set a register to one
858 static const __m128 one = {1.f, 1.f, 1.f, 1.f};
859 mov(rax, reinterpret_cast<size_t>(&one));
860 movaps(ONE, xword[rax]);
861
862 // Used to negate registers
863 static const __m128 neg = {-0.f, -0.f, -0.f, -0.f};
864 mov(rax, reinterpret_cast<size_t>(&neg));
865 movaps(NEGBIT, xword[rax]);
866
867 // Jump to start of the shader program
868 jmp(ABI_PARAM3);
869
870 // Compile entire program
871 Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size()));
872
873 // Free memory that's no longer needed
874 return_offsets.clear();
875 return_offsets.shrink_to_fit();
876
877 ready();
878
879 uintptr_t size = reinterpret_cast<uintptr_t>(getCurr()) - reinterpret_cast<uintptr_t>(program);
880 ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!");
881 LOG_DEBUG(HW_GPU, "Compiled shader size=%lu", size);
882}
883
884JitShader::JitShader() : Xbyak::CodeGenerator(MAX_SHADER_SIZE) {}
885
886} // namespace Shader 47} // namespace Shader
887
888} // namespace Pica 48} // namespace Pica
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index f37548306..078b2cba5 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -1,121 +1,30 @@
1// Copyright 2015 Citra Emulator Project 1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <memory>
8#include <cstddef> 8#include <unordered_map>
9#include <utility>
10#include <vector>
11#include <nihstro/shader_bytecode.h>
12#include <xbyak.h>
13#include "common/bit_set.h"
14#include "common/common_types.h" 9#include "common/common_types.h"
15#include "common/x64/emitter.h"
16#include "video_core/shader/shader.h" 10#include "video_core/shader/shader.h"
17 11
18using nihstro::Instruction;
19using nihstro::OpCode;
20using nihstro::SwizzlePattern;
21
22namespace Pica { 12namespace Pica {
23
24namespace Shader { 13namespace Shader {
25 14
26/// Memory allocated for each compiled shader (64Kb) 15class JitShader;
27constexpr size_t MAX_SHADER_SIZE = 1024 * 64;
28 16
29/** 17class JitX64Engine final : public ShaderEngine {
30 * This class implements the shader JIT compiler. It recompiles a Pica shader program into x86_64
31 * code that can be executed on the host machine directly.
32 */
33class JitShader : public Xbyak::CodeGenerator {
34public: 18public:
35 JitShader(); 19 JitX64Engine();
36 20 ~JitX64Engine() override;
37 void Run(const ShaderSetup& setup, UnitState& state, unsigned offset) const {
38 program(&setup, &state, instruction_labels[offset].getAddress());
39 }
40
41 void Compile();
42 21
43 void Compile_ADD(Instruction instr); 22 void SetupBatch(ShaderSetup& setup, unsigned int entry_point) override;
44 void Compile_DP3(Instruction instr); 23 void Run(const ShaderSetup& setup, UnitState& state) const override;
45 void Compile_DP4(Instruction instr);
46 void Compile_DPH(Instruction instr);
47 void Compile_EX2(Instruction instr);
48 void Compile_LG2(Instruction instr);
49 void Compile_MUL(Instruction instr);
50 void Compile_SGE(Instruction instr);
51 void Compile_SLT(Instruction instr);
52 void Compile_FLR(Instruction instr);
53 void Compile_MAX(Instruction instr);
54 void Compile_MIN(Instruction instr);
55 void Compile_RCP(Instruction instr);
56 void Compile_RSQ(Instruction instr);
57 void Compile_MOVA(Instruction instr);
58 void Compile_MOV(Instruction instr);
59 void Compile_NOP(Instruction instr);
60 void Compile_END(Instruction instr);
61 void Compile_CALL(Instruction instr);
62 void Compile_CALLC(Instruction instr);
63 void Compile_CALLU(Instruction instr);
64 void Compile_IF(Instruction instr);
65 void Compile_LOOP(Instruction instr);
66 void Compile_JMP(Instruction instr);
67 void Compile_CMP(Instruction instr);
68 void Compile_MAD(Instruction instr);
69 24
70private: 25private:
71 void Compile_Block(unsigned end); 26 std::unordered_map<u64, std::unique_ptr<JitShader>> cache;
72 void Compile_NextInstr();
73
74 void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg,
75 Xbyak::Xmm dest);
76 void Compile_DestEnable(Instruction instr, Xbyak::Xmm dest);
77
78 /**
79 * Compiles a `MUL src1, src2` operation, properly handling the PICA semantics when multiplying
80 * zero by inf. Clobbers `src2` and `scratch`.
81 */
82 void Compile_SanitizedMul(Xbyak::Xmm src1, Xbyak::Xmm src2, Xbyak::Xmm scratch);
83
84 void Compile_EvaluateCondition(Instruction instr);
85 void Compile_UniformCondition(Instruction instr);
86
87 /**
88 * Emits the code to conditionally return from a subroutine invoked by the `CALL` instruction.
89 */
90 void Compile_Return();
91
92 BitSet32 PersistentCallerSavedRegs();
93
94 /**
95 * Assertion evaluated at compile-time, but only triggered if executed at runtime.
96 * @param msg Message to be logged if the assertion fails.
97 */
98 void Compile_Assert(bool condition, const char* msg);
99
100 /**
101 * Analyzes the entire shader program for `CALL` instructions before emitting any code,
102 * identifying the locations where a return needs to be inserted.
103 */
104 void FindReturnOffsets();
105
106 /// Mapping of Pica VS instructions to pointers in the emitted code
107 std::array<Xbyak::Label, 1024> instruction_labels;
108
109 /// Offsets in code where a return needs to be inserted
110 std::vector<unsigned> return_offsets;
111
112 unsigned program_counter = 0; ///< Offset of the next instruction to decode
113 bool looping = false; ///< True if compiling a loop, used to check for nested loops
114
115 using CompiledShader = void(const void* setup, void* state, const u8* start_addr);
116 CompiledShader* program = nullptr;
117}; 27};
118 28
119} // Shader 29} // namespace Shader
120 30} // namespace Pica
121} // Pica
diff --git a/src/video_core/shader/shader_jit_x64_compiler.cpp b/src/video_core/shader/shader_jit_x64_compiler.cpp
new file mode 100644
index 000000000..2dbc8b147
--- /dev/null
+++ b/src/video_core/shader/shader_jit_x64_compiler.cpp
@@ -0,0 +1,897 @@
1// Copyright 2015 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <cmath>
7#include <cstdint>
8#include <nihstro/shader_bytecode.h>
9#include <smmintrin.h>
10#include <xmmintrin.h>
11#include "common/assert.h"
12#include "common/logging/log.h"
13#include "common/vector_math.h"
14#include "common/x64/cpu_detect.h"
15#include "common/x64/xbyak_abi.h"
16#include "common/x64/xbyak_util.h"
17#include "video_core/pica_state.h"
18#include "video_core/pica_types.h"
19#include "video_core/shader/shader.h"
20#include "video_core/shader/shader_jit_x64_compiler.h"
21
22using namespace Common::X64;
23using namespace Xbyak::util;
24using Xbyak::Label;
25using Xbyak::Reg32;
26using Xbyak::Reg64;
27using Xbyak::Xmm;
28
29namespace Pica {
30
31namespace Shader {
32
33typedef void (JitShader::*JitFunction)(Instruction instr);
34
35const JitFunction instr_table[64] = {
36 &JitShader::Compile_ADD, // add
37 &JitShader::Compile_DP3, // dp3
38 &JitShader::Compile_DP4, // dp4
39 &JitShader::Compile_DPH, // dph
40 nullptr, // unknown
41 &JitShader::Compile_EX2, // ex2
42 &JitShader::Compile_LG2, // lg2
43 nullptr, // unknown
44 &JitShader::Compile_MUL, // mul
45 &JitShader::Compile_SGE, // sge
46 &JitShader::Compile_SLT, // slt
47 &JitShader::Compile_FLR, // flr
48 &JitShader::Compile_MAX, // max
49 &JitShader::Compile_MIN, // min
50 &JitShader::Compile_RCP, // rcp
51 &JitShader::Compile_RSQ, // rsq
52 nullptr, // unknown
53 nullptr, // unknown
54 &JitShader::Compile_MOVA, // mova
55 &JitShader::Compile_MOV, // mov
56 nullptr, // unknown
57 nullptr, // unknown
58 nullptr, // unknown
59 nullptr, // unknown
60 &JitShader::Compile_DPH, // dphi
61 nullptr, // unknown
62 &JitShader::Compile_SGE, // sgei
63 &JitShader::Compile_SLT, // slti
64 nullptr, // unknown
65 nullptr, // unknown
66 nullptr, // unknown
67 nullptr, // unknown
68 nullptr, // unknown
69 &JitShader::Compile_NOP, // nop
70 &JitShader::Compile_END, // end
71 nullptr, // break
72 &JitShader::Compile_CALL, // call
73 &JitShader::Compile_CALLC, // callc
74 &JitShader::Compile_CALLU, // callu
75 &JitShader::Compile_IF, // ifu
76 &JitShader::Compile_IF, // ifc
77 &JitShader::Compile_LOOP, // loop
78 nullptr, // emit
79 nullptr, // sete
80 &JitShader::Compile_JMP, // jmpc
81 &JitShader::Compile_JMP, // jmpu
82 &JitShader::Compile_CMP, // cmp
83 &JitShader::Compile_CMP, // cmp
84 &JitShader::Compile_MAD, // madi
85 &JitShader::Compile_MAD, // madi
86 &JitShader::Compile_MAD, // madi
87 &JitShader::Compile_MAD, // madi
88 &JitShader::Compile_MAD, // madi
89 &JitShader::Compile_MAD, // madi
90 &JitShader::Compile_MAD, // madi
91 &JitShader::Compile_MAD, // madi
92 &JitShader::Compile_MAD, // mad
93 &JitShader::Compile_MAD, // mad
94 &JitShader::Compile_MAD, // mad
95 &JitShader::Compile_MAD, // mad
96 &JitShader::Compile_MAD, // mad
97 &JitShader::Compile_MAD, // mad
98 &JitShader::Compile_MAD, // mad
99 &JitShader::Compile_MAD, // mad
100};
101
102// The following is used to alias some commonly used registers. Generally, RAX-RDX and XMM0-XMM3 can
103// be used as scratch registers within a compiler function. The other registers have designated
104// purposes, as documented below:
105
106/// Pointer to the uniform memory
107static const Reg64 SETUP = r9;
108/// The two 32-bit VS address offset registers set by the MOVA instruction
109static const Reg64 ADDROFFS_REG_0 = r10;
110static const Reg64 ADDROFFS_REG_1 = r11;
111/// VS loop count register (Multiplied by 16)
112static const Reg32 LOOPCOUNT_REG = r12d;
113/// Current VS loop iteration number (we could probably use LOOPCOUNT_REG, but this quicker)
114static const Reg32 LOOPCOUNT = esi;
115/// Number to increment LOOPCOUNT_REG by on each loop iteration (Multiplied by 16)
116static const Reg32 LOOPINC = edi;
117/// Result of the previous CMP instruction for the X-component comparison
118static const Reg64 COND0 = r13;
119/// Result of the previous CMP instruction for the Y-component comparison
120static const Reg64 COND1 = r14;
121/// Pointer to the UnitState instance for the current VS unit
122static const Reg64 STATE = r15;
123/// SIMD scratch register
124static const Xmm SCRATCH = xmm0;
125/// Loaded with the first swizzled source register, otherwise can be used as a scratch register
126static const Xmm SRC1 = xmm1;
127/// Loaded with the second swizzled source register, otherwise can be used as a scratch register
128static const Xmm SRC2 = xmm2;
129/// Loaded with the third swizzled source register, otherwise can be used as a scratch register
130static const Xmm SRC3 = xmm3;
131/// Additional scratch register
132static const Xmm SCRATCH2 = xmm4;
133/// Constant vector of [1.0f, 1.0f, 1.0f, 1.0f], used to efficiently set a vector to one
134static const Xmm ONE = xmm14;
135/// Constant vector of [-0.f, -0.f, -0.f, -0.f], used to efficiently negate a vector with XOR
136static const Xmm NEGBIT = xmm15;
137
138// State registers that must not be modified by external functions calls
139// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
140static const BitSet32 persistent_regs = BuildRegSet({
141 // Pointers to register blocks
142 SETUP, STATE,
143 // Cached registers
144 ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1,
145 // Constants
146 ONE, NEGBIT,
147 // Loop variables
148 LOOPCOUNT, LOOPINC,
149});
150
151/// Raw constant for the source register selector that indicates no swizzling is performed
152static const u8 NO_SRC_REG_SWIZZLE = 0x1b;
153/// Raw constant for the destination register enable mask that indicates all components are enabled
154static const u8 NO_DEST_REG_MASK = 0xf;
155
156static void LogCritical(const char* msg) {
157 LOG_CRITICAL(HW_GPU, "%s", msg);
158}
159
160void JitShader::Compile_Assert(bool condition, const char* msg) {
161 if (!condition) {
162 mov(ABI_PARAM1, reinterpret_cast<size_t>(msg));
163 CallFarFunction(*this, LogCritical);
164 }
165}
166
167/**
168 * Loads and swizzles a source register into the specified XMM register.
169 * @param instr VS instruction, used for determining how to load the source register
170 * @param src_num Number indicating which source register to load (1 = src1, 2 = src2, 3 = src3)
171 * @param src_reg SourceRegister object corresponding to the source register to load
172 * @param dest Destination XMM register to store the loaded, swizzled source register
173 */
174void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg,
175 Xmm dest) {
176 Reg64 src_ptr;
177 size_t src_offset;
178
179 if (src_reg.GetRegisterType() == RegisterType::FloatUniform) {
180 src_ptr = SETUP;
181 src_offset = ShaderSetup::GetFloatUniformOffset(src_reg.GetIndex());
182 } else {
183 src_ptr = STATE;
184 src_offset = UnitState::InputOffset(src_reg);
185 }
186
187 int src_offset_disp = (int)src_offset;
188 ASSERT_MSG(src_offset == src_offset_disp, "Source register offset too large for int type");
189
190 unsigned operand_desc_id;
191
192 const bool is_inverted =
193 (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed));
194
195 unsigned address_register_index;
196 unsigned offset_src;
197
198 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD ||
199 instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) {
200 operand_desc_id = instr.mad.operand_desc_id;
201 offset_src = is_inverted ? 3 : 2;
202 address_register_index = instr.mad.address_register_index;
203 } else {
204 operand_desc_id = instr.common.operand_desc_id;
205 offset_src = is_inverted ? 2 : 1;
206 address_register_index = instr.common.address_register_index;
207 }
208
209 if (src_num == offset_src && address_register_index != 0) {
210 switch (address_register_index) {
211 case 1: // address offset 1
212 movaps(dest, xword[src_ptr + ADDROFFS_REG_0 + src_offset_disp]);
213 break;
214 case 2: // address offset 2
215 movaps(dest, xword[src_ptr + ADDROFFS_REG_1 + src_offset_disp]);
216 break;
217 case 3: // address offset 3
218 movaps(dest, xword[src_ptr + LOOPCOUNT_REG.cvt64() + src_offset_disp]);
219 break;
220 default:
221 UNREACHABLE();
222 break;
223 }
224 } else {
225 // Load the source
226 movaps(dest, xword[src_ptr + src_offset_disp]);
227 }
228
229 SwizzlePattern swiz = {(*swizzle_data)[operand_desc_id]};
230
231 // Generate instructions for source register swizzling as needed
232 u8 sel = swiz.GetRawSelector(src_num);
233 if (sel != NO_SRC_REG_SWIZZLE) {
234 // Selector component order needs to be reversed for the SHUFPS instruction
235 sel = ((sel & 0xc0) >> 6) | ((sel & 3) << 6) | ((sel & 0xc) << 2) | ((sel & 0x30) >> 2);
236
237 // Shuffle inputs for swizzle
238 shufps(dest, dest, sel);
239 }
240
241 // If the source register should be negated, flip the negative bit using XOR
242 const bool negate[] = {swiz.negate_src1, swiz.negate_src2, swiz.negate_src3};
243 if (negate[src_num - 1]) {
244 xorps(dest, NEGBIT);
245 }
246}
247
248void JitShader::Compile_DestEnable(Instruction instr, Xmm src) {
249 DestRegister dest;
250 unsigned operand_desc_id;
251 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD ||
252 instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) {
253 operand_desc_id = instr.mad.operand_desc_id;
254 dest = instr.mad.dest.Value();
255 } else {
256 operand_desc_id = instr.common.operand_desc_id;
257 dest = instr.common.dest.Value();
258 }
259
260 SwizzlePattern swiz = {(*swizzle_data)[operand_desc_id]};
261
262 size_t dest_offset_disp = UnitState::OutputOffset(dest);
263
264 // If all components are enabled, write the result to the destination register
265 if (swiz.dest_mask == NO_DEST_REG_MASK) {
266 // Store dest back to memory
267 movaps(xword[STATE + dest_offset_disp], src);
268
269 } else {
270 // Not all components are enabled, so mask the result when storing to the destination
271 // register...
272 movaps(SCRATCH, xword[STATE + dest_offset_disp]);
273
274 if (Common::GetCPUCaps().sse4_1) {
275 u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) |
276 ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1);
277 blendps(SCRATCH, src, mask);
278 } else {
279 movaps(SCRATCH2, src);
280 unpckhps(SCRATCH2, SCRATCH); // Unpack X/Y components of source and destination
281 unpcklps(SCRATCH, src); // Unpack Z/W components of source and destination
282
283 // Compute selector to selectively copy source components to destination for SHUFPS
284 // instruction
285 u8 sel = ((swiz.DestComponentEnabled(0) ? 1 : 0) << 0) |
286 ((swiz.DestComponentEnabled(1) ? 3 : 2) << 2) |
287 ((swiz.DestComponentEnabled(2) ? 0 : 1) << 4) |
288 ((swiz.DestComponentEnabled(3) ? 2 : 3) << 6);
289 shufps(SCRATCH, SCRATCH2, sel);
290 }
291
292 // Store dest back to memory
293 movaps(xword[STATE + dest_offset_disp], SCRATCH);
294 }
295}
296
297void JitShader::Compile_SanitizedMul(Xmm src1, Xmm src2, Xmm scratch) {
298 // 0 * inf and inf * 0 in the PICA should return 0 instead of NaN. This can be implemented by
299 // checking for NaNs before and after the multiplication. If the multiplication result is NaN
300 // where neither source was, this NaN was generated by a 0 * inf multiplication, and so the
301 // result should be transformed to 0 to match PICA fp rules.
302
303 // Set scratch to mask of (src1 != NaN and src2 != NaN)
304 movaps(scratch, src1);
305 cmpordps(scratch, src2);
306
307 mulps(src1, src2);
308
309 // Set src2 to mask of (result == NaN)
310 movaps(src2, src1);
311 cmpunordps(src2, src2);
312
313 // Clear components where scratch != src2 (i.e. if result is NaN where neither source was NaN)
314 xorps(scratch, src2);
315 andps(src1, scratch);
316}
317
318void JitShader::Compile_EvaluateCondition(Instruction instr) {
319 // Note: NXOR is used below to check for equality
320 switch (instr.flow_control.op) {
321 case Instruction::FlowControlType::Or:
322 mov(eax, COND0);
323 mov(ebx, COND1);
324 xor(eax, (instr.flow_control.refx.Value() ^ 1));
325 xor(ebx, (instr.flow_control.refy.Value() ^ 1));
326 or (eax, ebx);
327 break;
328
329 case Instruction::FlowControlType::And:
330 mov(eax, COND0);
331 mov(ebx, COND1);
332 xor(eax, (instr.flow_control.refx.Value() ^ 1));
333 xor(ebx, (instr.flow_control.refy.Value() ^ 1));
334 and(eax, ebx);
335 break;
336
337 case Instruction::FlowControlType::JustX:
338 mov(eax, COND0);
339 xor(eax, (instr.flow_control.refx.Value() ^ 1));
340 break;
341
342 case Instruction::FlowControlType::JustY:
343 mov(eax, COND1);
344 xor(eax, (instr.flow_control.refy.Value() ^ 1));
345 break;
346 }
347}
348
349void JitShader::Compile_UniformCondition(Instruction instr) {
350 size_t offset = ShaderSetup::GetBoolUniformOffset(instr.flow_control.bool_uniform_id);
351 cmp(byte[SETUP + offset], 0);
352}
353
354BitSet32 JitShader::PersistentCallerSavedRegs() {
355 return persistent_regs & ABI_ALL_CALLER_SAVED;
356}
357
358void JitShader::Compile_ADD(Instruction instr) {
359 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
360 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
361 addps(SRC1, SRC2);
362 Compile_DestEnable(instr, SRC1);
363}
364
365void JitShader::Compile_DP3(Instruction instr) {
366 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
367 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
368
369 Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
370
371 movaps(SRC2, SRC1);
372 shufps(SRC2, SRC2, _MM_SHUFFLE(1, 1, 1, 1));
373
374 movaps(SRC3, SRC1);
375 shufps(SRC3, SRC3, _MM_SHUFFLE(2, 2, 2, 2));
376
377 shufps(SRC1, SRC1, _MM_SHUFFLE(0, 0, 0, 0));
378 addps(SRC1, SRC2);
379 addps(SRC1, SRC3);
380
381 Compile_DestEnable(instr, SRC1);
382}
383
384void JitShader::Compile_DP4(Instruction instr) {
385 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
386 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
387
388 Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
389
390 movaps(SRC2, SRC1);
391 shufps(SRC1, SRC1, _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY
392 addps(SRC1, SRC2);
393
394 movaps(SRC2, SRC1);
395 shufps(SRC1, SRC1, _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
396 addps(SRC1, SRC2);
397
398 Compile_DestEnable(instr, SRC1);
399}
400
401void JitShader::Compile_DPH(Instruction instr) {
402 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::DPHI) {
403 Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
404 Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
405 } else {
406 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
407 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
408 }
409
410 if (Common::GetCPUCaps().sse4_1) {
411 // Set 4th component to 1.0
412 blendps(SRC1, ONE, 0b1000);
413 } else {
414 // Set 4th component to 1.0
415 movaps(SCRATCH, SRC1);
416 unpckhps(SCRATCH, ONE); // XYZW, 1111 -> Z1__
417 unpcklpd(SRC1, SCRATCH); // XYZW, Z1__ -> XYZ1
418 }
419
420 Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
421
422 movaps(SRC2, SRC1);
423 shufps(SRC1, SRC1, _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY
424 addps(SRC1, SRC2);
425
426 movaps(SRC2, SRC1);
427 shufps(SRC1, SRC1, _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
428 addps(SRC1, SRC2);
429
430 Compile_DestEnable(instr, SRC1);
431}
432
433void JitShader::Compile_EX2(Instruction instr) {
434 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
435 movss(xmm0, SRC1); // ABI_PARAM1
436
437 ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
438 CallFarFunction(*this, exp2f);
439 ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
440
441 shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); // ABI_RETURN
442 movaps(SRC1, xmm0);
443 Compile_DestEnable(instr, SRC1);
444}
445
446void JitShader::Compile_LG2(Instruction instr) {
447 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
448 movss(xmm0, SRC1); // ABI_PARAM1
449
450 ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
451 CallFarFunction(*this, log2f);
452 ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
453
454 shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); // ABI_RETURN
455 movaps(SRC1, xmm0);
456 Compile_DestEnable(instr, SRC1);
457}
458
459void JitShader::Compile_MUL(Instruction instr) {
460 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
461 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
462 Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
463 Compile_DestEnable(instr, SRC1);
464}
465
466void JitShader::Compile_SGE(Instruction instr) {
467 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SGEI) {
468 Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
469 Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
470 } else {
471 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
472 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
473 }
474
475 cmpleps(SRC2, SRC1);
476 andps(SRC2, ONE);
477
478 Compile_DestEnable(instr, SRC2);
479}
480
481void JitShader::Compile_SLT(Instruction instr) {
482 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SLTI) {
483 Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
484 Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
485 } else {
486 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
487 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
488 }
489
490 cmpltps(SRC1, SRC2);
491 andps(SRC1, ONE);
492
493 Compile_DestEnable(instr, SRC1);
494}
495
496void JitShader::Compile_FLR(Instruction instr) {
497 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
498
499 if (Common::GetCPUCaps().sse4_1) {
500 roundps(SRC1, SRC1, _MM_FROUND_FLOOR);
501 } else {
502 cvttps2dq(SRC1, SRC1);
503 cvtdq2ps(SRC1, SRC1);
504 }
505
506 Compile_DestEnable(instr, SRC1);
507}
508
509void JitShader::Compile_MAX(Instruction instr) {
510 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
511 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
512 // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned.
513 maxps(SRC1, SRC2);
514 Compile_DestEnable(instr, SRC1);
515}
516
517void JitShader::Compile_MIN(Instruction instr) {
518 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
519 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
520 // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned.
521 minps(SRC1, SRC2);
522 Compile_DestEnable(instr, SRC1);
523}
524
525void JitShader::Compile_MOVA(Instruction instr) {
526 SwizzlePattern swiz = {(*swizzle_data)[instr.common.operand_desc_id]};
527
528 if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) {
529 return; // NoOp
530 }
531
532 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
533
534 // Convert floats to integers using truncation (only care about X and Y components)
535 cvttps2dq(SRC1, SRC1);
536
537 // Get result
538 movq(rax, SRC1);
539
540 // Handle destination enable
541 if (swiz.DestComponentEnabled(0) && swiz.DestComponentEnabled(1)) {
542 // Move and sign-extend low 32 bits
543 movsxd(ADDROFFS_REG_0, eax);
544
545 // Move and sign-extend high 32 bits
546 shr(rax, 32);
547 movsxd(ADDROFFS_REG_1, eax);
548
549 // Multiply by 16 to be used as an offset later
550 shl(ADDROFFS_REG_0, 4);
551 shl(ADDROFFS_REG_1, 4);
552 } else {
553 if (swiz.DestComponentEnabled(0)) {
554 // Move and sign-extend low 32 bits
555 movsxd(ADDROFFS_REG_0, eax);
556
557 // Multiply by 16 to be used as an offset later
558 shl(ADDROFFS_REG_0, 4);
559 } else if (swiz.DestComponentEnabled(1)) {
560 // Move and sign-extend high 32 bits
561 shr(rax, 32);
562 movsxd(ADDROFFS_REG_1, eax);
563
564 // Multiply by 16 to be used as an offset later
565 shl(ADDROFFS_REG_1, 4);
566 }
567 }
568}
569
570void JitShader::Compile_MOV(Instruction instr) {
571 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
572 Compile_DestEnable(instr, SRC1);
573}
574
575void JitShader::Compile_RCP(Instruction instr) {
576 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
577
578 // TODO(bunnei): RCPSS is a pretty rough approximation, this might cause problems if Pica
579 // performs this operation more accurately. This should be checked on hardware.
580 rcpss(SRC1, SRC1);
581 shufps(SRC1, SRC1, _MM_SHUFFLE(0, 0, 0, 0)); // XYWZ -> XXXX
582
583 Compile_DestEnable(instr, SRC1);
584}
585
586void JitShader::Compile_RSQ(Instruction instr) {
587 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
588
589 // TODO(bunnei): RSQRTSS is a pretty rough approximation, this might cause problems if Pica
590 // performs this operation more accurately. This should be checked on hardware.
591 rsqrtss(SRC1, SRC1);
592 shufps(SRC1, SRC1, _MM_SHUFFLE(0, 0, 0, 0)); // XYWZ -> XXXX
593
594 Compile_DestEnable(instr, SRC1);
595}
596
597void JitShader::Compile_NOP(Instruction instr) {}
598
599void JitShader::Compile_END(Instruction instr) {
600 ABI_PopRegistersAndAdjustStack(*this, ABI_ALL_CALLEE_SAVED, 8, 16);
601 ret();
602}
603
604void JitShader::Compile_CALL(Instruction instr) {
605 // Push offset of the return
606 push(qword, (instr.flow_control.dest_offset + instr.flow_control.num_instructions));
607
608 // Call the subroutine
609 call(instruction_labels[instr.flow_control.dest_offset]);
610
611 // Skip over the return offset that's on the stack
612 add(rsp, 8);
613}
614
615void JitShader::Compile_CALLC(Instruction instr) {
616 Compile_EvaluateCondition(instr);
617 Label b;
618 jz(b);
619 Compile_CALL(instr);
620 L(b);
621}
622
623void JitShader::Compile_CALLU(Instruction instr) {
624 Compile_UniformCondition(instr);
625 Label b;
626 jz(b);
627 Compile_CALL(instr);
628 L(b);
629}
630
631void JitShader::Compile_CMP(Instruction instr) {
632 using Op = Instruction::Common::CompareOpType::Op;
633 Op op_x = instr.common.compare_op.x;
634 Op op_y = instr.common.compare_op.y;
635
636 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
637 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
638
639 // SSE doesn't have greater-than (GT) or greater-equal (GE) comparison operators. You need to
640 // emulate them by swapping the lhs and rhs and using LT and LE. NLT and NLE can't be used here
641 // because they don't match when used with NaNs.
642 static const u8 cmp[] = {CMP_EQ, CMP_NEQ, CMP_LT, CMP_LE, CMP_LT, CMP_LE};
643
644 bool invert_op_x = (op_x == Op::GreaterThan || op_x == Op::GreaterEqual);
645 Xmm lhs_x = invert_op_x ? SRC2 : SRC1;
646 Xmm rhs_x = invert_op_x ? SRC1 : SRC2;
647
648 if (op_x == op_y) {
649 // Compare X-component and Y-component together
650 cmpps(lhs_x, rhs_x, cmp[op_x]);
651 movq(COND0, lhs_x);
652
653 mov(COND1, COND0);
654 } else {
655 bool invert_op_y = (op_y == Op::GreaterThan || op_y == Op::GreaterEqual);
656 Xmm lhs_y = invert_op_y ? SRC2 : SRC1;
657 Xmm rhs_y = invert_op_y ? SRC1 : SRC2;
658
659 // Compare X-component
660 movaps(SCRATCH, lhs_x);
661 cmpss(SCRATCH, rhs_x, cmp[op_x]);
662
663 // Compare Y-component
664 cmpps(lhs_y, rhs_y, cmp[op_y]);
665
666 movq(COND0, SCRATCH);
667 movq(COND1, lhs_y);
668 }
669
670 shr(COND0.cvt32(), 31); // ignores upper 32 bits in source
671 shr(COND1, 63);
672}
673
674void JitShader::Compile_MAD(Instruction instr) {
675 Compile_SwizzleSrc(instr, 1, instr.mad.src1, SRC1);
676
677 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) {
678 Compile_SwizzleSrc(instr, 2, instr.mad.src2i, SRC2);
679 Compile_SwizzleSrc(instr, 3, instr.mad.src3i, SRC3);
680 } else {
681 Compile_SwizzleSrc(instr, 2, instr.mad.src2, SRC2);
682 Compile_SwizzleSrc(instr, 3, instr.mad.src3, SRC3);
683 }
684
685 Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
686 addps(SRC1, SRC3);
687
688 Compile_DestEnable(instr, SRC1);
689}
690
691void JitShader::Compile_IF(Instruction instr) {
692 Compile_Assert(instr.flow_control.dest_offset >= program_counter,
693 "Backwards if-statements not supported");
694 Label l_else, l_endif;
695
696 // Evaluate the "IF" condition
697 if (instr.opcode.Value() == OpCode::Id::IFU) {
698 Compile_UniformCondition(instr);
699 } else if (instr.opcode.Value() == OpCode::Id::IFC) {
700 Compile_EvaluateCondition(instr);
701 }
702 jz(l_else, T_NEAR);
703
704 // Compile the code that corresponds to the condition evaluating as true
705 Compile_Block(instr.flow_control.dest_offset);
706
707 // If there isn't an "ELSE" condition, we are done here
708 if (instr.flow_control.num_instructions == 0) {
709 L(l_else);
710 return;
711 }
712
713 jmp(l_endif, T_NEAR);
714
715 L(l_else);
716 // This code corresponds to the "ELSE" condition
717 // Comple the code that corresponds to the condition evaluating as false
718 Compile_Block(instr.flow_control.dest_offset + instr.flow_control.num_instructions);
719
720 L(l_endif);
721}
722
723void JitShader::Compile_LOOP(Instruction instr) {
724 Compile_Assert(instr.flow_control.dest_offset >= program_counter,
725 "Backwards loops not supported");
726 Compile_Assert(!looping, "Nested loops not supported");
727
728 looping = true;
729
730 // This decodes the fields from the integer uniform at index instr.flow_control.int_uniform_id.
731 // The Y (LOOPCOUNT_REG) and Z (LOOPINC) component are kept multiplied by 16 (Left shifted by
732 // 4 bits) to be used as an offset into the 16-byte vector registers later
733 size_t offset = ShaderSetup::GetIntUniformOffset(instr.flow_control.int_uniform_id);
734 mov(LOOPCOUNT, dword[SETUP + offset]);
735 mov(LOOPCOUNT_REG, LOOPCOUNT);
736 shr(LOOPCOUNT_REG, 4);
737 and(LOOPCOUNT_REG, 0xFF0); // Y-component is the start
738 mov(LOOPINC, LOOPCOUNT);
739 shr(LOOPINC, 12);
740 and(LOOPINC, 0xFF0); // Z-component is the incrementer
741 movzx(LOOPCOUNT, LOOPCOUNT.cvt8()); // X-component is iteration count
742 add(LOOPCOUNT, 1); // Iteration count is X-component + 1
743
744 Label l_loop_start;
745 L(l_loop_start);
746
747 Compile_Block(instr.flow_control.dest_offset + 1);
748
749 add(LOOPCOUNT_REG, LOOPINC); // Increment LOOPCOUNT_REG by Z-component
750 sub(LOOPCOUNT, 1); // Increment loop count by 1
751 jnz(l_loop_start); // Loop if not equal
752
753 looping = false;
754}
755
756void JitShader::Compile_JMP(Instruction instr) {
757 if (instr.opcode.Value() == OpCode::Id::JMPC)
758 Compile_EvaluateCondition(instr);
759 else if (instr.opcode.Value() == OpCode::Id::JMPU)
760 Compile_UniformCondition(instr);
761 else
762 UNREACHABLE();
763
764 bool inverted_condition =
765 (instr.opcode.Value() == OpCode::Id::JMPU) && (instr.flow_control.num_instructions & 1);
766
767 Label& b = instruction_labels[instr.flow_control.dest_offset];
768 if (inverted_condition) {
769 jz(b, T_NEAR);
770 } else {
771 jnz(b, T_NEAR);
772 }
773}
774
775void JitShader::Compile_Block(unsigned end) {
776 while (program_counter < end) {
777 Compile_NextInstr();
778 }
779}
780
781void JitShader::Compile_Return() {
782 // Peek return offset on the stack and check if we're at that offset
783 mov(rax, qword[rsp + 8]);
784 cmp(eax, (program_counter));
785
786 // If so, jump back to before CALL
787 Label b;
788 jnz(b);
789 ret();
790 L(b);
791}
792
793void JitShader::Compile_NextInstr() {
794 if (std::binary_search(return_offsets.begin(), return_offsets.end(), program_counter)) {
795 Compile_Return();
796 }
797
798 L(instruction_labels[program_counter]);
799
800 Instruction instr = {(*program_code)[program_counter++]};
801
802 OpCode::Id opcode = instr.opcode.Value();
803 auto instr_func = instr_table[static_cast<unsigned>(opcode)];
804
805 if (instr_func) {
806 // JIT the instruction!
807 ((*this).*instr_func)(instr);
808 } else {
809 // Unhandled instruction
810 LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (0x%08x)",
811 instr.opcode.Value().EffectiveOpCode(), instr.hex);
812 }
813}
814
815void JitShader::FindReturnOffsets() {
816 return_offsets.clear();
817
818 for (size_t offset = 0; offset < program_code->size(); ++offset) {
819 Instruction instr = {(*program_code)[offset]};
820
821 switch (instr.opcode.Value()) {
822 case OpCode::Id::CALL:
823 case OpCode::Id::CALLC:
824 case OpCode::Id::CALLU:
825 return_offsets.push_back(instr.flow_control.dest_offset +
826 instr.flow_control.num_instructions);
827 break;
828 default:
829 break;
830 }
831 }
832
833 // Sort for efficient binary search later
834 std::sort(return_offsets.begin(), return_offsets.end());
835}
836
837void JitShader::Compile(const std::array<u32, 1024>* program_code_,
838 const std::array<u32, 1024>* swizzle_data_) {
839 program_code = program_code_;
840 swizzle_data = swizzle_data_;
841
842 // Reset flow control state
843 program = (CompiledShader*)getCurr();
844 program_counter = 0;
845 looping = false;
846 instruction_labels.fill(Xbyak::Label());
847
848 // Find all `CALL` instructions and identify return locations
849 FindReturnOffsets();
850
851 // The stack pointer is 8 modulo 16 at the entry of a procedure
852 // We reserve 16 bytes and assign a dummy value to the first 8 bytes, to catch any potential
853 // return checks (see Compile_Return) that happen in shader main routine.
854 ABI_PushRegistersAndAdjustStack(*this, ABI_ALL_CALLEE_SAVED, 8, 16);
855 mov(qword[rsp + 8], 0xFFFFFFFFFFFFFFFFULL);
856
857 mov(SETUP, ABI_PARAM1);
858 mov(STATE, ABI_PARAM2);
859
860 // Zero address/loop registers
861 xor(ADDROFFS_REG_0.cvt32(), ADDROFFS_REG_0.cvt32());
862 xor(ADDROFFS_REG_1.cvt32(), ADDROFFS_REG_1.cvt32());
863 xor(LOOPCOUNT_REG, LOOPCOUNT_REG);
864
865 // Used to set a register to one
866 static const __m128 one = {1.f, 1.f, 1.f, 1.f};
867 mov(rax, reinterpret_cast<size_t>(&one));
868 movaps(ONE, xword[rax]);
869
870 // Used to negate registers
871 static const __m128 neg = {-0.f, -0.f, -0.f, -0.f};
872 mov(rax, reinterpret_cast<size_t>(&neg));
873 movaps(NEGBIT, xword[rax]);
874
875 // Jump to start of the shader program
876 jmp(ABI_PARAM3);
877
878 // Compile entire program
879 Compile_Block(static_cast<unsigned>(program_code->size()));
880
881 // Free memory that's no longer needed
882 program_code = nullptr;
883 swizzle_data = nullptr;
884 return_offsets.clear();
885 return_offsets.shrink_to_fit();
886
887 ready();
888
889 ASSERT_MSG(getSize() <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!");
890 LOG_DEBUG(HW_GPU, "Compiled shader size=%lu", getSize());
891}
892
893JitShader::JitShader() : Xbyak::CodeGenerator(MAX_SHADER_SIZE) {}
894
895} // namespace Shader
896
897} // namespace Pica
diff --git a/src/video_core/shader/shader_jit_x64_compiler.h b/src/video_core/shader/shader_jit_x64_compiler.h
new file mode 100644
index 000000000..f27675560
--- /dev/null
+++ b/src/video_core/shader/shader_jit_x64_compiler.h
@@ -0,0 +1,125 @@
1// Copyright 2015 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <cstddef>
9#include <utility>
10#include <vector>
11#include <nihstro/shader_bytecode.h>
12#include <xbyak.h>
13#include "common/bit_set.h"
14#include "common/common_types.h"
15#include "video_core/shader/shader.h"
16
17using nihstro::Instruction;
18using nihstro::OpCode;
19using nihstro::SwizzlePattern;
20
21namespace Pica {
22
23namespace Shader {
24
25/// Memory allocated for each compiled shader (64 KiB)
26constexpr size_t MAX_SHADER_SIZE = 1024 * 64;
27
28/**
29 * This class implements the shader JIT compiler. It recompiles a Pica shader program into x86_64
30 * code that can be executed on the host machine directly.
31 */
32class JitShader : public Xbyak::CodeGenerator {
33public:
34 JitShader();
35
    /**
     * Calls the compiled code through `program`, handing it pointers to `setup` and `state` plus
     * the address of the label recorded for the instruction at `offset`.
     * `offset` indexes `instruction_labels` directly; no bounds check is performed here.
     * NOTE(review): `program` is initialized to nullptr below, so this presumably must only be
     * called after Compile() has assigned it - confirm against the .cpp.
     */
36 void Run(const ShaderSetup& setup, UnitState& state, unsigned offset) const {
37 program(&setup, &state, instruction_labels[offset].getAddress());
38 }
39
    /**
     * Compiles a shader from the given program code and swizzle data arrays (1024 words each).
     * The definition lives in the .cpp file.
     */
40 void Compile(const std::array<u32, 1024>* program_code,
41 const std::array<u32, 1024>* swizzle_data);
42
    // One emitter per instruction name; presumably each emits the host code for the
    // corresponding Pica opcode (definitions are in the .cpp - confirm there).
43 void Compile_ADD(Instruction instr);
44 void Compile_DP3(Instruction instr);
45 void Compile_DP4(Instruction instr);
46 void Compile_DPH(Instruction instr);
47 void Compile_EX2(Instruction instr);
48 void Compile_LG2(Instruction instr);
49 void Compile_MUL(Instruction instr);
50 void Compile_SGE(Instruction instr);
51 void Compile_SLT(Instruction instr);
52 void Compile_FLR(Instruction instr);
53 void Compile_MAX(Instruction instr);
54 void Compile_MIN(Instruction instr);
55 void Compile_RCP(Instruction instr);
56 void Compile_RSQ(Instruction instr);
57 void Compile_MOVA(Instruction instr);
58 void Compile_MOV(Instruction instr);
59 void Compile_NOP(Instruction instr);
60 void Compile_END(Instruction instr);
61 void Compile_CALL(Instruction instr);
62 void Compile_CALLC(Instruction instr);
63 void Compile_CALLU(Instruction instr);
64 void Compile_IF(Instruction instr);
65 void Compile_LOOP(Instruction instr);
66 void Compile_JMP(Instruction instr);
67 void Compile_CMP(Instruction instr);
68 void Compile_MAD(Instruction instr);
69
70private:
71 void Compile_Block(unsigned end);
72 void Compile_NextInstr();
73
74 void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg,
75 Xbyak::Xmm dest);
76 void Compile_DestEnable(Instruction instr, Xbyak::Xmm dest);
77
78 /**
79 * Compiles a `MUL src1, src2` operation, properly handling the PICA semantics when multiplying
80 * zero by inf. Clobbers `src2` and `scratch`.
81 */
82 void Compile_SanitizedMul(Xbyak::Xmm src1, Xbyak::Xmm src2, Xbyak::Xmm scratch);
83
84 void Compile_EvaluateCondition(Instruction instr);
85 void Compile_UniformCondition(Instruction instr);
86
87 /**
88 * Emits the code to conditionally return from a subroutine invoked by the `CALL` instruction.
89 */
90 void Compile_Return();
91
92 BitSet32 PersistentCallerSavedRegs();
93
94 /**
95 * Assertion evaluated at compile-time, but only triggered if executed at runtime.
96 * @param condition Condition to be evaluated.
97 * @param msg Message to be logged if the assertion fails.
98 */
99 void Compile_Assert(bool condition, const char* msg);
100
101 /**
102 * Analyzes the entire shader program for `CALL` instructions before emitting any code,
103 * identifying the locations where a return needs to be inserted.
104 */
105 void FindReturnOffsets();
106
    // Non-owning pointers to the shader input arrays; both default to nullptr.
107 const std::array<u32, 1024>* program_code = nullptr;
108 const std::array<u32, 1024>* swizzle_data = nullptr;
109
110 /// Mapping of Pica VS instructions to pointers in the emitted code
111 std::array<Xbyak::Label, 1024> instruction_labels;
112
113 /// Offsets in code where a return needs to be inserted
114 std::vector<unsigned> return_offsets;
115
116 unsigned program_counter = 0; ///< Offset of the next instruction to decode
117 bool looping = false; ///< True if compiling a loop, used to check for nested loops
118
    /// Signature of the generated code as called by Run(): setup pointer, state pointer, and the
    /// address inside the emitted code at which to start.
119 using CompiledShader = void(const void* setup, void* state, const u8* start_addr);
    /// Function pointer that Run() calls through; nullptr until something assigns it.
120 CompiledShader* program = nullptr;
121};
122
123} // Shader
124
125} // Pica
diff --git a/src/video_core/clipper.cpp b/src/video_core/swrasterizer/clipper.cpp
index 05b5cea73..2d80822d9 100644
--- a/src/video_core/clipper.cpp
+++ b/src/video_core/swrasterizer/clipper.cpp
@@ -11,12 +11,13 @@
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "common/logging/log.h" 12#include "common/logging/log.h"
13#include "common/vector_math.h" 13#include "common/vector_math.h"
14#include "video_core/clipper.h"
15#include "video_core/pica.h"
16#include "video_core/pica_state.h" 14#include "video_core/pica_state.h"
17#include "video_core/pica_types.h" 15#include "video_core/pica_types.h"
18#include "video_core/rasterizer.h"
19#include "video_core/shader/shader.h" 16#include "video_core/shader/shader.h"
17#include "video_core/swrasterizer/clipper.h"
18#include "video_core/swrasterizer/rasterizer.h"
19
20using Pica::Rasterizer::Vertex;
20 21
21namespace Pica { 22namespace Pica {
22 23
@@ -29,20 +30,20 @@ public:
29 float24::FromFloat32(0), float24::FromFloat32(0))) 30 float24::FromFloat32(0), float24::FromFloat32(0)))
30 : coeffs(coeffs), bias(bias) {} 31 : coeffs(coeffs), bias(bias) {}
31 32
32 bool IsInside(const OutputVertex& vertex) const { 33 bool IsInside(const Vertex& vertex) const {
33 return Math::Dot(vertex.pos + bias, coeffs) <= float24::FromFloat32(0); 34 return Math::Dot(vertex.pos + bias, coeffs) <= float24::FromFloat32(0);
34 } 35 }
35 36
36 bool IsOutSide(const OutputVertex& vertex) const { 37 bool IsOutSide(const Vertex& vertex) const {
37 return !IsInside(vertex); 38 return !IsInside(vertex);
38 } 39 }
39 40
40 OutputVertex GetIntersection(const OutputVertex& v0, const OutputVertex& v1) const { 41 Vertex GetIntersection(const Vertex& v0, const Vertex& v1) const {
41 float24 dp = Math::Dot(v0.pos + bias, coeffs); 42 float24 dp = Math::Dot(v0.pos + bias, coeffs);
42 float24 dp_prev = Math::Dot(v1.pos + bias, coeffs); 43 float24 dp_prev = Math::Dot(v1.pos + bias, coeffs);
43 float24 factor = dp_prev / (dp_prev - dp); 44 float24 factor = dp_prev / (dp_prev - dp);
44 45
45 return OutputVertex::Lerp(factor, v0, v1); 46 return Vertex::Lerp(factor, v0, v1);
46 } 47 }
47 48
48private: 49private:
@@ -51,7 +52,7 @@ private:
51 Math::Vec4<float24> bias; 52 Math::Vec4<float24> bias;
52}; 53};
53 54
54static void InitScreenCoordinates(OutputVertex& vtx) { 55static void InitScreenCoordinates(Vertex& vtx) {
55 struct { 56 struct {
56 float24 halfsize_x; 57 float24 halfsize_x;
57 float24 offset_x; 58 float24 offset_x;
@@ -62,10 +63,10 @@ static void InitScreenCoordinates(OutputVertex& vtx) {
62 } viewport; 63 } viewport;
63 64
64 const auto& regs = g_state.regs; 65 const auto& regs = g_state.regs;
65 viewport.halfsize_x = float24::FromRaw(regs.viewport_size_x); 66 viewport.halfsize_x = float24::FromRaw(regs.rasterizer.viewport_size_x);
66 viewport.halfsize_y = float24::FromRaw(regs.viewport_size_y); 67 viewport.halfsize_y = float24::FromRaw(regs.rasterizer.viewport_size_y);
67 viewport.offset_x = float24::FromFloat32(static_cast<float>(regs.viewport_corner.x)); 68 viewport.offset_x = float24::FromFloat32(static_cast<float>(regs.rasterizer.viewport_corner.x));
68 viewport.offset_y = float24::FromFloat32(static_cast<float>(regs.viewport_corner.y)); 69 viewport.offset_y = float24::FromFloat32(static_cast<float>(regs.rasterizer.viewport_corner.y));
69 70
70 float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w; 71 float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w;
71 vtx.color *= inv_w; 72 vtx.color *= inv_w;
@@ -91,8 +92,8 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu
91 // introduces at most 1 new vertex to the polygon. Since we start with a triangle and have a 92 // introduces at most 1 new vertex to the polygon. Since we start with a triangle and have a
92 // fixed 6 clipping planes, the maximum number of vertices of the clipped polygon is 3 + 6 = 9. 93 // fixed 6 clipping planes, the maximum number of vertices of the clipped polygon is 3 + 6 = 9.
93 static const size_t MAX_VERTICES = 9; 94 static const size_t MAX_VERTICES = 9;
94 static_vector<OutputVertex, MAX_VERTICES> buffer_a = {v0, v1, v2}; 95 static_vector<Vertex, MAX_VERTICES> buffer_a = {v0, v1, v2};
95 static_vector<OutputVertex, MAX_VERTICES> buffer_b; 96 static_vector<Vertex, MAX_VERTICES> buffer_b;
96 auto* output_list = &buffer_a; 97 auto* output_list = &buffer_a;
97 auto* input_list = &buffer_b; 98 auto* input_list = &buffer_b;
98 99
@@ -123,7 +124,7 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu
123 std::swap(input_list, output_list); 124 std::swap(input_list, output_list);
124 output_list->clear(); 125 output_list->clear();
125 126
126 const OutputVertex* reference_vertex = &input_list->back(); 127 const Vertex* reference_vertex = &input_list->back();
127 128
128 for (const auto& vertex : *input_list) { 129 for (const auto& vertex : *input_list) {
129 // NOTE: This algorithm changes vertex order in some cases! 130 // NOTE: This algorithm changes vertex order in some cases!
@@ -148,9 +149,9 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu
148 InitScreenCoordinates((*output_list)[1]); 149 InitScreenCoordinates((*output_list)[1]);
149 150
150 for (size_t i = 0; i < output_list->size() - 2; i++) { 151 for (size_t i = 0; i < output_list->size() - 2; i++) {
151 OutputVertex& vtx0 = (*output_list)[0]; 152 Vertex& vtx0 = (*output_list)[0];
152 OutputVertex& vtx1 = (*output_list)[i + 1]; 153 Vertex& vtx1 = (*output_list)[i + 1];
153 OutputVertex& vtx2 = (*output_list)[i + 2]; 154 Vertex& vtx2 = (*output_list)[i + 2];
154 155
155 InitScreenCoordinates(vtx2); 156 InitScreenCoordinates(vtx2);
156 157
diff --git a/src/video_core/clipper.h b/src/video_core/swrasterizer/clipper.h
index b51af0af9..b51af0af9 100644
--- a/src/video_core/clipper.h
+++ b/src/video_core/swrasterizer/clipper.h
diff --git a/src/video_core/swrasterizer/framebuffer.cpp b/src/video_core/swrasterizer/framebuffer.cpp
new file mode 100644
index 000000000..7de3aac75
--- /dev/null
+++ b/src/video_core/swrasterizer/framebuffer.cpp
@@ -0,0 +1,358 @@
1// Copyright 2017 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6
7#include "common/assert.h"
8#include "common/color.h"
9#include "common/common_types.h"
10#include "common/logging/log.h"
11#include "common/math_util.h"
12#include "common/vector_math.h"
13#include "core/hw/gpu.h"
14#include "core/memory.h"
15#include "video_core/pica_state.h"
16#include "video_core/regs_framebuffer.h"
17#include "video_core/swrasterizer/framebuffer.h"
18#include "video_core/utils.h"
19
20namespace Pica {
21namespace Rasterizer {
22
23void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
24 const auto& framebuffer = g_state.regs.framebuffer.framebuffer;
25 const PAddr addr = framebuffer.GetColorBufferPhysicalAddress();
26
27 // Similarly to textures, the render framebuffer is laid out from bottom to top, too.
28 // NOTE: The framebuffer height register contains the actual FB height minus one.
29 y = framebuffer.height - y;
30
31 const u32 coarse_y = y & ~7;
32 u32 bytes_per_pixel =
33 GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value()));
34 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) +
35 coarse_y * framebuffer.width * bytes_per_pixel;
36 u8* dst_pixel = Memory::GetPhysicalPointer(addr) + dst_offset;
37
38 switch (framebuffer.color_format) {
39 case FramebufferRegs::ColorFormat::RGBA8:
40 Color::EncodeRGBA8(color, dst_pixel);
41 break;
42
43 case FramebufferRegs::ColorFormat::RGB8:
44 Color::EncodeRGB8(color, dst_pixel);
45 break;
46
47 case FramebufferRegs::ColorFormat::RGB5A1:
48 Color::EncodeRGB5A1(color, dst_pixel);
49 break;
50
51 case FramebufferRegs::ColorFormat::RGB565:
52 Color::EncodeRGB565(color, dst_pixel);
53 break;
54
55 case FramebufferRegs::ColorFormat::RGBA4:
56 Color::EncodeRGBA4(color, dst_pixel);
57 break;
58
59 default:
60 LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x",
61 framebuffer.color_format.Value());
62 UNIMPLEMENTED();
63 }
64}
65
66const Math::Vec4<u8> GetPixel(int x, int y) {
67 const auto& framebuffer = g_state.regs.framebuffer.framebuffer;
68 const PAddr addr = framebuffer.GetColorBufferPhysicalAddress();
69
70 y = framebuffer.height - y;
71
72 const u32 coarse_y = y & ~7;
73 u32 bytes_per_pixel =
74 GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value()));
75 u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) +
76 coarse_y * framebuffer.width * bytes_per_pixel;
77 u8* src_pixel = Memory::GetPhysicalPointer(addr) + src_offset;
78
79 switch (framebuffer.color_format) {
80 case FramebufferRegs::ColorFormat::RGBA8:
81 return Color::DecodeRGBA8(src_pixel);
82
83 case FramebufferRegs::ColorFormat::RGB8:
84 return Color::DecodeRGB8(src_pixel);
85
86 case FramebufferRegs::ColorFormat::RGB5A1:
87 return Color::DecodeRGB5A1(src_pixel);
88
89 case FramebufferRegs::ColorFormat::RGB565:
90 return Color::DecodeRGB565(src_pixel);
91
92 case FramebufferRegs::ColorFormat::RGBA4:
93 return Color::DecodeRGBA4(src_pixel);
94
95 default:
96 LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x",
97 framebuffer.color_format.Value());
98 UNIMPLEMENTED();
99 }
100
101 return {0, 0, 0, 0};
102}
103
104u32 GetDepth(int x, int y) {
105 const auto& framebuffer = g_state.regs.framebuffer.framebuffer;
106 const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
107 u8* depth_buffer = Memory::GetPhysicalPointer(addr);
108
109 y = framebuffer.height - y;
110
111 const u32 coarse_y = y & ~7;
112 u32 bytes_per_pixel = FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format);
113 u32 stride = framebuffer.width * bytes_per_pixel;
114
115 u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
116 u8* src_pixel = depth_buffer + src_offset;
117
118 switch (framebuffer.depth_format) {
119 case FramebufferRegs::DepthFormat::D16:
120 return Color::DecodeD16(src_pixel);
121 case FramebufferRegs::DepthFormat::D24:
122 return Color::DecodeD24(src_pixel);
123 case FramebufferRegs::DepthFormat::D24S8:
124 return Color::DecodeD24S8(src_pixel).x;
125 default:
126 LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format);
127 UNIMPLEMENTED();
128 return 0;
129 }
130}
131
132u8 GetStencil(int x, int y) {
133 const auto& framebuffer = g_state.regs.framebuffer.framebuffer;
134 const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
135 u8* depth_buffer = Memory::GetPhysicalPointer(addr);
136
137 y = framebuffer.height - y;
138
139 const u32 coarse_y = y & ~7;
140 u32 bytes_per_pixel = Pica::FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format);
141 u32 stride = framebuffer.width * bytes_per_pixel;
142
143 u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
144 u8* src_pixel = depth_buffer + src_offset;
145
146 switch (framebuffer.depth_format) {
147 case FramebufferRegs::DepthFormat::D24S8:
148 return Color::DecodeD24S8(src_pixel).y;
149
150 default:
151 LOG_WARNING(
152 HW_GPU,
153 "GetStencil called for function which doesn't have a stencil component (format %u)",
154 framebuffer.depth_format);
155 return 0;
156 }
157}
158
159void SetDepth(int x, int y, u32 value) {
160 const auto& framebuffer = g_state.regs.framebuffer.framebuffer;
161 const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
162 u8* depth_buffer = Memory::GetPhysicalPointer(addr);
163
164 y = framebuffer.height - y;
165
166 const u32 coarse_y = y & ~7;
167 u32 bytes_per_pixel = FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format);
168 u32 stride = framebuffer.width * bytes_per_pixel;
169
170 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
171 u8* dst_pixel = depth_buffer + dst_offset;
172
173 switch (framebuffer.depth_format) {
174 case FramebufferRegs::DepthFormat::D16:
175 Color::EncodeD16(value, dst_pixel);
176 break;
177
178 case FramebufferRegs::DepthFormat::D24:
179 Color::EncodeD24(value, dst_pixel);
180 break;
181
182 case FramebufferRegs::DepthFormat::D24S8:
183 Color::EncodeD24X8(value, dst_pixel);
184 break;
185
186 default:
187 LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format);
188 UNIMPLEMENTED();
189 break;
190 }
191}
192
193void SetStencil(int x, int y, u8 value) {
194 const auto& framebuffer = g_state.regs.framebuffer.framebuffer;
195 const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
196 u8* depth_buffer = Memory::GetPhysicalPointer(addr);
197
198 y = framebuffer.height - y;
199
200 const u32 coarse_y = y & ~7;
201 u32 bytes_per_pixel = Pica::FramebufferRegs::BytesPerDepthPixel(framebuffer.depth_format);
202 u32 stride = framebuffer.width * bytes_per_pixel;
203
204 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
205 u8* dst_pixel = depth_buffer + dst_offset;
206
207 switch (framebuffer.depth_format) {
208 case Pica::FramebufferRegs::DepthFormat::D16:
209 case Pica::FramebufferRegs::DepthFormat::D24:
210 // Nothing to do
211 break;
212
213 case Pica::FramebufferRegs::DepthFormat::D24S8:
214 Color::EncodeX24S8(value, dst_pixel);
215 break;
216
217 default:
218 LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format);
219 UNIMPLEMENTED();
220 break;
221 }
222}
223
224u8 PerformStencilAction(FramebufferRegs::StencilAction action, u8 old_stencil, u8 ref) {
225 switch (action) {
226 case FramebufferRegs::StencilAction::Keep:
227 return old_stencil;
228
229 case FramebufferRegs::StencilAction::Zero:
230 return 0;
231
232 case FramebufferRegs::StencilAction::Replace:
233 return ref;
234
235 case FramebufferRegs::StencilAction::Increment:
236 // Saturated increment
237 return std::min<u8>(old_stencil, 254) + 1;
238
239 case FramebufferRegs::StencilAction::Decrement:
240 // Saturated decrement
241 return std::max<u8>(old_stencil, 1) - 1;
242
243 case FramebufferRegs::StencilAction::Invert:
244 return ~old_stencil;
245
246 case FramebufferRegs::StencilAction::IncrementWrap:
247 return old_stencil + 1;
248
249 case FramebufferRegs::StencilAction::DecrementWrap:
250 return old_stencil - 1;
251
252 default:
253 LOG_CRITICAL(HW_GPU, "Unknown stencil action %x", (int)action);
254 UNIMPLEMENTED();
255 return 0;
256 }
257}
258
259Math::Vec4<u8> EvaluateBlendEquation(const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor,
260 const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor,
261 FramebufferRegs::BlendEquation equation) {
262 Math::Vec4<int> result;
263
264 auto src_result = (src * srcfactor).Cast<int>();
265 auto dst_result = (dest * destfactor).Cast<int>();
266
267 switch (equation) {
268 case FramebufferRegs::BlendEquation::Add:
269 result = (src_result + dst_result) / 255;
270 break;
271
272 case FramebufferRegs::BlendEquation::Subtract:
273 result = (src_result - dst_result) / 255;
274 break;
275
276 case FramebufferRegs::BlendEquation::ReverseSubtract:
277 result = (dst_result - src_result) / 255;
278 break;
279
280 // TODO: How do these two actually work? OpenGL doesn't include the blend factors in the
281 // min/max computations, but is this what the 3DS actually does?
282 case FramebufferRegs::BlendEquation::Min:
283 result.r() = std::min(src.r(), dest.r());
284 result.g() = std::min(src.g(), dest.g());
285 result.b() = std::min(src.b(), dest.b());
286 result.a() = std::min(src.a(), dest.a());
287 break;
288
289 case FramebufferRegs::BlendEquation::Max:
290 result.r() = std::max(src.r(), dest.r());
291 result.g() = std::max(src.g(), dest.g());
292 result.b() = std::max(src.b(), dest.b());
293 result.a() = std::max(src.a(), dest.a());
294 break;
295
296 default:
297 LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation %x", equation);
298 UNIMPLEMENTED();
299 }
300
301 return Math::Vec4<u8>(MathUtil::Clamp(result.r(), 0, 255), MathUtil::Clamp(result.g(), 0, 255),
302 MathUtil::Clamp(result.b(), 0, 255), MathUtil::Clamp(result.a(), 0, 255));
303};
304
305u8 LogicOp(u8 src, u8 dest, FramebufferRegs::LogicOp op) {
306 switch (op) {
307 case FramebufferRegs::LogicOp::Clear:
308 return 0;
309
310 case FramebufferRegs::LogicOp::And:
311 return src & dest;
312
313 case FramebufferRegs::LogicOp::AndReverse:
314 return src & ~dest;
315
316 case FramebufferRegs::LogicOp::Copy:
317 return src;
318
319 case FramebufferRegs::LogicOp::Set:
320 return 255;
321
322 case FramebufferRegs::LogicOp::CopyInverted:
323 return ~src;
324
325 case FramebufferRegs::LogicOp::NoOp:
326 return dest;
327
328 case FramebufferRegs::LogicOp::Invert:
329 return ~dest;
330
331 case FramebufferRegs::LogicOp::Nand:
332 return ~(src & dest);
333
334 case FramebufferRegs::LogicOp::Or:
335 return src | dest;
336
337 case FramebufferRegs::LogicOp::Nor:
338 return ~(src | dest);
339
340 case FramebufferRegs::LogicOp::Xor:
341 return src ^ dest;
342
343 case FramebufferRegs::LogicOp::Equiv:
344 return ~(src ^ dest);
345
346 case FramebufferRegs::LogicOp::AndInverted:
347 return ~src & dest;
348
349 case FramebufferRegs::LogicOp::OrReverse:
350 return src | ~dest;
351
352 case FramebufferRegs::LogicOp::OrInverted:
353 return ~src | dest;
354 }
355};
356
357} // namespace Rasterizer
358} // namespace Pica
diff --git a/src/video_core/swrasterizer/framebuffer.h b/src/video_core/swrasterizer/framebuffer.h
new file mode 100644
index 000000000..4a32a4979
--- /dev/null
+++ b/src/video_core/swrasterizer/framebuffer.h
@@ -0,0 +1,29 @@
1// Copyright 2017 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "common/vector_math.h"
9#include "video_core/regs_framebuffer.h"
10
11namespace Pica {
12namespace Rasterizer {
13
/// Encodes `color` into the color buffer at (x, y) using the configured color format.
14void DrawPixel(int x, int y, const Math::Vec4<u8>& color);
/// Decodes the color stored at (x, y); yields {0, 0, 0, 0} for an unknown color format.
15const Math::Vec4<u8> GetPixel(int x, int y);
/// Decodes the depth stored at (x, y) for D16/D24/D24S8; yields 0 for any other depth format.
16u32 GetDepth(int x, int y);
/// Decodes the stencil stored at (x, y); logs a warning and yields 0 unless the format is D24S8.
17u8 GetStencil(int x, int y);
/// Encodes `value` as the depth at (x, y) for D16/D24/D24S8 depth buffers.
18void SetDepth(int x, int y, u32 value);
/// Encodes `value` as the stencil at (x, y); does nothing for D16 and D24 depth buffers.
19void SetStencil(int x, int y, u8 value);
/// Returns the stencil value obtained by applying `action` to `old_stencil` (Replace yields `ref`).
20u8 PerformStencilAction(FramebufferRegs::StencilAction action, u8 old_stencil, u8 ref);
21
/// Blends `src` and `dest` with `equation`; Add/Subtract/ReverseSubtract use the factors and divide
/// by 255, Min/Max ignore the factors. Each result component is clamped to [0, 255].
22Math::Vec4<u8> EvaluateBlendEquation(const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor,
23 const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor,
24 FramebufferRegs::BlendEquation equation);
25
/// Applies the bitwise logic operation `op` to a source byte and a destination byte.
26u8 LogicOp(u8 src, u8 dest, FramebufferRegs::LogicOp op);
27
28} // namespace Rasterizer
29} // namespace Pica
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp
index b9f5d4533..7557fcb89 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/swrasterizer/rasterizer.cpp
@@ -16,253 +16,21 @@
16#include "core/hw/gpu.h" 16#include "core/hw/gpu.h"
17#include "core/memory.h" 17#include "core/memory.h"
18#include "video_core/debug_utils/debug_utils.h" 18#include "video_core/debug_utils/debug_utils.h"
19#include "video_core/pica.h"
20#include "video_core/pica_state.h" 19#include "video_core/pica_state.h"
21#include "video_core/pica_types.h" 20#include "video_core/pica_types.h"
22#include "video_core/rasterizer.h" 21#include "video_core/regs_framebuffer.h"
22#include "video_core/regs_rasterizer.h"
23#include "video_core/regs_texturing.h"
23#include "video_core/shader/shader.h" 24#include "video_core/shader/shader.h"
25#include "video_core/swrasterizer/framebuffer.h"
26#include "video_core/swrasterizer/rasterizer.h"
27#include "video_core/swrasterizer/texturing.h"
28#include "video_core/texture/texture_decode.h"
24#include "video_core/utils.h" 29#include "video_core/utils.h"
25 30
26namespace Pica { 31namespace Pica {
27
28namespace Rasterizer { 32namespace Rasterizer {
29 33
30static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) {
31 const auto& framebuffer = g_state.regs.framebuffer;
32 const PAddr addr = framebuffer.GetColorBufferPhysicalAddress();
33
34 // Similarly to textures, the render framebuffer is laid out from bottom to top, too.
35 // NOTE: The framebuffer height register contains the actual FB height minus one.
36 y = framebuffer.height - y;
37
38 const u32 coarse_y = y & ~7;
39 u32 bytes_per_pixel =
40 GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value()));
41 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) +
42 coarse_y * framebuffer.width * bytes_per_pixel;
43 u8* dst_pixel = Memory::GetPhysicalPointer(addr) + dst_offset;
44
45 switch (framebuffer.color_format) {
46 case Regs::ColorFormat::RGBA8:
47 Color::EncodeRGBA8(color, dst_pixel);
48 break;
49
50 case Regs::ColorFormat::RGB8:
51 Color::EncodeRGB8(color, dst_pixel);
52 break;
53
54 case Regs::ColorFormat::RGB5A1:
55 Color::EncodeRGB5A1(color, dst_pixel);
56 break;
57
58 case Regs::ColorFormat::RGB565:
59 Color::EncodeRGB565(color, dst_pixel);
60 break;
61
62 case Regs::ColorFormat::RGBA4:
63 Color::EncodeRGBA4(color, dst_pixel);
64 break;
65
66 default:
67 LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x",
68 framebuffer.color_format.Value());
69 UNIMPLEMENTED();
70 }
71}
72
73static const Math::Vec4<u8> GetPixel(int x, int y) {
74 const auto& framebuffer = g_state.regs.framebuffer;
75 const PAddr addr = framebuffer.GetColorBufferPhysicalAddress();
76
77 y = framebuffer.height - y;
78
79 const u32 coarse_y = y & ~7;
80 u32 bytes_per_pixel =
81 GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value()));
82 u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) +
83 coarse_y * framebuffer.width * bytes_per_pixel;
84 u8* src_pixel = Memory::GetPhysicalPointer(addr) + src_offset;
85
86 switch (framebuffer.color_format) {
87 case Regs::ColorFormat::RGBA8:
88 return Color::DecodeRGBA8(src_pixel);
89
90 case Regs::ColorFormat::RGB8:
91 return Color::DecodeRGB8(src_pixel);
92
93 case Regs::ColorFormat::RGB5A1:
94 return Color::DecodeRGB5A1(src_pixel);
95
96 case Regs::ColorFormat::RGB565:
97 return Color::DecodeRGB565(src_pixel);
98
99 case Regs::ColorFormat::RGBA4:
100 return Color::DecodeRGBA4(src_pixel);
101
102 default:
103 LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x",
104 framebuffer.color_format.Value());
105 UNIMPLEMENTED();
106 }
107
108 return {0, 0, 0, 0};
109}
110
111static u32 GetDepth(int x, int y) {
112 const auto& framebuffer = g_state.regs.framebuffer;
113 const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
114 u8* depth_buffer = Memory::GetPhysicalPointer(addr);
115
116 y = framebuffer.height - y;
117
118 const u32 coarse_y = y & ~7;
119 u32 bytes_per_pixel = Regs::BytesPerDepthPixel(framebuffer.depth_format);
120 u32 stride = framebuffer.width * bytes_per_pixel;
121
122 u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
123 u8* src_pixel = depth_buffer + src_offset;
124
125 switch (framebuffer.depth_format) {
126 case Regs::DepthFormat::D16:
127 return Color::DecodeD16(src_pixel);
128 case Regs::DepthFormat::D24:
129 return Color::DecodeD24(src_pixel);
130 case Regs::DepthFormat::D24S8:
131 return Color::DecodeD24S8(src_pixel).x;
132 default:
133 LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format);
134 UNIMPLEMENTED();
135 return 0;
136 }
137}
138
139static u8 GetStencil(int x, int y) {
140 const auto& framebuffer = g_state.regs.framebuffer;
141 const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
142 u8* depth_buffer = Memory::GetPhysicalPointer(addr);
143
144 y = framebuffer.height - y;
145
146 const u32 coarse_y = y & ~7;
147 u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(framebuffer.depth_format);
148 u32 stride = framebuffer.width * bytes_per_pixel;
149
150 u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
151 u8* src_pixel = depth_buffer + src_offset;
152
153 switch (framebuffer.depth_format) {
154 case Regs::DepthFormat::D24S8:
155 return Color::DecodeD24S8(src_pixel).y;
156
157 default:
158 LOG_WARNING(
159 HW_GPU,
160 "GetStencil called for function which doesn't have a stencil component (format %u)",
161 framebuffer.depth_format);
162 return 0;
163 }
164}
165
166static void SetDepth(int x, int y, u32 value) {
167 const auto& framebuffer = g_state.regs.framebuffer;
168 const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
169 u8* depth_buffer = Memory::GetPhysicalPointer(addr);
170
171 y = framebuffer.height - y;
172
173 const u32 coarse_y = y & ~7;
174 u32 bytes_per_pixel = Regs::BytesPerDepthPixel(framebuffer.depth_format);
175 u32 stride = framebuffer.width * bytes_per_pixel;
176
177 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
178 u8* dst_pixel = depth_buffer + dst_offset;
179
180 switch (framebuffer.depth_format) {
181 case Regs::DepthFormat::D16:
182 Color::EncodeD16(value, dst_pixel);
183 break;
184
185 case Regs::DepthFormat::D24:
186 Color::EncodeD24(value, dst_pixel);
187 break;
188
189 case Regs::DepthFormat::D24S8:
190 Color::EncodeD24X8(value, dst_pixel);
191 break;
192
193 default:
194 LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format);
195 UNIMPLEMENTED();
196 break;
197 }
198}
199
200static void SetStencil(int x, int y, u8 value) {
201 const auto& framebuffer = g_state.regs.framebuffer;
202 const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
203 u8* depth_buffer = Memory::GetPhysicalPointer(addr);
204
205 y = framebuffer.height - y;
206
207 const u32 coarse_y = y & ~7;
208 u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(framebuffer.depth_format);
209 u32 stride = framebuffer.width * bytes_per_pixel;
210
211 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
212 u8* dst_pixel = depth_buffer + dst_offset;
213
214 switch (framebuffer.depth_format) {
215 case Pica::Regs::DepthFormat::D16:
216 case Pica::Regs::DepthFormat::D24:
217 // Nothing to do
218 break;
219
220 case Pica::Regs::DepthFormat::D24S8:
221 Color::EncodeX24S8(value, dst_pixel);
222 break;
223
224 default:
225 LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format);
226 UNIMPLEMENTED();
227 break;
228 }
229}
230
231static u8 PerformStencilAction(Regs::StencilAction action, u8 old_stencil, u8 ref) {
232 switch (action) {
233 case Regs::StencilAction::Keep:
234 return old_stencil;
235
236 case Regs::StencilAction::Zero:
237 return 0;
238
239 case Regs::StencilAction::Replace:
240 return ref;
241
242 case Regs::StencilAction::Increment:
243 // Saturated increment
244 return std::min<u8>(old_stencil, 254) + 1;
245
246 case Regs::StencilAction::Decrement:
247 // Saturated decrement
248 return std::max<u8>(old_stencil, 1) - 1;
249
250 case Regs::StencilAction::Invert:
251 return ~old_stencil;
252
253 case Regs::StencilAction::IncrementWrap:
254 return old_stencil + 1;
255
256 case Regs::StencilAction::DecrementWrap:
257 return old_stencil - 1;
258
259 default:
260 LOG_CRITICAL(HW_GPU, "Unknown stencil action %x", (int)action);
261 UNIMPLEMENTED();
262 return 0;
263 }
264}
265
266// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values 34// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
267struct Fix12P4 { 35struct Fix12P4 {
268 Fix12P4() {} 36 Fix12P4() {}
@@ -307,8 +75,8 @@ MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 24
307 * Helper function for ProcessTriangle with the "reversed" flag to allow for implementing 75 * Helper function for ProcessTriangle with the "reversed" flag to allow for implementing
308 * culling via recursion. 76 * culling via recursion.
309 */ 77 */
310static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader::OutputVertex& v1, 78static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Vertex& v2,
311 const Shader::OutputVertex& v2, bool reversed = false) { 79 bool reversed = false) {
312 const auto& regs = g_state.regs; 80 const auto& regs = g_state.regs;
313 MICROPROFILE_SCOPE(GPU_Rasterization); 81 MICROPROFILE_SCOPE(GPU_Rasterization);
314 82
@@ -326,14 +94,14 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader
326 ScreenToRasterizerCoordinates(v1.screenpos), 94 ScreenToRasterizerCoordinates(v1.screenpos),
327 ScreenToRasterizerCoordinates(v2.screenpos)}; 95 ScreenToRasterizerCoordinates(v2.screenpos)};
328 96
329 if (regs.cull_mode == Regs::CullMode::KeepAll) { 97 if (regs.rasterizer.cull_mode == RasterizerRegs::CullMode::KeepAll) {
330 // Make sure we always end up with a triangle wound counter-clockwise 98 // Make sure we always end up with a triangle wound counter-clockwise
331 if (!reversed && SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0) { 99 if (!reversed && SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0) {
332 ProcessTriangleInternal(v0, v2, v1, true); 100 ProcessTriangleInternal(v0, v2, v1, true);
333 return; 101 return;
334 } 102 }
335 } else { 103 } else {
336 if (!reversed && regs.cull_mode == Regs::CullMode::KeepClockWise) { 104 if (!reversed && regs.rasterizer.cull_mode == RasterizerRegs::CullMode::KeepClockWise) {
337 // Reverse vertex order and use the CCW code path. 105 // Reverse vertex order and use the CCW code path.
338 ProcessTriangleInternal(v0, v2, v1, true); 106 ProcessTriangleInternal(v0, v2, v1, true);
339 return; 107 return;
@@ -350,13 +118,13 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader
350 u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y}); 118 u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y});
351 119
352 // Convert the scissor box coordinates to 12.4 fixed point 120 // Convert the scissor box coordinates to 12.4 fixed point
353 u16 scissor_x1 = (u16)(regs.scissor_test.x1 << 4); 121 u16 scissor_x1 = (u16)(regs.rasterizer.scissor_test.x1 << 4);
354 u16 scissor_y1 = (u16)(regs.scissor_test.y1 << 4); 122 u16 scissor_y1 = (u16)(regs.rasterizer.scissor_test.y1 << 4);
355 // x2,y2 have +1 added to cover the entire sub-pixel area 123 // x2,y2 have +1 added to cover the entire sub-pixel area
356 u16 scissor_x2 = (u16)((regs.scissor_test.x2 + 1) << 4); 124 u16 scissor_x2 = (u16)((regs.rasterizer.scissor_test.x2 + 1) << 4);
357 u16 scissor_y2 = (u16)((regs.scissor_test.y2 + 1) << 4); 125 u16 scissor_y2 = (u16)((regs.rasterizer.scissor_test.y2 + 1) << 4);
358 126
359 if (regs.scissor_test.mode == Regs::ScissorMode::Include) { 127 if (regs.rasterizer.scissor_test.mode == RasterizerRegs::ScissorMode::Include) {
360 // Calculate the new bounds 128 // Calculate the new bounds
361 min_x = std::max(min_x, scissor_x1); 129 min_x = std::max(min_x, scissor_x1);
362 min_y = std::max(min_y, scissor_y1); 130 min_y = std::max(min_y, scissor_y1);
@@ -396,12 +164,13 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader
396 164
397 auto w_inverse = Math::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w); 165 auto w_inverse = Math::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w);
398 166
399 auto textures = regs.GetTextures(); 167 auto textures = regs.texturing.GetTextures();
400 auto tev_stages = regs.GetTevStages(); 168 auto tev_stages = regs.texturing.GetTevStages();
401 169
402 bool stencil_action_enable = g_state.regs.output_merger.stencil_test.enable && 170 bool stencil_action_enable =
403 g_state.regs.framebuffer.depth_format == Regs::DepthFormat::D24S8; 171 g_state.regs.framebuffer.output_merger.stencil_test.enable &&
404 const auto stencil_test = g_state.regs.output_merger.stencil_test; 172 g_state.regs.framebuffer.framebuffer.depth_format == FramebufferRegs::DepthFormat::D24S8;
173 const auto stencil_test = g_state.regs.framebuffer.output_merger.stencil_test;
405 174
406 // Enter rasterization loop, starting at the center of the topleft bounding box corner. 175 // Enter rasterization loop, starting at the center of the topleft bounding box corner.
407 // TODO: Not sure if looping through x first might be faster 176 // TODO: Not sure if looping through x first might be faster
@@ -410,7 +179,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader
410 179
411 // Do not process the pixel if it's inside the scissor box and the scissor mode is set 180 // Do not process the pixel if it's inside the scissor box and the scissor mode is set
412 // to Exclude 181 // to Exclude
413 if (regs.scissor_test.mode == Regs::ScissorMode::Exclude) { 182 if (regs.rasterizer.scissor_test.mode == RasterizerRegs::ScissorMode::Exclude) {
414 if (x >= scissor_x1 && x < scissor_x2 && y >= scissor_y1 && y < scissor_y2) 183 if (x >= scissor_x1 && x < scissor_x2 && y >= scissor_y1 && y < scissor_y2)
415 continue; 184 continue;
416 } 185 }
@@ -440,12 +209,14 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader
440 209
441 // Not fully accurate. About 3 bits in precision are missing. 210 // Not fully accurate. About 3 bits in precision are missing.
442 // Z-Buffer (z / w * scale + offset) 211 // Z-Buffer (z / w * scale + offset)
443 float depth_scale = float24::FromRaw(regs.viewport_depth_range).ToFloat32(); 212 float depth_scale = float24::FromRaw(regs.rasterizer.viewport_depth_range).ToFloat32();
444 float depth_offset = float24::FromRaw(regs.viewport_depth_near_plane).ToFloat32(); 213 float depth_offset =
214 float24::FromRaw(regs.rasterizer.viewport_depth_near_plane).ToFloat32();
445 float depth = interpolated_z_over_w * depth_scale + depth_offset; 215 float depth = interpolated_z_over_w * depth_scale + depth_offset;
446 216
447 // Potentially switch to W-Buffer 217 // Potentially switch to W-Buffer
448 if (regs.depthmap_enable == Pica::Regs::DepthBuffering::WBuffering) { 218 if (regs.rasterizer.depthmap_enable ==
219 Pica::RasterizerRegs::DepthBuffering::WBuffering) {
449 // W-Buffer (z * scale + w * offset = (z / w * scale + offset) * w) 220 // W-Buffer (z * scale + w * offset = (z / w * scale + offset) * w)
450 depth *= interpolated_w_inverse.ToFloat32() * wsum; 221 depth *= interpolated_w_inverse.ToFloat32() * wsum;
451 } 222 }
@@ -512,9 +283,9 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader
512 // TODO: Refactor so cubemaps and shadowmaps can be handled 283 // TODO: Refactor so cubemaps and shadowmaps can be handled
513 if (i == 0) { 284 if (i == 0) {
514 switch (texture.config.type) { 285 switch (texture.config.type) {
515 case Regs::TextureConfig::Texture2D: 286 case TexturingRegs::TextureConfig::Texture2D:
516 break; 287 break;
517 case Regs::TextureConfig::Projection2D: { 288 case TexturingRegs::TextureConfig::Projection2D: {
518 auto tc0_w = GetInterpolatedAttribute(v0.tc0_w, v1.tc0_w, v2.tc0_w); 289 auto tc0_w = GetInterpolatedAttribute(v0.tc0_w, v1.tc0_w, v2.tc0_w);
519 u /= tc0_w; 290 u /= tc0_w;
520 v /= tc0_w; 291 v /= tc0_w;
@@ -533,37 +304,9 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader
533 int t = (int)(v * float24::FromFloat32(static_cast<float>(texture.config.height))) 304 int t = (int)(v * float24::FromFloat32(static_cast<float>(texture.config.height)))
534 .ToFloat32(); 305 .ToFloat32();
535 306
536 static auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val, 307 if ((texture.config.wrap_s == TexturingRegs::TextureConfig::ClampToBorder &&
537 unsigned size) {
538 switch (mode) {
539 case Regs::TextureConfig::ClampToEdge:
540 val = std::max(val, 0);
541 val = std::min(val, (int)size - 1);
542 return val;
543
544 case Regs::TextureConfig::ClampToBorder:
545 return val;
546
547 case Regs::TextureConfig::Repeat:
548 return (int)((unsigned)val % size);
549
550 case Regs::TextureConfig::MirroredRepeat: {
551 unsigned int coord = ((unsigned)val % (2 * size));
552 if (coord >= size)
553 coord = 2 * size - 1 - coord;
554 return (int)coord;
555 }
556
557 default:
558 LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x", (int)mode);
559 UNIMPLEMENTED();
560 return 0;
561 }
562 };
563
564 if ((texture.config.wrap_s == Regs::TextureConfig::ClampToBorder &&
565 (s < 0 || static_cast<u32>(s) >= texture.config.width)) || 308 (s < 0 || static_cast<u32>(s) >= texture.config.width)) ||
566 (texture.config.wrap_t == Regs::TextureConfig::ClampToBorder && 309 (texture.config.wrap_t == TexturingRegs::TextureConfig::ClampToBorder &&
567 (t < 0 || static_cast<u32>(t) >= texture.config.height))) { 310 (t < 0 || static_cast<u32>(t) >= texture.config.height))) {
568 auto border_color = texture.config.border_color; 311 auto border_color = texture.config.border_color;
569 texture_color[i] = {border_color.r, border_color.g, border_color.b, 312 texture_color[i] = {border_color.r, border_color.g, border_color.b,
@@ -579,10 +322,10 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader
579 u8* texture_data = 322 u8* texture_data =
580 Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress()); 323 Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress());
581 auto info = 324 auto info =
582 DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format); 325 Texture::TextureInfo::FromPicaRegister(texture.config, texture.format);
583 326
584 // TODO: Apply the min and mag filters to the texture 327 // TODO: Apply the min and mag filters to the texture
585 texture_color[i] = DebugUtils::LookupTexture(texture_data, s, t, info); 328 texture_color[i] = Texture::LookupTexture(texture_data, s, t, info);
586#if PICA_DUMP_TEXTURES 329#if PICA_DUMP_TEXTURES
587 DebugUtils::DumpTexture(texture.config, texture_data); 330 DebugUtils::DumpTexture(texture.config, texture_data);
588#endif 331#endif
@@ -599,17 +342,16 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader
599 Math::Vec4<u8> combiner_output; 342 Math::Vec4<u8> combiner_output;
600 Math::Vec4<u8> combiner_buffer = {0, 0, 0, 0}; 343 Math::Vec4<u8> combiner_buffer = {0, 0, 0, 0};
601 Math::Vec4<u8> next_combiner_buffer = { 344 Math::Vec4<u8> next_combiner_buffer = {
602 regs.tev_combiner_buffer_color.r, regs.tev_combiner_buffer_color.g, 345 regs.texturing.tev_combiner_buffer_color.r,
603 regs.tev_combiner_buffer_color.b, regs.tev_combiner_buffer_color.a, 346 regs.texturing.tev_combiner_buffer_color.g,
347 regs.texturing.tev_combiner_buffer_color.b,
348 regs.texturing.tev_combiner_buffer_color.a,
604 }; 349 };
605 350
606 for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); 351 for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size();
607 ++tev_stage_index) { 352 ++tev_stage_index) {
608 const auto& tev_stage = tev_stages[tev_stage_index]; 353 const auto& tev_stage = tev_stages[tev_stage_index];
609 using Source = Regs::TevStageConfig::Source; 354 using Source = TexturingRegs::TevStageConfig::Source;
610 using ColorModifier = Regs::TevStageConfig::ColorModifier;
611 using AlphaModifier = Regs::TevStageConfig::AlphaModifier;
612 using Operation = Regs::TevStageConfig::Operation;
613 355
614 auto GetSource = [&](Source source) -> Math::Vec4<u8> { 356 auto GetSource = [&](Source source) -> Math::Vec4<u8> {
615 switch (source) { 357 switch (source) {
@@ -649,187 +391,6 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader
649 } 391 }
650 }; 392 };
651 393
652 static auto GetColorModifier = [](ColorModifier factor,
653 const Math::Vec4<u8>& values) -> Math::Vec3<u8> {
654 switch (factor) {
655 case ColorModifier::SourceColor:
656 return values.rgb();
657
658 case ColorModifier::OneMinusSourceColor:
659 return (Math::Vec3<u8>(255, 255, 255) - values.rgb()).Cast<u8>();
660
661 case ColorModifier::SourceAlpha:
662 return values.aaa();
663
664 case ColorModifier::OneMinusSourceAlpha:
665 return (Math::Vec3<u8>(255, 255, 255) - values.aaa()).Cast<u8>();
666
667 case ColorModifier::SourceRed:
668 return values.rrr();
669
670 case ColorModifier::OneMinusSourceRed:
671 return (Math::Vec3<u8>(255, 255, 255) - values.rrr()).Cast<u8>();
672
673 case ColorModifier::SourceGreen:
674 return values.ggg();
675
676 case ColorModifier::OneMinusSourceGreen:
677 return (Math::Vec3<u8>(255, 255, 255) - values.ggg()).Cast<u8>();
678
679 case ColorModifier::SourceBlue:
680 return values.bbb();
681
682 case ColorModifier::OneMinusSourceBlue:
683 return (Math::Vec3<u8>(255, 255, 255) - values.bbb()).Cast<u8>();
684 }
685 };
686
687 static auto GetAlphaModifier = [](AlphaModifier factor,
688 const Math::Vec4<u8>& values) -> u8 {
689 switch (factor) {
690 case AlphaModifier::SourceAlpha:
691 return values.a();
692
693 case AlphaModifier::OneMinusSourceAlpha:
694 return 255 - values.a();
695
696 case AlphaModifier::SourceRed:
697 return values.r();
698
699 case AlphaModifier::OneMinusSourceRed:
700 return 255 - values.r();
701
702 case AlphaModifier::SourceGreen:
703 return values.g();
704
705 case AlphaModifier::OneMinusSourceGreen:
706 return 255 - values.g();
707
708 case AlphaModifier::SourceBlue:
709 return values.b();
710
711 case AlphaModifier::OneMinusSourceBlue:
712 return 255 - values.b();
713 }
714 };
715
716 static auto ColorCombine = [](Operation op,
717 const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> {
718 switch (op) {
719 case Operation::Replace:
720 return input[0];
721
722 case Operation::Modulate:
723 return ((input[0] * input[1]) / 255).Cast<u8>();
724
725 case Operation::Add: {
726 auto result = input[0] + input[1];
727 result.r() = std::min(255, result.r());
728 result.g() = std::min(255, result.g());
729 result.b() = std::min(255, result.b());
730 return result.Cast<u8>();
731 }
732
733 case Operation::AddSigned: {
734 // TODO(bunnei): Verify that the color conversion from (float) 0.5f to
735 // (byte) 128 is correct
736 auto result = input[0].Cast<int>() + input[1].Cast<int>() -
737 Math::MakeVec<int>(128, 128, 128);
738 result.r() = MathUtil::Clamp<int>(result.r(), 0, 255);
739 result.g() = MathUtil::Clamp<int>(result.g(), 0, 255);
740 result.b() = MathUtil::Clamp<int>(result.b(), 0, 255);
741 return result.Cast<u8>();
742 }
743
744 case Operation::Lerp:
745 return ((input[0] * input[2] +
746 input[1] *
747 (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) /
748 255)
749 .Cast<u8>();
750
751 case Operation::Subtract: {
752 auto result = input[0].Cast<int>() - input[1].Cast<int>();
753 result.r() = std::max(0, result.r());
754 result.g() = std::max(0, result.g());
755 result.b() = std::max(0, result.b());
756 return result.Cast<u8>();
757 }
758
759 case Operation::MultiplyThenAdd: {
760 auto result = (input[0] * input[1] + 255 * input[2].Cast<int>()) / 255;
761 result.r() = std::min(255, result.r());
762 result.g() = std::min(255, result.g());
763 result.b() = std::min(255, result.b());
764 return result.Cast<u8>();
765 }
766
767 case Operation::AddThenMultiply: {
768 auto result = input[0] + input[1];
769 result.r() = std::min(255, result.r());
770 result.g() = std::min(255, result.g());
771 result.b() = std::min(255, result.b());
772 result = (result * input[2].Cast<int>()) / 255;
773 return result.Cast<u8>();
774 }
775 case Operation::Dot3_RGB: {
776 // Not fully accurate.
777 // Worst case scenario seems to yield a +/-3 error
778 // Some HW results indicate that the per-component computation can't have a
779 // higher precision than 1/256,
780 // while dot3_rgb( (0x80,g0,b0),(0x7F,g1,b1) ) and dot3_rgb(
781 // (0x80,g0,b0),(0x80,g1,b1) ) give different results
782 int result =
783 ((input[0].r() * 2 - 255) * (input[1].r() * 2 - 255) + 128) / 256 +
784 ((input[0].g() * 2 - 255) * (input[1].g() * 2 - 255) + 128) / 256 +
785 ((input[0].b() * 2 - 255) * (input[1].b() * 2 - 255) + 128) / 256;
786 result = std::max(0, std::min(255, result));
787 return {(u8)result, (u8)result, (u8)result};
788 }
789 default:
790 LOG_ERROR(HW_GPU, "Unknown color combiner operation %d", (int)op);
791 UNIMPLEMENTED();
792 return {0, 0, 0};
793 }
794 };
795
796 static auto AlphaCombine = [](Operation op, const std::array<u8, 3>& input) -> u8 {
797 switch (op) {
798 case Operation::Replace:
799 return input[0];
800
801 case Operation::Modulate:
802 return input[0] * input[1] / 255;
803
804 case Operation::Add:
805 return std::min(255, input[0] + input[1]);
806
807 case Operation::AddSigned: {
808 // TODO(bunnei): Verify that the color conversion from (float) 0.5f to
809 // (byte) 128 is correct
810 auto result = static_cast<int>(input[0]) + static_cast<int>(input[1]) - 128;
811 return static_cast<u8>(MathUtil::Clamp<int>(result, 0, 255));
812 }
813
814 case Operation::Lerp:
815 return (input[0] * input[2] + input[1] * (255 - input[2])) / 255;
816
817 case Operation::Subtract:
818 return std::max(0, (int)input[0] - (int)input[1]);
819
820 case Operation::MultiplyThenAdd:
821 return std::min(255, (input[0] * input[1] + 255 * input[2]) / 255);
822
823 case Operation::AddThenMultiply:
824 return (std::min(255, (input[0] + input[1])) * input[2]) / 255;
825
826 default:
827 LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d", (int)op);
828 UNIMPLEMENTED();
829 return 0;
830 }
831 };
832
833 // color combiner 394 // color combiner
834 // NOTE: Not sure if the alpha combiner might use the color output of the previous 395 // NOTE: Not sure if the alpha combiner might use the color output of the previous
835 // stage as input. Hence, we currently don't directly write the result to 396 // stage as input. Hence, we currently don't directly write the result to
@@ -861,54 +422,54 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader
861 422
862 combiner_buffer = next_combiner_buffer; 423 combiner_buffer = next_combiner_buffer;
863 424
864 if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor( 425 if (regs.texturing.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(
865 tev_stage_index)) { 426 tev_stage_index)) {
866 next_combiner_buffer.r() = combiner_output.r(); 427 next_combiner_buffer.r() = combiner_output.r();
867 next_combiner_buffer.g() = combiner_output.g(); 428 next_combiner_buffer.g() = combiner_output.g();
868 next_combiner_buffer.b() = combiner_output.b(); 429 next_combiner_buffer.b() = combiner_output.b();
869 } 430 }
870 431
871 if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha( 432 if (regs.texturing.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(
872 tev_stage_index)) { 433 tev_stage_index)) {
873 next_combiner_buffer.a() = combiner_output.a(); 434 next_combiner_buffer.a() = combiner_output.a();
874 } 435 }
875 } 436 }
876 437
877 const auto& output_merger = regs.output_merger; 438 const auto& output_merger = regs.framebuffer.output_merger;
878 // TODO: Does alpha testing happen before or after stencil? 439 // TODO: Does alpha testing happen before or after stencil?
879 if (output_merger.alpha_test.enable) { 440 if (output_merger.alpha_test.enable) {
880 bool pass = false; 441 bool pass = false;
881 442
882 switch (output_merger.alpha_test.func) { 443 switch (output_merger.alpha_test.func) {
883 case Regs::CompareFunc::Never: 444 case FramebufferRegs::CompareFunc::Never:
884 pass = false; 445 pass = false;
885 break; 446 break;
886 447
887 case Regs::CompareFunc::Always: 448 case FramebufferRegs::CompareFunc::Always:
888 pass = true; 449 pass = true;
889 break; 450 break;
890 451
891 case Regs::CompareFunc::Equal: 452 case FramebufferRegs::CompareFunc::Equal:
892 pass = combiner_output.a() == output_merger.alpha_test.ref; 453 pass = combiner_output.a() == output_merger.alpha_test.ref;
893 break; 454 break;
894 455
895 case Regs::CompareFunc::NotEqual: 456 case FramebufferRegs::CompareFunc::NotEqual:
896 pass = combiner_output.a() != output_merger.alpha_test.ref; 457 pass = combiner_output.a() != output_merger.alpha_test.ref;
897 break; 458 break;
898 459
899 case Regs::CompareFunc::LessThan: 460 case FramebufferRegs::CompareFunc::LessThan:
900 pass = combiner_output.a() < output_merger.alpha_test.ref; 461 pass = combiner_output.a() < output_merger.alpha_test.ref;
901 break; 462 break;
902 463
903 case Regs::CompareFunc::LessThanOrEqual: 464 case FramebufferRegs::CompareFunc::LessThanOrEqual:
904 pass = combiner_output.a() <= output_merger.alpha_test.ref; 465 pass = combiner_output.a() <= output_merger.alpha_test.ref;
905 break; 466 break;
906 467
907 case Regs::CompareFunc::GreaterThan: 468 case FramebufferRegs::CompareFunc::GreaterThan:
908 pass = combiner_output.a() > output_merger.alpha_test.ref; 469 pass = combiner_output.a() > output_merger.alpha_test.ref;
909 break; 470 break;
910 471
911 case Regs::CompareFunc::GreaterThanOrEqual: 472 case FramebufferRegs::CompareFunc::GreaterThanOrEqual:
912 pass = combiner_output.a() >= output_merger.alpha_test.ref; 473 pass = combiner_output.a() >= output_merger.alpha_test.ref;
913 break; 474 break;
914 } 475 }
@@ -921,16 +482,16 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader
921 // Not fully accurate. We'd have to know what data type is used to 482 // Not fully accurate. We'd have to know what data type is used to
922 // store the depth etc. Using float for now until we know more 483 // store the depth etc. Using float for now until we know more
923 // about Pica datatypes 484 // about Pica datatypes
924 if (regs.fog_mode == Regs::FogMode::Fog) { 485 if (regs.texturing.fog_mode == TexturingRegs::FogMode::Fog) {
925 const Math::Vec3<u8> fog_color = { 486 const Math::Vec3<u8> fog_color = {
926 static_cast<u8>(regs.fog_color.r.Value()), 487 static_cast<u8>(regs.texturing.fog_color.r.Value()),
927 static_cast<u8>(regs.fog_color.g.Value()), 488 static_cast<u8>(regs.texturing.fog_color.g.Value()),
928 static_cast<u8>(regs.fog_color.b.Value()), 489 static_cast<u8>(regs.texturing.fog_color.b.Value()),
929 }; 490 };
930 491
931 // Get index into fog LUT 492 // Get index into fog LUT
932 float fog_index; 493 float fog_index;
933 if (g_state.regs.fog_flip) { 494 if (g_state.regs.texturing.fog_flip) {
934 fog_index = (1.0f - depth) * 128.0f; 495 fog_index = (1.0f - depth) * 128.0f;
935 } else { 496 } else {
936 fog_index = depth * 128.0f; 497 fog_index = depth * 128.0f;
@@ -954,10 +515,10 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader
954 u8 old_stencil = 0; 515 u8 old_stencil = 0;
955 516
956 auto UpdateStencil = [stencil_test, x, y, 517 auto UpdateStencil = [stencil_test, x, y,
957 &old_stencil](Pica::Regs::StencilAction action) { 518 &old_stencil](Pica::FramebufferRegs::StencilAction action) {
958 u8 new_stencil = 519 u8 new_stencil =
959 PerformStencilAction(action, old_stencil, stencil_test.reference_value); 520 PerformStencilAction(action, old_stencil, stencil_test.reference_value);
960 if (g_state.regs.framebuffer.allow_depth_stencil_write != 0) 521 if (g_state.regs.framebuffer.framebuffer.allow_depth_stencil_write != 0)
961 SetStencil(x >> 4, y >> 4, (new_stencil & stencil_test.write_mask) | 522 SetStencil(x >> 4, y >> 4, (new_stencil & stencil_test.write_mask) |
962 (old_stencil & ~stencil_test.write_mask)); 523 (old_stencil & ~stencil_test.write_mask));
963 }; 524 };
@@ -969,35 +530,35 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader
969 530
970 bool pass = false; 531 bool pass = false;
971 switch (stencil_test.func) { 532 switch (stencil_test.func) {
972 case Regs::CompareFunc::Never: 533 case FramebufferRegs::CompareFunc::Never:
973 pass = false; 534 pass = false;
974 break; 535 break;
975 536
976 case Regs::CompareFunc::Always: 537 case FramebufferRegs::CompareFunc::Always:
977 pass = true; 538 pass = true;
978 break; 539 break;
979 540
980 case Regs::CompareFunc::Equal: 541 case FramebufferRegs::CompareFunc::Equal:
981 pass = (ref == dest); 542 pass = (ref == dest);
982 break; 543 break;
983 544
984 case Regs::CompareFunc::NotEqual: 545 case FramebufferRegs::CompareFunc::NotEqual:
985 pass = (ref != dest); 546 pass = (ref != dest);
986 break; 547 break;
987 548
988 case Regs::CompareFunc::LessThan: 549 case FramebufferRegs::CompareFunc::LessThan:
989 pass = (ref < dest); 550 pass = (ref < dest);
990 break; 551 break;
991 552
992 case Regs::CompareFunc::LessThanOrEqual: 553 case FramebufferRegs::CompareFunc::LessThanOrEqual:
993 pass = (ref <= dest); 554 pass = (ref <= dest);
994 break; 555 break;
995 556
996 case Regs::CompareFunc::GreaterThan: 557 case FramebufferRegs::CompareFunc::GreaterThan:
997 pass = (ref > dest); 558 pass = (ref > dest);
998 break; 559 break;
999 560
1000 case Regs::CompareFunc::GreaterThanOrEqual: 561 case FramebufferRegs::CompareFunc::GreaterThanOrEqual:
1001 pass = (ref >= dest); 562 pass = (ref >= dest);
1002 break; 563 break;
1003 } 564 }
@@ -1009,7 +570,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader
1009 } 570 }
1010 571
1011 // Convert float to integer 572 // Convert float to integer
1012 unsigned num_bits = Regs::DepthBitsPerPixel(regs.framebuffer.depth_format); 573 unsigned num_bits =
574 FramebufferRegs::DepthBitsPerPixel(regs.framebuffer.framebuffer.depth_format);
1013 u32 z = (u32)(depth * ((1 << num_bits) - 1)); 575 u32 z = (u32)(depth * ((1 << num_bits) - 1));
1014 576
1015 if (output_merger.depth_test_enable) { 577 if (output_merger.depth_test_enable) {
@@ -1018,35 +580,35 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader
1018 bool pass = false; 580 bool pass = false;
1019 581
1020 switch (output_merger.depth_test_func) { 582 switch (output_merger.depth_test_func) {
1021 case Regs::CompareFunc::Never: 583 case FramebufferRegs::CompareFunc::Never:
1022 pass = false; 584 pass = false;
1023 break; 585 break;
1024 586
1025 case Regs::CompareFunc::Always: 587 case FramebufferRegs::CompareFunc::Always:
1026 pass = true; 588 pass = true;
1027 break; 589 break;
1028 590
1029 case Regs::CompareFunc::Equal: 591 case FramebufferRegs::CompareFunc::Equal:
1030 pass = z == ref_z; 592 pass = z == ref_z;
1031 break; 593 break;
1032 594
1033 case Regs::CompareFunc::NotEqual: 595 case FramebufferRegs::CompareFunc::NotEqual:
1034 pass = z != ref_z; 596 pass = z != ref_z;
1035 break; 597 break;
1036 598
1037 case Regs::CompareFunc::LessThan: 599 case FramebufferRegs::CompareFunc::LessThan:
1038 pass = z < ref_z; 600 pass = z < ref_z;
1039 break; 601 break;
1040 602
1041 case Regs::CompareFunc::LessThanOrEqual: 603 case FramebufferRegs::CompareFunc::LessThanOrEqual:
1042 pass = z <= ref_z; 604 pass = z <= ref_z;
1043 break; 605 break;
1044 606
1045 case Regs::CompareFunc::GreaterThan: 607 case FramebufferRegs::CompareFunc::GreaterThan:
1046 pass = z > ref_z; 608 pass = z > ref_z;
1047 break; 609 break;
1048 610
1049 case Regs::CompareFunc::GreaterThanOrEqual: 611 case FramebufferRegs::CompareFunc::GreaterThanOrEqual:
1050 pass = z >= ref_z; 612 pass = z >= ref_z;
1051 break; 613 break;
1052 } 614 }
@@ -1058,8 +620,11 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader
1058 } 620 }
1059 } 621 }
1060 622
1061 if (regs.framebuffer.allow_depth_stencil_write != 0 && output_merger.depth_write_enable) 623 if (regs.framebuffer.framebuffer.allow_depth_stencil_write != 0 &&
624 output_merger.depth_write_enable) {
625
1062 SetDepth(x >> 4, y >> 4, z); 626 SetDepth(x >> 4, y >> 4, z);
627 }
1063 628
1064 // The stencil depth_pass action is executed even if depth testing is disabled 629 // The stencil depth_pass action is executed even if depth testing is disabled
1065 if (stencil_action_enable) 630 if (stencil_action_enable)
@@ -1071,7 +636,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader
1071 if (output_merger.alphablend_enable) { 636 if (output_merger.alphablend_enable) {
1072 auto params = output_merger.alpha_blending; 637 auto params = output_merger.alpha_blending;
1073 638
1074 auto LookupFactor = [&](unsigned channel, Regs::BlendFactor factor) -> u8 { 639 auto LookupFactor = [&](unsigned channel,
640 FramebufferRegs::BlendFactor factor) -> u8 {
1075 DEBUG_ASSERT(channel < 4); 641 DEBUG_ASSERT(channel < 4);
1076 642
1077 const Math::Vec4<u8> blend_const = { 643 const Math::Vec4<u8> blend_const = {
@@ -1082,49 +648,49 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader
1082 }; 648 };
1083 649
1084 switch (factor) { 650 switch (factor) {
1085 case Regs::BlendFactor::Zero: 651 case FramebufferRegs::BlendFactor::Zero:
1086 return 0; 652 return 0;
1087 653
1088 case Regs::BlendFactor::One: 654 case FramebufferRegs::BlendFactor::One:
1089 return 255; 655 return 255;
1090 656
1091 case Regs::BlendFactor::SourceColor: 657 case FramebufferRegs::BlendFactor::SourceColor:
1092 return combiner_output[channel]; 658 return combiner_output[channel];
1093 659
1094 case Regs::BlendFactor::OneMinusSourceColor: 660 case FramebufferRegs::BlendFactor::OneMinusSourceColor:
1095 return 255 - combiner_output[channel]; 661 return 255 - combiner_output[channel];
1096 662
1097 case Regs::BlendFactor::DestColor: 663 case FramebufferRegs::BlendFactor::DestColor:
1098 return dest[channel]; 664 return dest[channel];
1099 665
1100 case Regs::BlendFactor::OneMinusDestColor: 666 case FramebufferRegs::BlendFactor::OneMinusDestColor:
1101 return 255 - dest[channel]; 667 return 255 - dest[channel];
1102 668
1103 case Regs::BlendFactor::SourceAlpha: 669 case FramebufferRegs::BlendFactor::SourceAlpha:
1104 return combiner_output.a(); 670 return combiner_output.a();
1105 671
1106 case Regs::BlendFactor::OneMinusSourceAlpha: 672 case FramebufferRegs::BlendFactor::OneMinusSourceAlpha:
1107 return 255 - combiner_output.a(); 673 return 255 - combiner_output.a();
1108 674
1109 case Regs::BlendFactor::DestAlpha: 675 case FramebufferRegs::BlendFactor::DestAlpha:
1110 return dest.a(); 676 return dest.a();
1111 677
1112 case Regs::BlendFactor::OneMinusDestAlpha: 678 case FramebufferRegs::BlendFactor::OneMinusDestAlpha:
1113 return 255 - dest.a(); 679 return 255 - dest.a();
1114 680
1115 case Regs::BlendFactor::ConstantColor: 681 case FramebufferRegs::BlendFactor::ConstantColor:
1116 return blend_const[channel]; 682 return blend_const[channel];
1117 683
1118 case Regs::BlendFactor::OneMinusConstantColor: 684 case FramebufferRegs::BlendFactor::OneMinusConstantColor:
1119 return 255 - blend_const[channel]; 685 return 255 - blend_const[channel];
1120 686
1121 case Regs::BlendFactor::ConstantAlpha: 687 case FramebufferRegs::BlendFactor::ConstantAlpha:
1122 return blend_const.a(); 688 return blend_const.a();
1123 689
1124 case Regs::BlendFactor::OneMinusConstantAlpha: 690 case FramebufferRegs::BlendFactor::OneMinusConstantAlpha:
1125 return 255 - blend_const.a(); 691 return 255 - blend_const.a();
1126 692
1127 case Regs::BlendFactor::SourceAlphaSaturate: 693 case FramebufferRegs::BlendFactor::SourceAlphaSaturate:
1128 // Returns 1.0 for the alpha channel 694 // Returns 1.0 for the alpha channel
1129 if (channel == 3) 695 if (channel == 3)
1130 return 255; 696 return 255;
@@ -1139,55 +705,6 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader
1139 return combiner_output[channel]; 705 return combiner_output[channel];
1140 }; 706 };
1141 707
1142 static auto EvaluateBlendEquation = [](
1143 const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor,
1144 const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor,
1145 Regs::BlendEquation equation) {
1146 Math::Vec4<int> result;
1147
1148 auto src_result = (src * srcfactor).Cast<int>();
1149 auto dst_result = (dest * destfactor).Cast<int>();
1150
1151 switch (equation) {
1152 case Regs::BlendEquation::Add:
1153 result = (src_result + dst_result) / 255;
1154 break;
1155
1156 case Regs::BlendEquation::Subtract:
1157 result = (src_result - dst_result) / 255;
1158 break;
1159
1160 case Regs::BlendEquation::ReverseSubtract:
1161 result = (dst_result - src_result) / 255;
1162 break;
1163
1164 // TODO: How do these two actually work?
1165 // OpenGL doesn't include the blend factors in the min/max computations,
1166 // but is this what the 3DS actually does?
1167 case Regs::BlendEquation::Min:
1168 result.r() = std::min(src.r(), dest.r());
1169 result.g() = std::min(src.g(), dest.g());
1170 result.b() = std::min(src.b(), dest.b());
1171 result.a() = std::min(src.a(), dest.a());
1172 break;
1173
1174 case Regs::BlendEquation::Max:
1175 result.r() = std::max(src.r(), dest.r());
1176 result.g() = std::max(src.g(), dest.g());
1177 result.b() = std::max(src.b(), dest.b());
1178 result.a() = std::max(src.a(), dest.a());
1179 break;
1180
1181 default:
1182 LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation %x", equation);
1183 UNIMPLEMENTED();
1184 }
1185
1186 return Math::Vec4<u8>(
1187 MathUtil::Clamp(result.r(), 0, 255), MathUtil::Clamp(result.g(), 0, 255),
1188 MathUtil::Clamp(result.b(), 0, 255), MathUtil::Clamp(result.a(), 0, 255));
1189 };
1190
1191 auto srcfactor = Math::MakeVec(LookupFactor(0, params.factor_source_rgb), 708 auto srcfactor = Math::MakeVec(LookupFactor(0, params.factor_source_rgb),
1192 LookupFactor(1, params.factor_source_rgb), 709 LookupFactor(1, params.factor_source_rgb),
1193 LookupFactor(2, params.factor_source_rgb), 710 LookupFactor(2, params.factor_source_rgb),
@@ -1204,58 +721,6 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader
1204 dstfactor, params.blend_equation_a) 721 dstfactor, params.blend_equation_a)
1205 .a(); 722 .a();
1206 } else { 723 } else {
1207 static auto LogicOp = [](u8 src, u8 dest, Regs::LogicOp op) -> u8 {
1208 switch (op) {
1209 case Regs::LogicOp::Clear:
1210 return 0;
1211
1212 case Regs::LogicOp::And:
1213 return src & dest;
1214
1215 case Regs::LogicOp::AndReverse:
1216 return src & ~dest;
1217
1218 case Regs::LogicOp::Copy:
1219 return src;
1220
1221 case Regs::LogicOp::Set:
1222 return 255;
1223
1224 case Regs::LogicOp::CopyInverted:
1225 return ~src;
1226
1227 case Regs::LogicOp::NoOp:
1228 return dest;
1229
1230 case Regs::LogicOp::Invert:
1231 return ~dest;
1232
1233 case Regs::LogicOp::Nand:
1234 return ~(src & dest);
1235
1236 case Regs::LogicOp::Or:
1237 return src | dest;
1238
1239 case Regs::LogicOp::Nor:
1240 return ~(src | dest);
1241
1242 case Regs::LogicOp::Xor:
1243 return src ^ dest;
1244
1245 case Regs::LogicOp::Equiv:
1246 return ~(src ^ dest);
1247
1248 case Regs::LogicOp::AndInverted:
1249 return ~src & dest;
1250
1251 case Regs::LogicOp::OrReverse:
1252 return src | ~dest;
1253
1254 case Regs::LogicOp::OrInverted:
1255 return ~src | dest;
1256 }
1257 };
1258
1259 blend_output = 724 blend_output =
1260 Math::MakeVec(LogicOp(combiner_output.r(), dest.r(), output_merger.logic_op), 725 Math::MakeVec(LogicOp(combiner_output.r(), dest.r(), output_merger.logic_op),
1261 LogicOp(combiner_output.g(), dest.g(), output_merger.logic_op), 726 LogicOp(combiner_output.g(), dest.g(), output_merger.logic_op),
@@ -1270,14 +735,13 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, const Shader
1270 output_merger.alpha_enable ? blend_output.a() : dest.a(), 735 output_merger.alpha_enable ? blend_output.a() : dest.a(),
1271 }; 736 };
1272 737
1273 if (regs.framebuffer.allow_color_write != 0) 738 if (regs.framebuffer.framebuffer.allow_color_write != 0)
1274 DrawPixel(x >> 4, y >> 4, result); 739 DrawPixel(x >> 4, y >> 4, result);
1275 } 740 }
1276 } 741 }
1277} 742}
1278 743
1279void ProcessTriangle(const Shader::OutputVertex& v0, const Shader::OutputVertex& v1, 744void ProcessTriangle(const Vertex& v0, const Vertex& v1, const Vertex& v2) {
1280 const Shader::OutputVertex& v2) {
1281 ProcessTriangleInternal(v0, v1, v2); 745 ProcessTriangleInternal(v0, v1, v2);
1282} 746}
1283 747
diff --git a/src/video_core/swrasterizer/rasterizer.h b/src/video_core/swrasterizer/rasterizer.h
new file mode 100644
index 000000000..3a72ac343
--- /dev/null
+++ b/src/video_core/swrasterizer/rasterizer.h
@@ -0,0 +1,48 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "video_core/shader/shader.h"
8
9namespace Pica {
10
11namespace Rasterizer {
12
13struct Vertex : Shader::OutputVertex {
14 Vertex(const OutputVertex& v) : OutputVertex(v) {}
15
16 // Attributes used to store intermediate results
17 // position after perspective divide
18 Math::Vec3<float24> screenpos;
19
20 // Linear interpolation
21 // factor: 0=this, 1=vtx
22 void Lerp(float24 factor, const Vertex& vtx) {
23 pos = pos * factor + vtx.pos * (float24::FromFloat32(1) - factor);
24
25 // TODO: Should perform perspective correct interpolation here...
26 tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor);
27 tc1 = tc1 * factor + vtx.tc1 * (float24::FromFloat32(1) - factor);
28 tc2 = tc2 * factor + vtx.tc2 * (float24::FromFloat32(1) - factor);
29
30 screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor);
31
32 color = color * factor + vtx.color * (float24::FromFloat32(1) - factor);
33 }
34
35 // Linear interpolation
36 // factor: 0=v0, 1=v1
37 static Vertex Lerp(float24 factor, const Vertex& v0, const Vertex& v1) {
38 Vertex ret = v0;
39 ret.Lerp(factor, v1);
40 return ret;
41 }
42};
43
44void ProcessTriangle(const Vertex& v0, const Vertex& v1, const Vertex& v2);
45
46} // namespace Rasterizer
47
48} // namespace Pica
diff --git a/src/video_core/swrasterizer.cpp b/src/video_core/swrasterizer/swrasterizer.cpp
index 9cd21f72b..402b705dd 100644
--- a/src/video_core/swrasterizer.cpp
+++ b/src/video_core/swrasterizer/swrasterizer.cpp
@@ -2,8 +2,8 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "video_core/clipper.h" 5#include "video_core/swrasterizer/clipper.h"
6#include "video_core/swrasterizer.h" 6#include "video_core/swrasterizer/swrasterizer.h"
7 7
8namespace VideoCore { 8namespace VideoCore {
9 9
diff --git a/src/video_core/swrasterizer.h b/src/video_core/swrasterizer/swrasterizer.h
index 6d42d7409..6d42d7409 100644
--- a/src/video_core/swrasterizer.h
+++ b/src/video_core/swrasterizer/swrasterizer.h
diff --git a/src/video_core/swrasterizer/texturing.cpp b/src/video_core/swrasterizer/texturing.cpp
new file mode 100644
index 000000000..eb18e4ba4
--- /dev/null
+++ b/src/video_core/swrasterizer/texturing.cpp
@@ -0,0 +1,228 @@
1// Copyright 2017 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6
7#include "common/assert.h"
8#include "common/common_types.h"
9#include "common/math_util.h"
10#include "common/vector_math.h"
11#include "video_core/regs_texturing.h"
12#include "video_core/swrasterizer/texturing.h"
13
14namespace Pica {
15namespace Rasterizer {
16
17using TevStageConfig = TexturingRegs::TevStageConfig;
18
19int GetWrappedTexCoord(TexturingRegs::TextureConfig::WrapMode mode, int val, unsigned size) {
20 switch (mode) {
21 case TexturingRegs::TextureConfig::ClampToEdge:
22 val = std::max(val, 0);
23 val = std::min(val, (int)size - 1);
24 return val;
25
26 case TexturingRegs::TextureConfig::ClampToBorder:
27 return val;
28
29 case TexturingRegs::TextureConfig::Repeat:
30 return (int)((unsigned)val % size);
31
32 case TexturingRegs::TextureConfig::MirroredRepeat: {
33 unsigned int coord = ((unsigned)val % (2 * size));
34 if (coord >= size)
35 coord = 2 * size - 1 - coord;
36 return (int)coord;
37 }
38
39 default:
40 LOG_ERROR(HW_GPU, "Unknown texture coordinate wrapping mode %x", (int)mode);
41 UNIMPLEMENTED();
42 return 0;
43 }
44};
45
46Math::Vec3<u8> GetColorModifier(TevStageConfig::ColorModifier factor,
47 const Math::Vec4<u8>& values) {
48 using ColorModifier = TevStageConfig::ColorModifier;
49
50 switch (factor) {
51 case ColorModifier::SourceColor:
52 return values.rgb();
53
54 case ColorModifier::OneMinusSourceColor:
55 return (Math::Vec3<u8>(255, 255, 255) - values.rgb()).Cast<u8>();
56
57 case ColorModifier::SourceAlpha:
58 return values.aaa();
59
60 case ColorModifier::OneMinusSourceAlpha:
61 return (Math::Vec3<u8>(255, 255, 255) - values.aaa()).Cast<u8>();
62
63 case ColorModifier::SourceRed:
64 return values.rrr();
65
66 case ColorModifier::OneMinusSourceRed:
67 return (Math::Vec3<u8>(255, 255, 255) - values.rrr()).Cast<u8>();
68
69 case ColorModifier::SourceGreen:
70 return values.ggg();
71
72 case ColorModifier::OneMinusSourceGreen:
73 return (Math::Vec3<u8>(255, 255, 255) - values.ggg()).Cast<u8>();
74
75 case ColorModifier::SourceBlue:
76 return values.bbb();
77
78 case ColorModifier::OneMinusSourceBlue:
79 return (Math::Vec3<u8>(255, 255, 255) - values.bbb()).Cast<u8>();
80 }
81};
82
83u8 GetAlphaModifier(TevStageConfig::AlphaModifier factor, const Math::Vec4<u8>& values) {
84 using AlphaModifier = TevStageConfig::AlphaModifier;
85
86 switch (factor) {
87 case AlphaModifier::SourceAlpha:
88 return values.a();
89
90 case AlphaModifier::OneMinusSourceAlpha:
91 return 255 - values.a();
92
93 case AlphaModifier::SourceRed:
94 return values.r();
95
96 case AlphaModifier::OneMinusSourceRed:
97 return 255 - values.r();
98
99 case AlphaModifier::SourceGreen:
100 return values.g();
101
102 case AlphaModifier::OneMinusSourceGreen:
103 return 255 - values.g();
104
105 case AlphaModifier::SourceBlue:
106 return values.b();
107
108 case AlphaModifier::OneMinusSourceBlue:
109 return 255 - values.b();
110 }
111};
112
113Math::Vec3<u8> ColorCombine(TevStageConfig::Operation op, const Math::Vec3<u8> input[3]) {
114 using Operation = TevStageConfig::Operation;
115
116 switch (op) {
117 case Operation::Replace:
118 return input[0];
119
120 case Operation::Modulate:
121 return ((input[0] * input[1]) / 255).Cast<u8>();
122
123 case Operation::Add: {
124 auto result = input[0] + input[1];
125 result.r() = std::min(255, result.r());
126 result.g() = std::min(255, result.g());
127 result.b() = std::min(255, result.b());
128 return result.Cast<u8>();
129 }
130
131 case Operation::AddSigned: {
132 // TODO(bunnei): Verify that the color conversion from (float) 0.5f to
133 // (byte) 128 is correct
134 auto result =
135 input[0].Cast<int>() + input[1].Cast<int>() - Math::MakeVec<int>(128, 128, 128);
136 result.r() = MathUtil::Clamp<int>(result.r(), 0, 255);
137 result.g() = MathUtil::Clamp<int>(result.g(), 0, 255);
138 result.b() = MathUtil::Clamp<int>(result.b(), 0, 255);
139 return result.Cast<u8>();
140 }
141
142 case Operation::Lerp:
143 return ((input[0] * input[2] +
144 input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) /
145 255)
146 .Cast<u8>();
147
148 case Operation::Subtract: {
149 auto result = input[0].Cast<int>() - input[1].Cast<int>();
150 result.r() = std::max(0, result.r());
151 result.g() = std::max(0, result.g());
152 result.b() = std::max(0, result.b());
153 return result.Cast<u8>();
154 }
155
156 case Operation::MultiplyThenAdd: {
157 auto result = (input[0] * input[1] + 255 * input[2].Cast<int>()) / 255;
158 result.r() = std::min(255, result.r());
159 result.g() = std::min(255, result.g());
160 result.b() = std::min(255, result.b());
161 return result.Cast<u8>();
162 }
163
164 case Operation::AddThenMultiply: {
165 auto result = input[0] + input[1];
166 result.r() = std::min(255, result.r());
167 result.g() = std::min(255, result.g());
168 result.b() = std::min(255, result.b());
169 result = (result * input[2].Cast<int>()) / 255;
170 return result.Cast<u8>();
171 }
172 case Operation::Dot3_RGB: {
173 // Not fully accurate. Worst case scenario seems to yield a +/-3 error. Some HW results
174 // indicate that the per-component computation can't have a higher precision than 1/256,
175 // while dot3_rgb((0x80,g0,b0), (0x7F,g1,b1)) and dot3_rgb((0x80,g0,b0), (0x80,g1,b1)) give
176 // different results.
177 int result = ((input[0].r() * 2 - 255) * (input[1].r() * 2 - 255) + 128) / 256 +
178 ((input[0].g() * 2 - 255) * (input[1].g() * 2 - 255) + 128) / 256 +
179 ((input[0].b() * 2 - 255) * (input[1].b() * 2 - 255) + 128) / 256;
180 result = std::max(0, std::min(255, result));
181 return {(u8)result, (u8)result, (u8)result};
182 }
183 default:
184 LOG_ERROR(HW_GPU, "Unknown color combiner operation %d", (int)op);
185 UNIMPLEMENTED();
186 return {0, 0, 0};
187 }
188};
189
190u8 AlphaCombine(TevStageConfig::Operation op, const std::array<u8, 3>& input) {
191 switch (op) {
192 using Operation = TevStageConfig::Operation;
193 case Operation::Replace:
194 return input[0];
195
196 case Operation::Modulate:
197 return input[0] * input[1] / 255;
198
199 case Operation::Add:
200 return std::min(255, input[0] + input[1]);
201
202 case Operation::AddSigned: {
203 // TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct
204 auto result = static_cast<int>(input[0]) + static_cast<int>(input[1]) - 128;
205 return static_cast<u8>(MathUtil::Clamp<int>(result, 0, 255));
206 }
207
208 case Operation::Lerp:
209 return (input[0] * input[2] + input[1] * (255 - input[2])) / 255;
210
211 case Operation::Subtract:
212 return std::max(0, (int)input[0] - (int)input[1]);
213
214 case Operation::MultiplyThenAdd:
215 return std::min(255, (input[0] * input[1] + 255 * input[2]) / 255);
216
217 case Operation::AddThenMultiply:
218 return (std::min(255, (input[0] + input[1])) * input[2]) / 255;
219
220 default:
221 LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d", (int)op);
222 UNIMPLEMENTED();
223 return 0;
224 }
225};
226
227} // namespace Rasterizer
228} // namespace Pica
diff --git a/src/video_core/swrasterizer/texturing.h b/src/video_core/swrasterizer/texturing.h
new file mode 100644
index 000000000..24f74a5a3
--- /dev/null
+++ b/src/video_core/swrasterizer/texturing.h
@@ -0,0 +1,28 @@
1// Copyright 2017 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "common/vector_math.h"
9#include "video_core/regs_texturing.h"
10
11namespace Pica {
12namespace Rasterizer {
13
14int GetWrappedTexCoord(TexturingRegs::TextureConfig::WrapMode mode, int val, unsigned size);
15
16Math::Vec3<u8> GetColorModifier(TexturingRegs::TevStageConfig::ColorModifier factor,
17 const Math::Vec4<u8>& values);
18
19u8 GetAlphaModifier(TexturingRegs::TevStageConfig::AlphaModifier factor,
20 const Math::Vec4<u8>& values);
21
22Math::Vec3<u8> ColorCombine(TexturingRegs::TevStageConfig::Operation op,
23 const Math::Vec3<u8> input[3]);
24
25u8 AlphaCombine(TexturingRegs::TevStageConfig::Operation op, const std::array<u8, 3>& input);
26
27} // namespace Rasterizer
28} // namespace Pica
diff --git a/src/video_core/texture/etc1.cpp b/src/video_core/texture/etc1.cpp
new file mode 100644
index 000000000..43f7f56db
--- /dev/null
+++ b/src/video_core/texture/etc1.cpp
@@ -0,0 +1,122 @@
1// Copyright 2017 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include "common/bit_field.h"
7#include "common/color.h"
8#include "common/common_types.h"
9#include "common/math_util.h"
10#include "common/vector_math.h"
11#include "video_core/texture/etc1.h"
12
13namespace Pica {
14namespace Texture {
15
16namespace {
17
18constexpr std::array<std::array<u8, 2>, 8> etc1_modifier_table = {{
19 {2, 8}, {5, 17}, {9, 29}, {13, 42}, {18, 60}, {24, 80}, {33, 106}, {47, 183},
20}};
21
22union ETC1Tile {
23 u64 raw;
24
25 // Each of these two is a collection of 16 bits (one per lookup value)
26 BitField<0, 16, u64> table_subindexes;
27 BitField<16, 16, u64> negation_flags;
28
29 unsigned GetTableSubIndex(unsigned index) const {
30 return (table_subindexes >> index) & 1;
31 }
32
33 bool GetNegationFlag(unsigned index) const {
34 return ((negation_flags >> index) & 1) == 1;
35 }
36
37 BitField<32, 1, u64> flip;
38 BitField<33, 1, u64> differential_mode;
39
40 BitField<34, 3, u64> table_index_2;
41 BitField<37, 3, u64> table_index_1;
42
43 union {
44 // delta value + base value
45 BitField<40, 3, s64> db;
46 BitField<43, 5, u64> b;
47
48 BitField<48, 3, s64> dg;
49 BitField<51, 5, u64> g;
50
51 BitField<56, 3, s64> dr;
52 BitField<59, 5, u64> r;
53 } differential;
54
55 union {
56 BitField<40, 4, u64> b2;
57 BitField<44, 4, u64> b1;
58
59 BitField<48, 4, u64> g2;
60 BitField<52, 4, u64> g1;
61
62 BitField<56, 4, u64> r2;
63 BitField<60, 4, u64> r1;
64 } separate;
65
66 const Math::Vec3<u8> GetRGB(unsigned int x, unsigned int y) const {
67 int texel = 4 * x + y;
68
69 if (flip)
70 std::swap(x, y);
71
72 // Lookup base value
73 Math::Vec3<int> ret;
74 if (differential_mode) {
75 ret.r() = static_cast<int>(differential.r);
76 ret.g() = static_cast<int>(differential.g);
77 ret.b() = static_cast<int>(differential.b);
78 if (x >= 2) {
79 ret.r() += static_cast<int>(differential.dr);
80 ret.g() += static_cast<int>(differential.dg);
81 ret.b() += static_cast<int>(differential.db);
82 }
83 ret.r() = Color::Convert5To8(ret.r());
84 ret.g() = Color::Convert5To8(ret.g());
85 ret.b() = Color::Convert5To8(ret.b());
86 } else {
87 if (x < 2) {
88 ret.r() = Color::Convert4To8(static_cast<u8>(separate.r1));
89 ret.g() = Color::Convert4To8(static_cast<u8>(separate.g1));
90 ret.b() = Color::Convert4To8(static_cast<u8>(separate.b1));
91 } else {
92 ret.r() = Color::Convert4To8(static_cast<u8>(separate.r2));
93 ret.g() = Color::Convert4To8(static_cast<u8>(separate.g2));
94 ret.b() = Color::Convert4To8(static_cast<u8>(separate.b2));
95 }
96 }
97
98 // Add modifier
99 unsigned table_index =
100 static_cast<int>((x < 2) ? table_index_1.Value() : table_index_2.Value());
101
102 int modifier = etc1_modifier_table[table_index][GetTableSubIndex(texel)];
103 if (GetNegationFlag(texel))
104 modifier *= -1;
105
106 ret.r() = MathUtil::Clamp(ret.r() + modifier, 0, 255);
107 ret.g() = MathUtil::Clamp(ret.g() + modifier, 0, 255);
108 ret.b() = MathUtil::Clamp(ret.b() + modifier, 0, 255);
109
110 return ret.Cast<u8>();
111 }
112};
113
114} // anonymous namespace
115
116Math::Vec3<u8> SampleETC1Subtile(u64 value, unsigned int x, unsigned int y) {
117 ETC1Tile tile{value};
118 return tile.GetRGB(x, y);
119}
120
121} // namespace Texture
122} // namespace Pica
diff --git a/src/video_core/texture/etc1.h b/src/video_core/texture/etc1.h
new file mode 100644
index 000000000..e188b19df
--- /dev/null
+++ b/src/video_core/texture/etc1.h
@@ -0,0 +1,16 @@
1// Copyright 2017 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "common/vector_math.h"
9
10namespace Pica {
11namespace Texture {
12
13Math::Vec3<u8> SampleETC1Subtile(u64 value, unsigned int x, unsigned int y);
14
15} // namespace Texture
16} // namespace Pica
diff --git a/src/video_core/texture/texture_decode.cpp b/src/video_core/texture/texture_decode.cpp
new file mode 100644
index 000000000..0818d652c
--- /dev/null
+++ b/src/video_core/texture/texture_decode.cpp
@@ -0,0 +1,227 @@
1// Copyright 2017 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/color.h"
7#include "common/logging/log.h"
8#include "common/math_util.h"
9#include "common/swap.h"
10#include "common/vector_math.h"
11#include "video_core/regs_texturing.h"
12#include "video_core/texture/etc1.h"
13#include "video_core/texture/texture_decode.h"
14#include "video_core/utils.h"
15
16using TextureFormat = Pica::TexturingRegs::TextureFormat;
17
18namespace Pica {
19namespace Texture {
20
21constexpr size_t TILE_SIZE = 8 * 8;
22constexpr size_t ETC1_SUBTILES = 2 * 2;
23
24size_t CalculateTileSize(TextureFormat format) {
25 switch (format) {
26 case TextureFormat::RGBA8:
27 return 4 * TILE_SIZE;
28
29 case TextureFormat::RGB8:
30 return 3 * TILE_SIZE;
31
32 case TextureFormat::RGB5A1:
33 case TextureFormat::RGB565:
34 case TextureFormat::RGBA4:
35 case TextureFormat::IA8:
36 case TextureFormat::RG8:
37 return 2 * TILE_SIZE;
38
39 case TextureFormat::I8:
40 case TextureFormat::A8:
41 case TextureFormat::IA4:
42 return 1 * TILE_SIZE;
43
44 case TextureFormat::I4:
45 case TextureFormat::A4:
46 return TILE_SIZE / 2;
47
48 case TextureFormat::ETC1:
49 return ETC1_SUBTILES * 8;
50
51 case TextureFormat::ETC1A4:
52 return ETC1_SUBTILES * 16;
53
54 default: // placeholder for yet unknown formats
55 UNIMPLEMENTED();
56 return 0;
57 }
58}
59
60Math::Vec4<u8> LookupTexture(const u8* source, unsigned int x, unsigned int y,
61 const TextureInfo& info, bool disable_alpha) {
62 // Coordinate in tiles
63 const unsigned int coarse_x = x / 8;
64 const unsigned int coarse_y = y / 8;
65
66 // Coordinate inside the tile
67 const unsigned int fine_x = x % 8;
68 const unsigned int fine_y = y % 8;
69
70 const u8* line = source + coarse_y * info.stride;
71 const u8* tile = line + coarse_x * CalculateTileSize(info.format);
72 return LookupTexelInTile(tile, fine_x, fine_y, info, disable_alpha);
73}
74
75Math::Vec4<u8> LookupTexelInTile(const u8* source, unsigned int x, unsigned int y,
76 const TextureInfo& info, bool disable_alpha) {
77 DEBUG_ASSERT(x < 8);
78 DEBUG_ASSERT(y < 8);
79
80 using VideoCore::MortonInterleave;
81
82 switch (info.format) {
83 case TextureFormat::RGBA8: {
84 auto res = Color::DecodeRGBA8(source + MortonInterleave(x, y) * 4);
85 return {res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a())};
86 }
87
88 case TextureFormat::RGB8: {
89 auto res = Color::DecodeRGB8(source + MortonInterleave(x, y) * 3);
90 return {res.r(), res.g(), res.b(), 255};
91 }
92
93 case TextureFormat::RGB5A1: {
94 auto res = Color::DecodeRGB5A1(source + MortonInterleave(x, y) * 2);
95 return {res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a())};
96 }
97
98 case TextureFormat::RGB565: {
99 auto res = Color::DecodeRGB565(source + MortonInterleave(x, y) * 2);
100 return {res.r(), res.g(), res.b(), 255};
101 }
102
103 case TextureFormat::RGBA4: {
104 auto res = Color::DecodeRGBA4(source + MortonInterleave(x, y) * 2);
105 return {res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a())};
106 }
107
108 case TextureFormat::IA8: {
109 const u8* source_ptr = source + MortonInterleave(x, y) * 2;
110
111 if (disable_alpha) {
112 // Show intensity as red, alpha as green
113 return {source_ptr[1], source_ptr[0], 0, 255};
114 } else {
115 return {source_ptr[1], source_ptr[1], source_ptr[1], source_ptr[0]};
116 }
117 }
118
119 case TextureFormat::RG8: {
120 auto res = Color::DecodeRG8(source + MortonInterleave(x, y) * 2);
121 return {res.r(), res.g(), 0, 255};
122 }
123
124 case TextureFormat::I8: {
125 const u8* source_ptr = source + MortonInterleave(x, y);
126 return {*source_ptr, *source_ptr, *source_ptr, 255};
127 }
128
129 case TextureFormat::A8: {
130 const u8* source_ptr = source + MortonInterleave(x, y);
131
132 if (disable_alpha) {
133 return {*source_ptr, *source_ptr, *source_ptr, 255};
134 } else {
135 return {0, 0, 0, *source_ptr};
136 }
137 }
138
139 case TextureFormat::IA4: {
140 const u8* source_ptr = source + MortonInterleave(x, y);
141
142 u8 i = Color::Convert4To8(((*source_ptr) & 0xF0) >> 4);
143 u8 a = Color::Convert4To8((*source_ptr) & 0xF);
144
145 if (disable_alpha) {
146 // Show intensity as red, alpha as green
147 return {i, a, 0, 255};
148 } else {
149 return {i, i, i, a};
150 }
151 }
152
153 case TextureFormat::I4: {
154 u32 morton_offset = MortonInterleave(x, y);
155 const u8* source_ptr = source + morton_offset / 2;
156
157 u8 i = (morton_offset % 2) ? ((*source_ptr & 0xF0) >> 4) : (*source_ptr & 0xF);
158 i = Color::Convert4To8(i);
159
160 return {i, i, i, 255};
161 }
162
163 case TextureFormat::A4: {
164 u32 morton_offset = MortonInterleave(x, y);
165 const u8* source_ptr = source + morton_offset / 2;
166
167 u8 a = (morton_offset % 2) ? ((*source_ptr & 0xF0) >> 4) : (*source_ptr & 0xF);
168 a = Color::Convert4To8(a);
169
170 if (disable_alpha) {
171 return {a, a, a, 255};
172 } else {
173 return {0, 0, 0, a};
174 }
175 }
176
177 case TextureFormat::ETC1:
178 case TextureFormat::ETC1A4: {
179 bool has_alpha = (info.format == TextureFormat::ETC1A4);
180 size_t subtile_size = has_alpha ? 16 : 8;
181
182 // ETC1 further subdivides each 8x8 tile into four 4x4 subtiles
183 constexpr unsigned int subtile_width = 4;
184 constexpr unsigned int subtile_height = 4;
185
186 unsigned int subtile_index = (x / subtile_width) + 2 * (y / subtile_height);
187 x %= subtile_width;
188 y %= subtile_height;
189
190 const u8* subtile_ptr = source + subtile_index * subtile_size;
191
192 u8 alpha = 255;
193 if (has_alpha) {
194 u64_le packed_alpha;
195 memcpy(&packed_alpha, subtile_ptr, sizeof(u64));
196 subtile_ptr += sizeof(u64);
197
198 alpha = Color::Convert4To8((packed_alpha >> (4 * (x * subtile_width + y))) & 0xF);
199 }
200
201 u64_le subtile_data;
202 memcpy(&subtile_data, subtile_ptr, sizeof(u64));
203
204 return Math::MakeVec(SampleETC1Subtile(subtile_data, x, y),
205 disable_alpha ? (u8)255 : alpha);
206 }
207
208 default:
209 LOG_ERROR(HW_GPU, "Unknown texture format: %x", (u32)info.format);
210 DEBUG_ASSERT(false);
211 return {};
212 }
213}
214
215TextureInfo TextureInfo::FromPicaRegister(const TexturingRegs::TextureConfig& config,
216 const TexturingRegs::TextureFormat& format) {
217 TextureInfo info;
218 info.physical_address = config.GetPhysicalAddress();
219 info.width = config.width;
220 info.height = config.height;
221 info.format = format;
222 info.SetDefaultStride();
223 return info;
224}
225
226} // namespace Texture
227} // namespace Pica
diff --git a/src/video_core/texture/texture_decode.h b/src/video_core/texture/texture_decode.h
new file mode 100644
index 000000000..8507cfeb8
--- /dev/null
+++ b/src/video_core/texture/texture_decode.h
@@ -0,0 +1,60 @@
1// Copyright 2017 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "common/vector_math.h"
9#include "video_core/regs_texturing.h"
10
11namespace Pica {
12namespace Texture {
13
14/// Returns the byte size of a 8*8 tile of the specified texture format.
15size_t CalculateTileSize(TexturingRegs::TextureFormat format);
16
17struct TextureInfo {
18 PAddr physical_address;
19 unsigned int width;
20 unsigned int height;
21 ptrdiff_t stride;
22 TexturingRegs::TextureFormat format;
23
24 static TextureInfo FromPicaRegister(const TexturingRegs::TextureConfig& config,
25 const TexturingRegs::TextureFormat& format);
26
27 /// Calculates stride from format and width, assuming that the entire texture is contiguous.
28 void SetDefaultStride() {
29 stride = CalculateTileSize(format) * (width / 8);
30 }
31};
32
33/**
34 * Lookup texel located at the given coordinates and return an RGBA vector of its color.
35 * @param source Source pointer to read data from
36 * @param x,y Texture coordinates to read from
37 * @param info TextureInfo object describing the texture setup
38 * @param disable_alpha This is used for debug widgets which use this method to display textures
39 * without providing a good way to visualize alpha by themselves. If true, this will return 255 for
40 * the alpha component, and either drop the information entirely or store it in an "unused" color
41 * channel.
42 * @todo Eventually we should get rid of the disable_alpha parameter.
43 */
44Math::Vec4<u8> LookupTexture(const u8* source, unsigned int x, unsigned int y,
45 const TextureInfo& info, bool disable_alpha = false);
46
47/**
48 * Looks up a texel from a single 8x8 texture tile.
49 *
50 * @param source Pointer to the beginning of the tile.
51 * @param x, y In-tile coordinates to read from. Must be < 8.
52 * @param info TextureInfo describing the texture format.
53 * @param disable_alpha Used for debugging. Sets the result alpha to 255 and either discards the
54 * real alpha or inserts it in an otherwise unused channel.
55 */
56Math::Vec4<u8> LookupTexelInTile(const u8* source, unsigned int x, unsigned int y,
57 const TextureInfo& info, bool disable_alpha);
58
59} // namespace Texture
60} // namespace Pica
diff --git a/src/video_core/vertex_loader.cpp b/src/video_core/vertex_loader.cpp
index 2b8ef7018..37c5224a9 100644
--- a/src/video_core/vertex_loader.cpp
+++ b/src/video_core/vertex_loader.cpp
@@ -8,15 +8,15 @@
8#include "common/vector_math.h" 8#include "common/vector_math.h"
9#include "core/memory.h" 9#include "core/memory.h"
10#include "video_core/debug_utils/debug_utils.h" 10#include "video_core/debug_utils/debug_utils.h"
11#include "video_core/pica.h"
12#include "video_core/pica_state.h" 11#include "video_core/pica_state.h"
13#include "video_core/pica_types.h" 12#include "video_core/pica_types.h"
13#include "video_core/regs_pipeline.h"
14#include "video_core/shader/shader.h" 14#include "video_core/shader/shader.h"
15#include "video_core/vertex_loader.h" 15#include "video_core/vertex_loader.h"
16 16
17namespace Pica { 17namespace Pica {
18 18
19void VertexLoader::Setup(const Pica::Regs& regs) { 19void VertexLoader::Setup(const PipelineRegs& regs) {
20 ASSERT_MSG(!is_setup, "VertexLoader is not intended to be setup more than once."); 20 ASSERT_MSG(!is_setup, "VertexLoader is not intended to be setup more than once.");
21 21
22 const auto& attribute_config = regs.vertex_attributes; 22 const auto& attribute_config = regs.vertex_attributes;
@@ -70,7 +70,8 @@ void VertexLoader::Setup(const Pica::Regs& regs) {
70 is_setup = true; 70 is_setup = true;
71} 71}
72 72
73void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, 73void VertexLoader::LoadVertex(u32 base_address, int index, int vertex,
74 Shader::AttributeBuffer& input,
74 DebugUtils::MemoryAccessTracker& memory_accesses) { 75 DebugUtils::MemoryAccessTracker& memory_accesses) {
75 ASSERT_MSG(is_setup, "A VertexLoader needs to be setup before loading vertices."); 76 ASSERT_MSG(is_setup, "A VertexLoader needs to be setup before loading vertices.");
76 77
@@ -84,15 +85,16 @@ void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::I
84 memory_accesses.AddAccess( 85 memory_accesses.AddAccess(
85 source_addr, 86 source_addr,
86 vertex_attribute_elements[i] * 87 vertex_attribute_elements[i] *
87 ((vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) 88 ((vertex_attribute_formats[i] == PipelineRegs::VertexAttributeFormat::FLOAT)
88 ? 4 89 ? 4
89 : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) 90 : (vertex_attribute_formats[i] ==
91 PipelineRegs::VertexAttributeFormat::SHORT)
90 ? 2 92 ? 2
91 : 1)); 93 : 1));
92 } 94 }
93 95
94 switch (vertex_attribute_formats[i]) { 96 switch (vertex_attribute_formats[i]) {
95 case Regs::VertexAttributeFormat::BYTE: { 97 case PipelineRegs::VertexAttributeFormat::BYTE: {
96 const s8* srcdata = 98 const s8* srcdata =
97 reinterpret_cast<const s8*>(Memory::GetPhysicalPointer(source_addr)); 99 reinterpret_cast<const s8*>(Memory::GetPhysicalPointer(source_addr));
98 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { 100 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
@@ -100,7 +102,7 @@ void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::I
100 } 102 }
101 break; 103 break;
102 } 104 }
103 case Regs::VertexAttributeFormat::UBYTE: { 105 case PipelineRegs::VertexAttributeFormat::UBYTE: {
104 const u8* srcdata = 106 const u8* srcdata =
105 reinterpret_cast<const u8*>(Memory::GetPhysicalPointer(source_addr)); 107 reinterpret_cast<const u8*>(Memory::GetPhysicalPointer(source_addr));
106 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { 108 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
@@ -108,7 +110,7 @@ void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::I
108 } 110 }
109 break; 111 break;
110 } 112 }
111 case Regs::VertexAttributeFormat::SHORT: { 113 case PipelineRegs::VertexAttributeFormat::SHORT: {
112 const s16* srcdata = 114 const s16* srcdata =
113 reinterpret_cast<const s16*>(Memory::GetPhysicalPointer(source_addr)); 115 reinterpret_cast<const s16*>(Memory::GetPhysicalPointer(source_addr));
114 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { 116 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
@@ -116,7 +118,7 @@ void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::I
116 } 118 }
117 break; 119 break;
118 } 120 }
119 case Regs::VertexAttributeFormat::FLOAT: { 121 case PipelineRegs::VertexAttributeFormat::FLOAT: {
120 const float* srcdata = 122 const float* srcdata =
121 reinterpret_cast<const float*>(Memory::GetPhysicalPointer(source_addr)); 123 reinterpret_cast<const float*>(Memory::GetPhysicalPointer(source_addr));
122 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { 124 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
@@ -142,7 +144,7 @@ void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::I
142 input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); 144 input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
143 } else if (vertex_attribute_is_default[i]) { 145 } else if (vertex_attribute_is_default[i]) {
144 // Load the default attribute if we're configured to do so 146 // Load the default attribute if we're configured to do so
145 input.attr[i] = g_state.vs_default_attributes[i]; 147 input.attr[i] = g_state.input_default_attributes.attr[i];
146 LOG_TRACE(HW_GPU, 148 LOG_TRACE(HW_GPU,
147 "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", i, 149 "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", i,
148 vertex, index, input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), 150 vertex, index, input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),
diff --git a/src/video_core/vertex_loader.h b/src/video_core/vertex_loader.h
index 9f2098bb2..02db10aee 100644
--- a/src/video_core/vertex_loader.h
+++ b/src/video_core/vertex_loader.h
@@ -2,7 +2,7 @@
2 2
3#include <array> 3#include <array>
4#include "common/common_types.h" 4#include "common/common_types.h"
5#include "video_core/pica.h" 5#include "video_core/regs_pipeline.h"
6 6
7namespace Pica { 7namespace Pica {
8 8
@@ -11,18 +11,18 @@ class MemoryAccessTracker;
11} 11}
12 12
13namespace Shader { 13namespace Shader {
14struct InputVertex; 14struct AttributeBuffer;
15} 15}
16 16
17class VertexLoader { 17class VertexLoader {
18public: 18public:
19 VertexLoader() = default; 19 VertexLoader() = default;
20 explicit VertexLoader(const Pica::Regs& regs) { 20 explicit VertexLoader(const PipelineRegs& regs) {
21 Setup(regs); 21 Setup(regs);
22 } 22 }
23 23
24 void Setup(const Pica::Regs& regs); 24 void Setup(const PipelineRegs& regs);
25 void LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, 25 void LoadVertex(u32 base_address, int index, int vertex, Shader::AttributeBuffer& input,
26 DebugUtils::MemoryAccessTracker& memory_accesses); 26 DebugUtils::MemoryAccessTracker& memory_accesses);
27 27
28 int GetNumTotalAttributes() const { 28 int GetNumTotalAttributes() const {
@@ -32,7 +32,7 @@ public:
32private: 32private:
33 std::array<u32, 16> vertex_attribute_sources; 33 std::array<u32, 16> vertex_attribute_sources;
34 std::array<u32, 16> vertex_attribute_strides{}; 34 std::array<u32, 16> vertex_attribute_strides{};
35 std::array<Regs::VertexAttributeFormat, 16> vertex_attribute_formats; 35 std::array<PipelineRegs::VertexAttributeFormat, 16> vertex_attribute_formats;
36 std::array<u32, 16> vertex_attribute_elements{}; 36 std::array<u32, 16> vertex_attribute_elements{};
37 std::array<bool, 16> vertex_attribute_is_default; 37 std::array<bool, 16> vertex_attribute_is_default;
38 int num_total_attributes = 0; 38 int num_total_attributes = 0;