summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/CMakeLists.txt1
-rw-r--r--src/audio_core/CMakeLists.txt27
-rw-r--r--src/audio_core/audio_core.cpp46
-rw-r--r--src/audio_core/audio_core.h7
-rw-r--r--src/audio_core/hle/common.h11
-rw-r--r--src/audio_core/hle/dsp.cpp128
-rw-r--r--src/audio_core/hle/dsp.h40
-rw-r--r--src/audio_core/hle/filter.h1
-rw-r--r--src/audio_core/hle/mixers.cpp201
-rw-r--r--src/audio_core/hle/mixers.h63
-rw-r--r--src/audio_core/hle/pipe.cpp41
-rw-r--r--src/audio_core/hle/pipe.h16
-rw-r--r--src/audio_core/hle/source.cpp320
-rw-r--r--src/audio_core/hle/source.h144
-rw-r--r--src/audio_core/interpolate.cpp85
-rw-r--r--src/audio_core/interpolate.h41
-rw-r--r--src/audio_core/null_sink.h29
-rw-r--r--src/audio_core/sdl2_sink.cpp126
-rw-r--r--src/audio_core/sdl2_sink.h30
-rw-r--r--src/audio_core/sink.h2
-rw-r--r--src/audio_core/sink_details.cpp25
-rw-r--r--src/audio_core/sink_details.h27
-rw-r--r--src/audio_core/time_stretch.cpp144
-rw-r--r--src/audio_core/time_stretch.h57
-rw-r--r--src/citra/CMakeLists.txt2
-rw-r--r--src/citra/citra.cpp38
-rw-r--r--src/citra/config.cpp6
-rw-r--r--src/citra/default_ini.h9
-rw-r--r--src/citra/emu_window/emu_window_sdl2.cpp7
-rw-r--r--src/citra_qt/CMakeLists.txt21
-rw-r--r--src/citra_qt/bootmanager.cpp2
-rw-r--r--src/citra_qt/config.cpp91
-rw-r--r--src/citra_qt/configure.ui108
-rw-r--r--src/citra_qt/configure_audio.cpp44
-rw-r--r--src/citra_qt/configure_audio.h27
-rw-r--r--src/citra_qt/configure_audio.ui48
-rw-r--r--src/citra_qt/configure_debug.cpp31
-rw-r--r--src/citra_qt/configure_debug.h29
-rw-r--r--src/citra_qt/configure_debug.ui102
-rw-r--r--src/citra_qt/configure_dialog.cpp30
-rw-r--r--src/citra_qt/configure_dialog.h29
-rw-r--r--src/citra_qt/configure_general.cpp39
-rw-r--r--src/citra_qt/configure_general.h29
-rw-r--r--src/citra_qt/configure_general.ui173
-rw-r--r--src/citra_qt/debugger/graphics_breakpoints.cpp6
-rw-r--r--src/citra_qt/debugger/graphics_framebuffer.cpp6
-rw-r--r--src/citra_qt/debugger/graphics_tracing.cpp6
-rw-r--r--src/citra_qt/debugger/graphics_vertex_shader.cpp8
-rw-r--r--src/citra_qt/debugger/profiler.cpp55
-rw-r--r--src/citra_qt/debugger/profiler.h3
-rw-r--r--src/citra_qt/game_list.cpp27
-rw-r--r--src/citra_qt/game_list.h6
-rw-r--r--src/citra_qt/game_list_p.h106
-rw-r--r--src/citra_qt/hotkeys.cpp54
-rw-r--r--src/citra_qt/hotkeys.h8
-rw-r--r--src/citra_qt/hotkeys.ui47
-rw-r--r--src/citra_qt/main.cpp184
-rw-r--r--src/citra_qt/main.h8
-rw-r--r--src/citra_qt/main.ui51
-rw-r--r--src/citra_qt/ui_settings.cpp11
-rw-r--r--src/citra_qt/ui_settings.h47
-rw-r--r--src/citra_qt/util/util.cpp2
-rw-r--r--src/common/CMakeLists.txt1
-rw-r--r--src/common/assert.h2
-rw-r--r--src/common/bit_field.h2
-rw-r--r--src/common/bit_set.h3
-rw-r--r--src/common/code_block.h6
-rw-r--r--src/common/common_funcs.h4
-rw-r--r--src/common/file_util.cpp58
-rw-r--r--src/common/file_util.h41
-rw-r--r--src/common/logging/backend.cpp1
-rw-r--r--src/common/logging/log.h3
-rw-r--r--src/common/microprofile.h4
-rw-r--r--src/common/microprofileui.h3
-rw-r--r--src/common/profiler.cpp82
-rw-r--r--src/common/profiler.h152
-rw-r--r--src/common/profiler_reporting.h27
-rw-r--r--src/common/swap.h68
-rw-r--r--src/common/thread.h46
-rw-r--r--src/common/x64/emitter.cpp28
-rw-r--r--src/common/x64/emitter.h4
-rw-r--r--src/core/CMakeLists.txt4
-rw-r--r--src/core/arm/arm_interface.h1
-rw-r--r--src/core/arm/dyncom/arm_dyncom.cpp2
-rw-r--r--src/core/arm/dyncom/arm_dyncom_interpreter.cpp24
-rw-r--r--src/core/core.cpp2
-rw-r--r--src/core/gdbstub/gdbstub.cpp30
-rw-r--r--src/core/hle/applets/applet.h1
-rw-r--r--src/core/hle/applets/mii_selector.cpp29
-rw-r--r--src/core/hle/applets/mii_selector.h50
-rw-r--r--src/core/hle/applets/swkbd.cpp24
-rw-r--r--src/core/hle/applets/swkbd.h7
-rw-r--r--src/core/hle/config_mem.cpp7
-rw-r--r--src/core/hle/function_wrappers.h3
-rw-r--r--src/core/hle/hle.cpp22
-rw-r--r--src/core/hle/hle.h4
-rw-r--r--src/core/hle/kernel/memory.cpp5
-rw-r--r--src/core/hle/kernel/process.cpp2
-rw-r--r--src/core/hle/kernel/process.h9
-rw-r--r--src/core/hle/kernel/shared_memory.cpp177
-rw-r--r--src/core/hle/kernel/shared_memory.h48
-rw-r--r--src/core/hle/kernel/thread.cpp89
-rw-r--r--src/core/hle/kernel/thread.h4
-rw-r--r--src/core/hle/result.h3
-rw-r--r--src/core/hle/service/ac_u.cpp26
-rw-r--r--src/core/hle/service/act_a.cpp26
-rw-r--r--src/core/hle/service/act_a.h23
-rw-r--r--src/core/hle/service/act_u.cpp3
-rw-r--r--src/core/hle/service/am/am.cpp2
-rw-r--r--src/core/hle/service/apt/apt.cpp58
-rw-r--r--src/core/hle/service/apt/apt.h15
-rw-r--r--src/core/hle/service/apt/bcfnt/bcfnt.cpp71
-rw-r--r--src/core/hle/service/apt/bcfnt/bcfnt.h87
-rw-r--r--src/core/hle/service/cfg/cfg.cpp4
-rw-r--r--src/core/hle/service/cfg/cfg.h13
-rw-r--r--src/core/hle/service/csnd_snd.cpp13
-rw-r--r--src/core/hle/service/dsp_dsp.cpp199
-rw-r--r--src/core/hle/service/dsp_dsp.h19
-rw-r--r--src/core/hle/service/fs/archive.cpp1
-rw-r--r--src/core/hle/service/fs/fs_user.cpp2
-rw-r--r--src/core/hle/service/gsp_gpu.cpp75
-rw-r--r--src/core/hle/service/gsp_gpu.h1
-rw-r--r--src/core/hle/service/hid/hid.cpp5
-rw-r--r--src/core/hle/service/ir/ir.cpp5
-rw-r--r--src/core/hle/service/ndm/ndm.cpp197
-rw-r--r--src/core/hle/service/ndm/ndm.h216
-rw-r--r--src/core/hle/service/ndm/ndm_u.cpp34
-rw-r--r--src/core/hle/service/service.cpp2
-rw-r--r--src/core/hle/service/soc_u.cpp100
-rw-r--r--src/core/hle/service/y2r_u.cpp490
-rw-r--r--src/core/hle/service/y2r_u.h20
-rw-r--r--src/core/hle/shared_page.cpp3
-rw-r--r--src/core/hle/shared_page.h6
-rw-r--r--src/core/hle/svc.cpp70
-rw-r--r--src/core/hw/gpu.cpp327
-rw-r--r--src/core/hw/gpu.h4
-rw-r--r--src/core/hw/lcd.h2
-rw-r--r--src/core/hw/y2r.cpp2
-rw-r--r--src/core/loader/3dsx.cpp39
-rw-r--r--src/core/loader/3dsx.h9
-rw-r--r--src/core/loader/loader.cpp53
-rw-r--r--src/core/loader/loader.h57
-rw-r--r--src/core/loader/ncch.cpp31
-rw-r--r--src/core/loader/ncch.h7
-rw-r--r--src/core/memory.cpp140
-rw-r--r--src/core/memory.h22
-rw-r--r--src/core/settings.cpp19
-rw-r--r--src/core/settings.h9
-rw-r--r--src/core/tracer/recorder.cpp24
-rw-r--r--src/core/tracer/recorder.h1
-rw-r--r--src/tests/CMakeLists.txt16
-rw-r--r--src/tests/tests.cpp9
-rw-r--r--src/video_core/CMakeLists.txt3
-rw-r--r--src/video_core/clipper.cpp17
-rw-r--r--src/video_core/command_processor.cpp189
-rw-r--r--src/video_core/debug_utils/debug_utils.cpp279
-rw-r--r--src/video_core/debug_utils/debug_utils.h91
-rw-r--r--src/video_core/pica.cpp7
-rw-r--r--src/video_core/pica.h58
-rw-r--r--src/video_core/pica_state.h9
-rw-r--r--src/video_core/pica_types.h1
-rw-r--r--src/video_core/primitive_assembly.cpp5
-rw-r--r--src/video_core/rasterizer.cpp175
-rw-r--r--src/video_core/rasterizer_interface.h31
-rw-r--r--src/video_core/renderer_base.cpp5
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp962
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h378
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp712
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h221
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp188
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.h2
-rw-r--r--src/video_core/renderer_opengl/gl_shader_util.cpp3
-rw-r--r--src/video_core/renderer_opengl/gl_shader_util.h1
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp75
-rw-r--r--src/video_core/renderer_opengl/gl_state.h28
-rw-r--r--src/video_core/renderer_opengl/pica_to_gl.h27
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp149
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h47
-rw-r--r--src/video_core/shader/shader.cpp116
-rw-r--r--src/video_core/shader/shader.h137
-rw-r--r--src/video_core/shader/shader_interpreter.cpp86
-rw-r--r--src/video_core/shader/shader_interpreter.h6
-rw-r--r--src/video_core/shader/shader_jit_x64.cpp344
-rw-r--r--src/video_core/shader/shader_jit_x64.h63
-rw-r--r--src/video_core/swrasterizer.h12
-rw-r--r--src/video_core/utils.cpp36
-rw-r--r--src/video_core/utils.h27
-rw-r--r--src/video_core/vertex_loader.cpp146
-rw-r--r--src/video_core/vertex_loader.h40
-rw-r--r--src/video_core/video_core.cpp5
-rw-r--r--src/video_core/video_core.h1
191 files changed, 8188 insertions, 3245 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index de4fe716a..1e1245160 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -5,6 +5,7 @@ add_subdirectory(common)
5add_subdirectory(core) 5add_subdirectory(core)
6add_subdirectory(video_core) 6add_subdirectory(video_core)
7add_subdirectory(audio_core) 7add_subdirectory(audio_core)
8add_subdirectory(tests)
8if (ENABLE_SDL2) 9if (ENABLE_SDL2)
9 add_subdirectory(citra) 10 add_subdirectory(citra)
10endif() 11endif()
diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt
index 869da5e83..a72a907ef 100644
--- a/src/audio_core/CMakeLists.txt
+++ b/src/audio_core/CMakeLists.txt
@@ -3,7 +3,12 @@ set(SRCS
3 codec.cpp 3 codec.cpp
4 hle/dsp.cpp 4 hle/dsp.cpp
5 hle/filter.cpp 5 hle/filter.cpp
6 hle/mixers.cpp
6 hle/pipe.cpp 7 hle/pipe.cpp
8 hle/source.cpp
9 interpolate.cpp
10 sink_details.cpp
11 time_stretch.cpp
7 ) 12 )
8 13
9set(HEADERS 14set(HEADERS
@@ -12,10 +17,30 @@ set(HEADERS
12 hle/common.h 17 hle/common.h
13 hle/dsp.h 18 hle/dsp.h
14 hle/filter.h 19 hle/filter.h
20 hle/mixers.h
15 hle/pipe.h 21 hle/pipe.h
22 hle/source.h
23 interpolate.h
24 null_sink.h
16 sink.h 25 sink.h
26 sink_details.h
27 time_stretch.h
17 ) 28 )
18 29
30include_directories(../../externals/soundtouch/include)
31
32if(SDL2_FOUND)
33 set(SRCS ${SRCS} sdl2_sink.cpp)
34 set(HEADERS ${HEADERS} sdl2_sink.h)
35 include_directories(${SDL2_INCLUDE_DIR})
36endif()
37
19create_directory_groups(${SRCS} ${HEADERS}) 38create_directory_groups(${SRCS} ${HEADERS})
20 39
21add_library(audio_core STATIC ${SRCS} ${HEADERS}) \ No newline at end of file 40add_library(audio_core STATIC ${SRCS} ${HEADERS})
41target_link_libraries(audio_core SoundTouch)
42
43if(SDL2_FOUND)
44 target_link_libraries(audio_core ${SDL2_LIBRARY})
45 set_property(TARGET audio_core APPEND PROPERTY COMPILE_DEFINITIONS HAVE_SDL2)
46endif()
diff --git a/src/audio_core/audio_core.cpp b/src/audio_core/audio_core.cpp
index 894f46990..d42249ebd 100644
--- a/src/audio_core/audio_core.cpp
+++ b/src/audio_core/audio_core.cpp
@@ -2,8 +2,15 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <memory>
6#include <string>
7
5#include "audio_core/audio_core.h" 8#include "audio_core/audio_core.h"
6#include "audio_core/hle/dsp.h" 9#include "audio_core/hle/dsp.h"
10#include "audio_core/hle/pipe.h"
11#include "audio_core/null_sink.h"
12#include "audio_core/sink.h"
13#include "audio_core/sink_details.h"
7 14
8#include "core/core_timing.h" 15#include "core/core_timing.h"
9#include "core/hle/kernel/vm_manager.h" 16#include "core/hle/kernel/vm_manager.h"
@@ -17,17 +24,16 @@ static constexpr u64 audio_frame_ticks = 1310252ull; ///< Units: ARM11 cycles
17 24
18static void AudioTickCallback(u64 /*userdata*/, int cycles_late) { 25static void AudioTickCallback(u64 /*userdata*/, int cycles_late) {
19 if (DSP::HLE::Tick()) { 26 if (DSP::HLE::Tick()) {
20 // HACK: We're not signaling the interrups when they should be, but just firing them all off together. 27 // TODO(merry): Signal all the other interrupts as appropriate.
21 // It should be only (interrupt_id = 2, channel_id = 2) that's signalled here. 28 DSP_DSP::SignalPipeInterrupt(DSP::HLE::DspPipe::Audio);
22 // TODO(merry): Understand when the other interrupts are fired. 29 // HACK(merry): Added to prevent regressions. Will remove soon.
23 DSP_DSP::SignalAllInterrupts(); 30 DSP_DSP::SignalPipeInterrupt(DSP::HLE::DspPipe::Binary);
24 } 31 }
25 32
26 // Reschedule recurrent event 33 // Reschedule recurrent event
27 CoreTiming::ScheduleEvent(audio_frame_ticks - cycles_late, tick_event); 34 CoreTiming::ScheduleEvent(audio_frame_ticks - cycles_late, tick_event);
28} 35}
29 36
30/// Initialise Audio
31void Init() { 37void Init() {
32 DSP::HLE::Init(); 38 DSP::HLE::Init();
33 39
@@ -35,19 +41,39 @@ void Init() {
35 CoreTiming::ScheduleEvent(audio_frame_ticks, tick_event); 41 CoreTiming::ScheduleEvent(audio_frame_ticks, tick_event);
36} 42}
37 43
38/// Add DSP address spaces to Process's address space.
39void AddAddressSpace(Kernel::VMManager& address_space) { 44void AddAddressSpace(Kernel::VMManager& address_space) {
40 auto r0_vma = address_space.MapBackingMemory(DSP::HLE::region0_base, reinterpret_cast<u8*>(&DSP::HLE::g_region0), sizeof(DSP::HLE::SharedMemory), Kernel::MemoryState::IO).MoveFrom(); 45 auto r0_vma = address_space.MapBackingMemory(DSP::HLE::region0_base, reinterpret_cast<u8*>(&DSP::HLE::g_regions[0]), sizeof(DSP::HLE::SharedMemory), Kernel::MemoryState::IO).MoveFrom();
41 address_space.Reprotect(r0_vma, Kernel::VMAPermission::ReadWrite); 46 address_space.Reprotect(r0_vma, Kernel::VMAPermission::ReadWrite);
42 47
43 auto r1_vma = address_space.MapBackingMemory(DSP::HLE::region1_base, reinterpret_cast<u8*>(&DSP::HLE::g_region1), sizeof(DSP::HLE::SharedMemory), Kernel::MemoryState::IO).MoveFrom(); 48 auto r1_vma = address_space.MapBackingMemory(DSP::HLE::region1_base, reinterpret_cast<u8*>(&DSP::HLE::g_regions[1]), sizeof(DSP::HLE::SharedMemory), Kernel::MemoryState::IO).MoveFrom();
44 address_space.Reprotect(r1_vma, Kernel::VMAPermission::ReadWrite); 49 address_space.Reprotect(r1_vma, Kernel::VMAPermission::ReadWrite);
45} 50}
46 51
47/// Shutdown Audio 52void SelectSink(std::string sink_id) {
53 if (sink_id == "auto") {
54 // Auto-select.
55 // g_sink_details is ordered in terms of desirability, with the best choice at the front.
56 const auto& sink_detail = g_sink_details.front();
57 DSP::HLE::SetSink(sink_detail.factory());
58 return;
59 }
60
61 auto iter = std::find_if(g_sink_details.begin(), g_sink_details.end(), [sink_id](const auto& sink_detail) {
62 return sink_detail.id == sink_id;
63 });
64
65 if (iter == g_sink_details.end()) {
66 LOG_ERROR(Audio, "AudioCore::SelectSink given invalid sink_id");
67 DSP::HLE::SetSink(std::make_unique<NullSink>());
68 return;
69 }
70
71 DSP::HLE::SetSink(iter->factory());
72}
73
48void Shutdown() { 74void Shutdown() {
49 CoreTiming::UnscheduleEvent(tick_event, 0); 75 CoreTiming::UnscheduleEvent(tick_event, 0);
50 DSP::HLE::Shutdown(); 76 DSP::HLE::Shutdown();
51} 77}
52 78
53} //namespace 79} // namespace AudioCore
diff --git a/src/audio_core/audio_core.h b/src/audio_core/audio_core.h
index 64c330914..f618361f3 100644
--- a/src/audio_core/audio_core.h
+++ b/src/audio_core/audio_core.h
@@ -4,14 +4,14 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <string>
8
7namespace Kernel { 9namespace Kernel {
8class VMManager; 10class VMManager;
9} 11}
10 12
11namespace AudioCore { 13namespace AudioCore {
12 14
13constexpr int num_sources = 24;
14constexpr int samples_per_frame = 160; ///< Samples per audio frame at native sample rate
15constexpr int native_sample_rate = 32728; ///< 32kHz 15constexpr int native_sample_rate = 32728; ///< 32kHz
16 16
17/// Initialise Audio Core 17/// Initialise Audio Core
@@ -20,6 +20,9 @@ void Init();
20/// Add DSP address spaces to a Process. 20/// Add DSP address spaces to a Process.
21void AddAddressSpace(Kernel::VMManager& vm_manager); 21void AddAddressSpace(Kernel::VMManager& vm_manager);
22 22
23/// Select the sink to use based on sink id.
24void SelectSink(std::string sink_id);
25
23/// Shutdown Audio Core 26/// Shutdown Audio Core
24void Shutdown(); 27void Shutdown();
25 28
diff --git a/src/audio_core/hle/common.h b/src/audio_core/hle/common.h
index 37d441eb2..596b67eaf 100644
--- a/src/audio_core/hle/common.h
+++ b/src/audio_core/hle/common.h
@@ -7,18 +7,19 @@
7#include <algorithm> 7#include <algorithm>
8#include <array> 8#include <array>
9 9
10#include "audio_core/audio_core.h"
11
12#include "common/common_types.h" 10#include "common/common_types.h"
13 11
14namespace DSP { 12namespace DSP {
15namespace HLE { 13namespace HLE {
16 14
15constexpr int num_sources = 24;
16constexpr int samples_per_frame = 160; ///< Samples per audio frame at native sample rate
17
17/// The final output to the speakers is stereo. Preprocessing output in Source is also stereo. 18/// The final output to the speakers is stereo. Preprocessing output in Source is also stereo.
18using StereoFrame16 = std::array<std::array<s16, 2>, AudioCore::samples_per_frame>; 19using StereoFrame16 = std::array<std::array<s16, 2>, samples_per_frame>;
19 20
20/// The DSP is quadraphonic internally. 21/// The DSP is quadraphonic internally.
21using QuadFrame32 = std::array<std::array<s32, 4>, AudioCore::samples_per_frame>; 22using QuadFrame32 = std::array<std::array<s32, 4>, samples_per_frame>;
22 23
23/** 24/**
24 * This performs the filter operation defined by FilterT::ProcessSample on the frame in-place. 25 * This performs the filter operation defined by FilterT::ProcessSample on the frame in-place.
@@ -26,7 +27,7 @@ using QuadFrame32 = std::array<std::array<s32, 4>, AudioCore::samples_per_fram
26 */ 27 */
27template<typename FrameT, typename FilterT> 28template<typename FrameT, typename FilterT>
28void FilterFrame(FrameT& frame, FilterT& filter) { 29void FilterFrame(FrameT& frame, FilterT& filter) {
29 std::transform(frame.begin(), frame.end(), frame.begin(), [&filter](const typename FrameT::value_type& sample) { 30 std::transform(frame.begin(), frame.end(), frame.begin(), [&filter](const auto& sample) {
30 return filter.ProcessSample(sample); 31 return filter.ProcessSample(sample);
31 }); 32 });
32} 33}
diff --git a/src/audio_core/hle/dsp.cpp b/src/audio_core/hle/dsp.cpp
index c89356edc..0640e1eff 100644
--- a/src/audio_core/hle/dsp.cpp
+++ b/src/audio_core/hle/dsp.cpp
@@ -2,40 +2,138 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <array>
6#include <memory>
7
5#include "audio_core/hle/dsp.h" 8#include "audio_core/hle/dsp.h"
9#include "audio_core/hle/mixers.h"
6#include "audio_core/hle/pipe.h" 10#include "audio_core/hle/pipe.h"
11#include "audio_core/hle/source.h"
12#include "audio_core/sink.h"
13#include "audio_core/time_stretch.h"
7 14
8namespace DSP { 15namespace DSP {
9namespace HLE { 16namespace HLE {
10 17
11SharedMemory g_region0; 18// Region management
12SharedMemory g_region1; 19
20std::array<SharedMemory, 2> g_regions;
21
22static size_t CurrentRegionIndex() {
23 // The region with the higher frame counter is chosen unless there is wraparound.
24 // This function only returns a 0 or 1.
25
26 if (g_regions[0].frame_counter == 0xFFFFu && g_regions[1].frame_counter != 0xFFFEu) {
27 // Wraparound has occured.
28 return 1;
29 }
30
31 if (g_regions[1].frame_counter == 0xFFFFu && g_regions[0].frame_counter != 0xFFFEu) {
32 // Wraparound has occured.
33 return 0;
34 }
35
36 return (g_regions[0].frame_counter > g_regions[1].frame_counter) ? 0 : 1;
37}
38
39static SharedMemory& ReadRegion() {
40 return g_regions[CurrentRegionIndex()];
41}
42
43static SharedMemory& WriteRegion() {
44 return g_regions[1 - CurrentRegionIndex()];
45}
46
47// Audio processing and mixing
48
49static std::array<Source, num_sources> sources = {
50 Source(0), Source(1), Source(2), Source(3), Source(4), Source(5),
51 Source(6), Source(7), Source(8), Source(9), Source(10), Source(11),
52 Source(12), Source(13), Source(14), Source(15), Source(16), Source(17),
53 Source(18), Source(19), Source(20), Source(21), Source(22), Source(23)
54};
55static Mixers mixers;
56
57static StereoFrame16 GenerateCurrentFrame() {
58 SharedMemory& read = ReadRegion();
59 SharedMemory& write = WriteRegion();
60
61 std::array<QuadFrame32, 3> intermediate_mixes = {};
62
63 // Generate intermediate mixes
64 for (size_t i = 0; i < num_sources; i++) {
65 write.source_statuses.status[i] = sources[i].Tick(read.source_configurations.config[i], read.adpcm_coefficients.coeff[i]);
66 for (size_t mix = 0; mix < 3; mix++) {
67 sources[i].MixInto(intermediate_mixes[mix], mix);
68 }
69 }
70
71 // Generate final mix
72 write.dsp_status = mixers.Tick(read.dsp_configuration, read.intermediate_mix_samples, write.intermediate_mix_samples, intermediate_mixes);
73
74 StereoFrame16 output_frame = mixers.GetOutput();
75
76 // Write current output frame to the shared memory region
77 for (size_t samplei = 0; samplei < output_frame.size(); samplei++) {
78 for (size_t channeli = 0; channeli < output_frame[0].size(); channeli++) {
79 write.final_samples.pcm16[samplei][channeli] = s16_le(output_frame[samplei][channeli]);
80 }
81 }
82
83 return output_frame;
84}
85
86// Audio output
87
88static std::unique_ptr<AudioCore::Sink> sink;
89static AudioCore::TimeStretcher time_stretcher;
90
91static void OutputCurrentFrame(const StereoFrame16& frame) {
92 time_stretcher.AddSamples(&frame[0][0], frame.size());
93 sink->EnqueueSamples(time_stretcher.Process(sink->SamplesInQueue()));
94}
95
96// Public Interface
13 97
14void Init() { 98void Init() {
15 DSP::HLE::ResetPipes(); 99 DSP::HLE::ResetPipes();
100
101 for (auto& source : sources) {
102 source.Reset();
103 }
104
105 mixers.Reset();
106
107 time_stretcher.Reset();
108 if (sink) {
109 time_stretcher.SetOutputSampleRate(sink->GetNativeSampleRate());
110 }
16} 111}
17 112
18void Shutdown() { 113void Shutdown() {
114 time_stretcher.Flush();
115 while (true) {
116 std::vector<s16> residual_audio = time_stretcher.Process(sink->SamplesInQueue());
117 if (residual_audio.empty())
118 break;
119 sink->EnqueueSamples(residual_audio);
120 }
19} 121}
20 122
21bool Tick() { 123bool Tick() {
22 return true; 124 StereoFrame16 current_frame = {};
23}
24 125
25SharedMemory& CurrentRegion() { 126 // TODO: Check dsp::DSP semaphore (which indicates emulated application has finished writing to shared memory region)
26 // The region with the higher frame counter is chosen unless there is wraparound. 127 current_frame = GenerateCurrentFrame();
27 128
28 if (g_region0.frame_counter == 0xFFFFu && g_region1.frame_counter != 0xFFFEu) { 129 OutputCurrentFrame(current_frame);
29 // Wraparound has occured.
30 return g_region1;
31 }
32 130
33 if (g_region1.frame_counter == 0xFFFFu && g_region0.frame_counter != 0xFFFEu) { 131 return true;
34 // Wraparound has occured. 132}
35 return g_region0;
36 }
37 133
38 return (g_region0.frame_counter > g_region1.frame_counter) ? g_region0 : g_region1; 134void SetSink(std::unique_ptr<AudioCore::Sink> sink_) {
135 sink = std::move(sink_);
136 time_stretcher.SetOutputSampleRate(sink->GetNativeSampleRate());
39} 137}
40 138
41} // namespace HLE 139} // namespace HLE
diff --git a/src/audio_core/hle/dsp.h b/src/audio_core/hle/dsp.h
index c15ef0b7a..9275cd7de 100644
--- a/src/audio_core/hle/dsp.h
+++ b/src/audio_core/hle/dsp.h
@@ -4,16 +4,22 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
7#include <cstddef> 8#include <cstddef>
9#include <memory>
8#include <type_traits> 10#include <type_traits>
9 11
10#include "audio_core/audio_core.h" 12#include "audio_core/hle/common.h"
11 13
12#include "common/bit_field.h" 14#include "common/bit_field.h"
13#include "common/common_funcs.h" 15#include "common/common_funcs.h"
14#include "common/common_types.h" 16#include "common/common_types.h"
15#include "common/swap.h" 17#include "common/swap.h"
16 18
19namespace AudioCore {
20class Sink;
21}
22
17namespace DSP { 23namespace DSP {
18namespace HLE { 24namespace HLE {
19 25
@@ -27,13 +33,8 @@ namespace HLE {
27// double-buffer. The frame counter is located as the very last u16 of each region and is incremented 33// double-buffer. The frame counter is located as the very last u16 of each region and is incremented
28// each audio tick. 34// each audio tick.
29 35
30struct SharedMemory;
31
32constexpr VAddr region0_base = 0x1FF50000; 36constexpr VAddr region0_base = 0x1FF50000;
33extern SharedMemory g_region0;
34
35constexpr VAddr region1_base = 0x1FF70000; 37constexpr VAddr region1_base = 0x1FF70000;
36extern SharedMemory g_region1;
37 38
38/** 39/**
39 * The DSP is native 16-bit. The DSP also appears to be big-endian. When reading 32-bit numbers from 40 * The DSP is native 16-bit. The DSP also appears to be big-endian. When reading 32-bit numbers from
@@ -164,9 +165,9 @@ struct SourceConfiguration {
164 float_le rate_multiplier; 165 float_le rate_multiplier;
165 166
166 enum class InterpolationMode : u8 { 167 enum class InterpolationMode : u8 {
167 None = 0, 168 Polyphase = 0,
168 Linear = 1, 169 Linear = 1,
169 Polyphase = 2 170 None = 2
170 }; 171 };
171 172
172 InterpolationMode interpolation_mode; 173 InterpolationMode interpolation_mode;
@@ -305,7 +306,7 @@ struct SourceConfiguration {
305 u16_le buffer_id; 306 u16_le buffer_id;
306 }; 307 };
307 308
308 Configuration config[AudioCore::num_sources]; 309 Configuration config[num_sources];
309}; 310};
310ASSERT_DSP_STRUCT(SourceConfiguration::Configuration, 192); 311ASSERT_DSP_STRUCT(SourceConfiguration::Configuration, 192);
311ASSERT_DSP_STRUCT(SourceConfiguration::Configuration::Buffer, 20); 312ASSERT_DSP_STRUCT(SourceConfiguration::Configuration::Buffer, 20);
@@ -313,14 +314,14 @@ ASSERT_DSP_STRUCT(SourceConfiguration::Configuration::Buffer, 20);
313struct SourceStatus { 314struct SourceStatus {
314 struct Status { 315 struct Status {
315 u8 is_enabled; ///< Is this channel enabled? (Doesn't have to be playing anything.) 316 u8 is_enabled; ///< Is this channel enabled? (Doesn't have to be playing anything.)
316 u8 previous_buffer_id_dirty; ///< Non-zero when previous_buffer_id changes 317 u8 current_buffer_id_dirty; ///< Non-zero when current_buffer_id changes
317 u16_le sync; ///< Is set by the DSP to the value of SourceConfiguration::sync 318 u16_le sync; ///< Is set by the DSP to the value of SourceConfiguration::sync
318 u32_dsp buffer_position; ///< Number of samples into the current buffer 319 u32_dsp buffer_position; ///< Number of samples into the current buffer
319 u16_le previous_buffer_id; ///< Updated when a buffer finishes playing 320 u16_le current_buffer_id; ///< Updated when a buffer finishes playing
320 INSERT_PADDING_DSPWORDS(1); 321 INSERT_PADDING_DSPWORDS(1);
321 }; 322 };
322 323
323 Status status[AudioCore::num_sources]; 324 Status status[num_sources];
324}; 325};
325ASSERT_DSP_STRUCT(SourceStatus::Status, 12); 326ASSERT_DSP_STRUCT(SourceStatus::Status, 12);
326 327
@@ -413,7 +414,7 @@ ASSERT_DSP_STRUCT(DspConfiguration::ReverbEffect, 52);
413struct AdpcmCoefficients { 414struct AdpcmCoefficients {
414 /// Coefficients are signed fixed point with 11 fractional bits. 415 /// Coefficients are signed fixed point with 11 fractional bits.
415 /// Each source has 16 coefficients associated with it. 416 /// Each source has 16 coefficients associated with it.
416 s16_le coeff[AudioCore::num_sources][16]; 417 s16_le coeff[num_sources][16];
417}; 418};
418ASSERT_DSP_STRUCT(AdpcmCoefficients, 768); 419ASSERT_DSP_STRUCT(AdpcmCoefficients, 768);
419 420
@@ -427,7 +428,7 @@ ASSERT_DSP_STRUCT(DspStatus, 32);
427/// Final mixed output in PCM16 stereo format, what you hear out of the speakers. 428/// Final mixed output in PCM16 stereo format, what you hear out of the speakers.
428/// When the application writes to this region it has no effect. 429/// When the application writes to this region it has no effect.
429struct FinalMixSamples { 430struct FinalMixSamples {
430 s16_le pcm16[2 * AudioCore::samples_per_frame]; 431 s16_le pcm16[samples_per_frame][2];
431}; 432};
432ASSERT_DSP_STRUCT(FinalMixSamples, 640); 433ASSERT_DSP_STRUCT(FinalMixSamples, 640);
433 434
@@ -437,7 +438,7 @@ ASSERT_DSP_STRUCT(FinalMixSamples, 640);
437/// Values that exceed s16 range will be clipped by the DSP after further processing. 438/// Values that exceed s16 range will be clipped by the DSP after further processing.
438struct IntermediateMixSamples { 439struct IntermediateMixSamples {
439 struct Samples { 440 struct Samples {
440 s32_le pcm32[4][AudioCore::samples_per_frame]; ///< Little-endian as opposed to DSP middle-endian. 441 s32_le pcm32[4][samples_per_frame]; ///< Little-endian as opposed to DSP middle-endian.
441 }; 442 };
442 443
443 Samples mix1; 444 Samples mix1;
@@ -502,6 +503,8 @@ struct SharedMemory {
502}; 503};
503ASSERT_DSP_STRUCT(SharedMemory, 0x8000); 504ASSERT_DSP_STRUCT(SharedMemory, 0x8000);
504 505
506extern std::array<SharedMemory, 2> g_regions;
507
505// Structures must have an offset that is a multiple of two. 508// Structures must have an offset that is a multiple of two.
506static_assert(offsetof(SharedMemory, frame_counter) % 2 == 0, "Structures in DSP::HLE::SharedMemory must be 2-byte aligned"); 509static_assert(offsetof(SharedMemory, frame_counter) % 2 == 0, "Structures in DSP::HLE::SharedMemory must be 2-byte aligned");
507static_assert(offsetof(SharedMemory, source_configurations) % 2 == 0, "Structures in DSP::HLE::SharedMemory must be 2-byte aligned"); 510static_assert(offsetof(SharedMemory, source_configurations) % 2 == 0, "Structures in DSP::HLE::SharedMemory must be 2-byte aligned");
@@ -535,8 +538,11 @@ void Shutdown();
535 */ 538 */
536bool Tick(); 539bool Tick();
537 540
538/// Returns a mutable reference to the current region. Current region is selected based on the frame counter. 541/**
539SharedMemory& CurrentRegion(); 542 * Set the output sink. This must be called before calling Tick().
543 * @param sink The sink to which audio will be output to.
544 */
545void SetSink(std::unique_ptr<AudioCore::Sink> sink);
540 546
541} // namespace HLE 547} // namespace HLE
542} // namespace DSP 548} // namespace DSP
diff --git a/src/audio_core/hle/filter.h b/src/audio_core/hle/filter.h
index 75738f600..43d2035cd 100644
--- a/src/audio_core/hle/filter.h
+++ b/src/audio_core/hle/filter.h
@@ -16,6 +16,7 @@ namespace HLE {
16 16
17/// Preprocessing filters. There is an independent set of filters for each Source. 17/// Preprocessing filters. There is an independent set of filters for each Source.
18class SourceFilters final { 18class SourceFilters final {
19public:
19 SourceFilters() { Reset(); } 20 SourceFilters() { Reset(); }
20 21
21 /// Reset internal state. 22 /// Reset internal state.
diff --git a/src/audio_core/hle/mixers.cpp b/src/audio_core/hle/mixers.cpp
new file mode 100644
index 000000000..18335f7f0
--- /dev/null
+++ b/src/audio_core/hle/mixers.cpp
@@ -0,0 +1,201 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstddef>
6
7#include "audio_core/hle/common.h"
8#include "audio_core/hle/dsp.h"
9#include "audio_core/hle/mixers.h"
10
11#include "common/assert.h"
12#include "common/logging/log.h"
13#include "common/math_util.h"
14
15namespace DSP {
16namespace HLE {
17
18void Mixers::Reset() {
19 current_frame.fill({});
20 state = {};
21}
22
23DspStatus Mixers::Tick(DspConfiguration& config,
24 const IntermediateMixSamples& read_samples,
25 IntermediateMixSamples& write_samples,
26 const std::array<QuadFrame32, 3>& input)
27{
28 ParseConfig(config);
29
30 AuxReturn(read_samples);
31 AuxSend(write_samples, input);
32
33 MixCurrentFrame();
34
35 return GetCurrentStatus();
36}
37
38void Mixers::ParseConfig(DspConfiguration& config) {
39 if (!config.dirty_raw) {
40 return;
41 }
42
43 if (config.mixer1_enabled_dirty) {
44 config.mixer1_enabled_dirty.Assign(0);
45 state.mixer1_enabled = config.mixer1_enabled != 0;
46 LOG_TRACE(Audio_DSP, "mixers mixer1_enabled = %hu", config.mixer1_enabled);
47 }
48
49 if (config.mixer2_enabled_dirty) {
50 config.mixer2_enabled_dirty.Assign(0);
51 state.mixer2_enabled = config.mixer2_enabled != 0;
52 LOG_TRACE(Audio_DSP, "mixers mixer2_enabled = %hu", config.mixer2_enabled);
53 }
54
55 if (config.volume_0_dirty) {
56 config.volume_0_dirty.Assign(0);
57 state.intermediate_mixer_volume[0] = config.volume[0];
58 LOG_TRACE(Audio_DSP, "mixers volume[0] = %f", config.volume[0]);
59 }
60
61 if (config.volume_1_dirty) {
62 config.volume_1_dirty.Assign(0);
63 state.intermediate_mixer_volume[1] = config.volume[1];
64 LOG_TRACE(Audio_DSP, "mixers volume[1] = %f", config.volume[1]);
65 }
66
67 if (config.volume_2_dirty) {
68 config.volume_2_dirty.Assign(0);
69 state.intermediate_mixer_volume[2] = config.volume[2];
70 LOG_TRACE(Audio_DSP, "mixers volume[2] = %f", config.volume[2]);
71 }
72
73 if (config.output_format_dirty) {
74 config.output_format_dirty.Assign(0);
75 state.output_format = config.output_format;
76 LOG_TRACE(Audio_DSP, "mixers output_format = %zu", static_cast<size_t>(config.output_format));
77 }
78
79 if (config.headphones_connected_dirty) {
80 config.headphones_connected_dirty.Assign(0);
81 // Do nothing.
82 // (Note: Whether headphones are connected does affect coefficients used for surround sound.)
83 LOG_TRACE(Audio_DSP, "mixers headphones_connected=%hu", config.headphones_connected);
84 }
85
86 if (config.dirty_raw) {
87 LOG_DEBUG(Audio_DSP, "mixers remaining_dirty=%x", config.dirty_raw);
88 }
89
90 config.dirty_raw = 0;
91}
92
93static s16 ClampToS16(s32 value) {
94 return static_cast<s16>(MathUtil::Clamp(value, -32768, 32767));
95}
96
97static std::array<s16, 2> AddAndClampToS16(const std::array<s16, 2>& a, const std::array<s16, 2>& b) {
98 return {
99 ClampToS16(static_cast<s32>(a[0]) + static_cast<s32>(b[0])),
100 ClampToS16(static_cast<s32>(a[1]) + static_cast<s32>(b[1]))
101 };
102}
103
104void Mixers::DownmixAndMixIntoCurrentFrame(float gain, const QuadFrame32& samples) {
105 // TODO(merry): Limiter. (Currently we're performing final mixing assuming a disabled limiter.)
106
107 switch (state.output_format) {
108 case OutputFormat::Mono:
109 std::transform(current_frame.begin(), current_frame.end(), samples.begin(), current_frame.begin(),
110 [gain](const std::array<s16, 2>& accumulator, const std::array<s32, 4>& sample) -> std::array<s16, 2> {
111 // Downmix to mono
112 s16 mono = ClampToS16(static_cast<s32>((gain * sample[0] + gain * sample[1] + gain * sample[2] + gain * sample[3]) / 2));
113 // Mix into current frame
114 return AddAndClampToS16(accumulator, { mono, mono });
115 });
116 return;
117
118 case OutputFormat::Surround:
119 // TODO(merry): Implement surround sound.
120 // fallthrough
121
122 case OutputFormat::Stereo:
123 std::transform(current_frame.begin(), current_frame.end(), samples.begin(), current_frame.begin(),
124 [gain](const std::array<s16, 2>& accumulator, const std::array<s32, 4>& sample) -> std::array<s16, 2> {
125 // Downmix to stereo
126 s16 left = ClampToS16(static_cast<s32>(gain * sample[0] + gain * sample[2]));
127 s16 right = ClampToS16(static_cast<s32>(gain * sample[1] + gain * sample[3]));
128 // Mix into current frame
129 return AddAndClampToS16(accumulator, { left, right });
130 });
131 return;
132 }
133
134 UNREACHABLE_MSG("Invalid output_format %zu", static_cast<size_t>(state.output_format));
135}
136
137void Mixers::AuxReturn(const IntermediateMixSamples& read_samples) {
138 // NOTE: read_samples.mix{1,2}.pcm32 annoyingly have their dimensions in reverse order to QuadFrame32.
139
140 if (state.mixer1_enabled) {
141 for (size_t sample = 0; sample < samples_per_frame; sample++) {
142 for (size_t channel = 0; channel < 4; channel++) {
143 state.intermediate_mix_buffer[1][sample][channel] = read_samples.mix1.pcm32[channel][sample];
144 }
145 }
146 }
147
148 if (state.mixer2_enabled) {
149 for (size_t sample = 0; sample < samples_per_frame; sample++) {
150 for (size_t channel = 0; channel < 4; channel++) {
151 state.intermediate_mix_buffer[2][sample][channel] = read_samples.mix2.pcm32[channel][sample];
152 }
153 }
154 }
155}
156
157void Mixers::AuxSend(IntermediateMixSamples& write_samples, const std::array<QuadFrame32, 3>& input) {
158 // NOTE: read_samples.mix{1,2}.pcm32 annoyingly have their dimensions in reverse order to QuadFrame32.
159
160 state.intermediate_mix_buffer[0] = input[0];
161
162 if (state.mixer1_enabled) {
163 for (size_t sample = 0; sample < samples_per_frame; sample++) {
164 for (size_t channel = 0; channel < 4; channel++) {
165 write_samples.mix1.pcm32[channel][sample] = input[1][sample][channel];
166 }
167 }
168 } else {
169 state.intermediate_mix_buffer[1] = input[1];
170 }
171
172 if (state.mixer2_enabled) {
173 for (size_t sample = 0; sample < samples_per_frame; sample++) {
174 for (size_t channel = 0; channel < 4; channel++) {
175 write_samples.mix2.pcm32[channel][sample] = input[2][sample][channel];
176 }
177 }
178 } else {
179 state.intermediate_mix_buffer[2] = input[2];
180 }
181}
182
183void Mixers::MixCurrentFrame() {
184 current_frame.fill({});
185
186 for (size_t mix = 0; mix < 3; mix++) {
187 DownmixAndMixIntoCurrentFrame(state.intermediate_mixer_volume[mix], state.intermediate_mix_buffer[mix]);
188 }
189
190 // TODO(merry): Compressor. (We currently assume a disabled compressor.)
191}
192
193DspStatus Mixers::GetCurrentStatus() const {
194 DspStatus status;
195 status.unknown = 0;
196 status.dropped_frames = 0;
197 return status;
198}
199
200} // namespace HLE
201} // namespace DSP
diff --git a/src/audio_core/hle/mixers.h b/src/audio_core/hle/mixers.h
new file mode 100644
index 000000000..b52952eb5
--- /dev/null
+++ b/src/audio_core/hle/mixers.h
@@ -0,0 +1,63 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8
9#include "audio_core/hle/common.h"
10#include "audio_core/hle/dsp.h"
11
12namespace DSP {
13namespace HLE {
14
15class Mixers final {
16public:
17 Mixers() {
18 Reset();
19 }
20
21 void Reset();
22
23 DspStatus Tick(DspConfiguration& config,
24 const IntermediateMixSamples& read_samples,
25 IntermediateMixSamples& write_samples,
26 const std::array<QuadFrame32, 3>& input);
27
28 StereoFrame16 GetOutput() const {
29 return current_frame;
30 }
31
32private:
33 StereoFrame16 current_frame = {};
34
35 using OutputFormat = DspConfiguration::OutputFormat;
36
37 struct {
38 std::array<float, 3> intermediate_mixer_volume = {};
39
40 bool mixer1_enabled = false;
41 bool mixer2_enabled = false;
42 std::array<QuadFrame32, 3> intermediate_mix_buffer = {};
43
44 OutputFormat output_format = OutputFormat::Stereo;
45
46 } state;
47
48 /// INTERNAL: Update our internal state based on the current config.
49 void ParseConfig(DspConfiguration& config);
50 /// INTERNAL: Read samples from shared memory that have been modified by the ARM11.
51 void AuxReturn(const IntermediateMixSamples& read_samples);
52 /// INTERNAL: Write samples to shared memory for the ARM11 to modify.
53 void AuxSend(IntermediateMixSamples& write_samples, const std::array<QuadFrame32, 3>& input);
54 /// INTERNAL: Mix current_frame.
55 void MixCurrentFrame();
56 /// INTERNAL: Downmix from quadraphonic to stereo based on status.output_format and accumulate into current_frame.
57 void DownmixAndMixIntoCurrentFrame(float gain, const QuadFrame32& samples);
58 /// INTERNAL: Generate DspStatus based on internal state.
59 DspStatus GetCurrentStatus() const;
60};
61
62} // namespace HLE
63} // namespace DSP
diff --git a/src/audio_core/hle/pipe.cpp b/src/audio_core/hle/pipe.cpp
index 9381883b4..44dff1345 100644
--- a/src/audio_core/hle/pipe.cpp
+++ b/src/audio_core/hle/pipe.cpp
@@ -12,12 +12,14 @@
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "common/logging/log.h" 13#include "common/logging/log.h"
14 14
15#include "core/hle/service/dsp_dsp.h"
16
15namespace DSP { 17namespace DSP {
16namespace HLE { 18namespace HLE {
17 19
18static DspState dsp_state = DspState::Off; 20static DspState dsp_state = DspState::Off;
19 21
20static std::array<std::vector<u8>, static_cast<size_t>(DspPipe::DspPipe_MAX)> pipe_data; 22static std::array<std::vector<u8>, NUM_DSP_PIPE> pipe_data;
21 23
22void ResetPipes() { 24void ResetPipes() {
23 for (auto& data : pipe_data) { 25 for (auto& data : pipe_data) {
@@ -27,17 +29,24 @@ void ResetPipes() {
27} 29}
28 30
29std::vector<u8> PipeRead(DspPipe pipe_number, u32 length) { 31std::vector<u8> PipeRead(DspPipe pipe_number, u32 length) {
30 if (pipe_number >= DspPipe::DspPipe_MAX) { 32 const size_t pipe_index = static_cast<size_t>(pipe_number);
31 LOG_ERROR(Audio_DSP, "pipe_number = %u invalid", pipe_number); 33
34 if (pipe_index >= NUM_DSP_PIPE) {
35 LOG_ERROR(Audio_DSP, "pipe_number = %zu invalid", pipe_index);
32 return {}; 36 return {};
33 } 37 }
34 38
35 std::vector<u8>& data = pipe_data[static_cast<size_t>(pipe_number)]; 39 if (length > UINT16_MAX) { // Can only read at most UINT16_MAX from the pipe
40 LOG_ERROR(Audio_DSP, "length of %u greater than max of %u", length, UINT16_MAX);
41 return {};
42 }
43
44 std::vector<u8>& data = pipe_data[pipe_index];
36 45
37 if (length > data.size()) { 46 if (length > data.size()) {
38 LOG_WARNING(Audio_DSP, "pipe_number = %u is out of data, application requested read of %u but %zu remain", 47 LOG_WARNING(Audio_DSP, "pipe_number = %zu is out of data, application requested read of %u but %zu remain",
39 pipe_number, length, data.size()); 48 pipe_index, length, data.size());
40 length = data.size(); 49 length = static_cast<u32>(data.size());
41 } 50 }
42 51
43 if (length == 0) 52 if (length == 0)
@@ -49,16 +58,20 @@ std::vector<u8> PipeRead(DspPipe pipe_number, u32 length) {
49} 58}
50 59
51size_t GetPipeReadableSize(DspPipe pipe_number) { 60size_t GetPipeReadableSize(DspPipe pipe_number) {
52 if (pipe_number >= DspPipe::DspPipe_MAX) { 61 const size_t pipe_index = static_cast<size_t>(pipe_number);
53 LOG_ERROR(Audio_DSP, "pipe_number = %u invalid", pipe_number); 62
63 if (pipe_index >= NUM_DSP_PIPE) {
64 LOG_ERROR(Audio_DSP, "pipe_number = %zu invalid", pipe_index);
54 return 0; 65 return 0;
55 } 66 }
56 67
57 return pipe_data[static_cast<size_t>(pipe_number)].size(); 68 return pipe_data[pipe_index].size();
58} 69}
59 70
60static void WriteU16(DspPipe pipe_number, u16 value) { 71static void WriteU16(DspPipe pipe_number, u16 value) {
61 std::vector<u8>& data = pipe_data[static_cast<size_t>(pipe_number)]; 72 const size_t pipe_index = static_cast<size_t>(pipe_number);
73
74 std::vector<u8>& data = pipe_data.at(pipe_index);
62 // Little endian 75 // Little endian
63 data.emplace_back(value & 0xFF); 76 data.emplace_back(value & 0xFF);
64 data.emplace_back(value >> 8); 77 data.emplace_back(value >> 8);
@@ -86,11 +99,13 @@ static void AudioPipeWriteStructAddresses() {
86 }; 99 };
87 100
88 // Begin with a u16 denoting the number of structs. 101 // Begin with a u16 denoting the number of structs.
89 WriteU16(DspPipe::Audio, struct_addresses.size()); 102 WriteU16(DspPipe::Audio, static_cast<u16>(struct_addresses.size()));
90 // Then write the struct addresses. 103 // Then write the struct addresses.
91 for (u16 addr : struct_addresses) { 104 for (u16 addr : struct_addresses) {
92 WriteU16(DspPipe::Audio, addr); 105 WriteU16(DspPipe::Audio, addr);
93 } 106 }
107 // Signal that we have data on this pipe.
108 DSP_DSP::SignalPipeInterrupt(DspPipe::Audio);
94} 109}
95 110
96void PipeWrite(DspPipe pipe_number, const std::vector<u8>& buffer) { 111void PipeWrite(DspPipe pipe_number, const std::vector<u8>& buffer) {
@@ -145,7 +160,7 @@ void PipeWrite(DspPipe pipe_number, const std::vector<u8>& buffer) {
145 return; 160 return;
146 } 161 }
147 default: 162 default:
148 LOG_CRITICAL(Audio_DSP, "pipe_number = %u unimplemented", pipe_number); 163 LOG_CRITICAL(Audio_DSP, "pipe_number = %zu unimplemented", static_cast<size_t>(pipe_number));
149 UNIMPLEMENTED(); 164 UNIMPLEMENTED();
150 return; 165 return;
151 } 166 }
diff --git a/src/audio_core/hle/pipe.h b/src/audio_core/hle/pipe.h
index 382d35e87..b714c0496 100644
--- a/src/audio_core/hle/pipe.h
+++ b/src/audio_core/hle/pipe.h
@@ -19,15 +19,19 @@ enum class DspPipe {
19 Debug = 0, 19 Debug = 0,
20 Dma = 1, 20 Dma = 1,
21 Audio = 2, 21 Audio = 2,
22 Binary = 3, 22 Binary = 3
23 DspPipe_MAX
24}; 23};
24constexpr size_t NUM_DSP_PIPE = 8;
25 25
26/** 26/**
27 * Read a DSP pipe. 27 * Reads `length` bytes from the DSP pipe identified with `pipe_number`.
28 * @param pipe_number The Pipe ID 28 * @note Can read up to the maximum value of a u16 in bytes (65,535).
29 * @param length How much data to request. 29 * @note IF an error is encountered with either an invalid `pipe_number` or `length` value, an empty vector will be returned.
30 * @return The data read from the pipe. The size of this vector can be less than the length requested. 30 * @note IF `length` is set to 0, an empty vector will be returned.
31 * @note IF `length` is greater than the amount of data available, this function will only read the available amount.
32 * @param pipe_number a `DspPipe`
33 * @param length the number of bytes to read. The max is 65,535 (max of u16).
34 * @returns a vector of bytes from the specified pipe. On error, will be empty.
31 */ 35 */
32std::vector<u8> PipeRead(DspPipe pipe_number, u32 length); 36std::vector<u8> PipeRead(DspPipe pipe_number, u32 length);
33 37
diff --git a/src/audio_core/hle/source.cpp b/src/audio_core/hle/source.cpp
new file mode 100644
index 000000000..30552fe26
--- /dev/null
+++ b/src/audio_core/hle/source.cpp
@@ -0,0 +1,320 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7
8#include "audio_core/codec.h"
9#include "audio_core/hle/common.h"
10#include "audio_core/hle/source.h"
11#include "audio_core/interpolate.h"
12
13#include "common/assert.h"
14#include "common/logging/log.h"
15
16#include "core/memory.h"
17
18namespace DSP {
19namespace HLE {
20
21SourceStatus::Status Source::Tick(SourceConfiguration::Configuration& config, const s16_le (&adpcm_coeffs)[16]) {
22 ParseConfig(config, adpcm_coeffs);
23
24 if (state.enabled) {
25 GenerateFrame();
26 }
27
28 return GetCurrentStatus();
29}
30
31void Source::MixInto(QuadFrame32& dest, size_t intermediate_mix_id) const {
32 if (!state.enabled)
33 return;
34
35 const std::array<float, 4>& gains = state.gain.at(intermediate_mix_id);
36 for (size_t samplei = 0; samplei < samples_per_frame; samplei++) {
37 // Conversion from stereo (current_frame) to quadraphonic (dest) occurs here.
38 dest[samplei][0] += static_cast<s32>(gains[0] * current_frame[samplei][0]);
39 dest[samplei][1] += static_cast<s32>(gains[1] * current_frame[samplei][1]);
40 dest[samplei][2] += static_cast<s32>(gains[2] * current_frame[samplei][0]);
41 dest[samplei][3] += static_cast<s32>(gains[3] * current_frame[samplei][1]);
42 }
43}
44
45void Source::Reset() {
46 current_frame.fill({});
47 state = {};
48}
49
50void Source::ParseConfig(SourceConfiguration::Configuration& config, const s16_le (&adpcm_coeffs)[16]) {
51 if (!config.dirty_raw) {
52 return;
53 }
54
55 if (config.reset_flag) {
56 config.reset_flag.Assign(0);
57 Reset();
58 LOG_TRACE(Audio_DSP, "source_id=%zu reset", source_id);
59 }
60
61 if (config.partial_reset_flag) {
62 config.partial_reset_flag.Assign(0);
63 state.input_queue = std::priority_queue<Buffer, std::vector<Buffer>, BufferOrder>{};
64 LOG_TRACE(Audio_DSP, "source_id=%zu partial_reset", source_id);
65 }
66
67 if (config.enable_dirty) {
68 config.enable_dirty.Assign(0);
69 state.enabled = config.enable != 0;
70 LOG_TRACE(Audio_DSP, "source_id=%zu enable=%d", source_id, state.enabled);
71 }
72
73 if (config.sync_dirty) {
74 config.sync_dirty.Assign(0);
75 state.sync = config.sync;
76 LOG_TRACE(Audio_DSP, "source_id=%zu sync=%u", source_id, state.sync);
77 }
78
79 if (config.rate_multiplier_dirty) {
80 config.rate_multiplier_dirty.Assign(0);
81 state.rate_multiplier = config.rate_multiplier;
82 LOG_TRACE(Audio_DSP, "source_id=%zu rate=%f", source_id, state.rate_multiplier);
83
84 if (state.rate_multiplier <= 0) {
85 LOG_ERROR(Audio_DSP, "Was given an invalid rate multiplier: source_id=%zu rate=%f", source_id, state.rate_multiplier);
86 state.rate_multiplier = 1.0f;
87 // Note: Actual firmware starts producing garbage if this occurs.
88 }
89 }
90
91 if (config.adpcm_coefficients_dirty) {
92 config.adpcm_coefficients_dirty.Assign(0);
93 std::transform(adpcm_coeffs, adpcm_coeffs + state.adpcm_coeffs.size(), state.adpcm_coeffs.begin(),
94 [](const auto& coeff) { return static_cast<s16>(coeff); });
95 LOG_TRACE(Audio_DSP, "source_id=%zu adpcm update", source_id);
96 }
97
98 if (config.gain_0_dirty) {
99 config.gain_0_dirty.Assign(0);
100 std::transform(config.gain[0], config.gain[0] + state.gain[0].size(), state.gain[0].begin(),
101 [](const auto& coeff) { return static_cast<float>(coeff); });
102 LOG_TRACE(Audio_DSP, "source_id=%zu gain 0 update", source_id);
103 }
104
105 if (config.gain_1_dirty) {
106 config.gain_1_dirty.Assign(0);
107 std::transform(config.gain[1], config.gain[1] + state.gain[1].size(), state.gain[1].begin(),
108 [](const auto& coeff) { return static_cast<float>(coeff); });
109 LOG_TRACE(Audio_DSP, "source_id=%zu gain 1 update", source_id);
110 }
111
112 if (config.gain_2_dirty) {
113 config.gain_2_dirty.Assign(0);
114 std::transform(config.gain[2], config.gain[2] + state.gain[2].size(), state.gain[2].begin(),
115 [](const auto& coeff) { return static_cast<float>(coeff); });
116 LOG_TRACE(Audio_DSP, "source_id=%zu gain 2 update", source_id);
117 }
118
119 if (config.filters_enabled_dirty) {
120 config.filters_enabled_dirty.Assign(0);
121 state.filters.Enable(config.simple_filter_enabled.ToBool(), config.biquad_filter_enabled.ToBool());
122 LOG_TRACE(Audio_DSP, "source_id=%zu enable_simple=%hu enable_biquad=%hu",
123 source_id, config.simple_filter_enabled.Value(), config.biquad_filter_enabled.Value());
124 }
125
126 if (config.simple_filter_dirty) {
127 config.simple_filter_dirty.Assign(0);
128 state.filters.Configure(config.simple_filter);
129 LOG_TRACE(Audio_DSP, "source_id=%zu simple filter update", source_id);
130 }
131
132 if (config.biquad_filter_dirty) {
133 config.biquad_filter_dirty.Assign(0);
134 state.filters.Configure(config.biquad_filter);
135 LOG_TRACE(Audio_DSP, "source_id=%zu biquad filter update", source_id);
136 }
137
138 if (config.interpolation_dirty) {
139 config.interpolation_dirty.Assign(0);
140 state.interpolation_mode = config.interpolation_mode;
141 LOG_TRACE(Audio_DSP, "source_id=%zu interpolation_mode=%zu", source_id, static_cast<size_t>(state.interpolation_mode));
142 }
143
144 if (config.format_dirty || config.embedded_buffer_dirty) {
145 config.format_dirty.Assign(0);
146 state.format = config.format;
147 LOG_TRACE(Audio_DSP, "source_id=%zu format=%zu", source_id, static_cast<size_t>(state.format));
148 }
149
150 if (config.mono_or_stereo_dirty || config.embedded_buffer_dirty) {
151 config.mono_or_stereo_dirty.Assign(0);
152 state.mono_or_stereo = config.mono_or_stereo;
153 LOG_TRACE(Audio_DSP, "source_id=%zu mono_or_stereo=%zu", source_id, static_cast<size_t>(state.mono_or_stereo));
154 }
155
156 if (config.embedded_buffer_dirty) {
157 config.embedded_buffer_dirty.Assign(0);
158 state.input_queue.emplace(Buffer{
159 config.physical_address,
160 config.length,
161 static_cast<u8>(config.adpcm_ps),
162 { config.adpcm_yn[0], config.adpcm_yn[1] },
163 config.adpcm_dirty.ToBool(),
164 config.is_looping.ToBool(),
165 config.buffer_id,
166 state.mono_or_stereo,
167 state.format,
168 false
169 });
170 LOG_TRACE(Audio_DSP, "enqueuing embedded addr=0x%08x len=%u id=%hu", config.physical_address, config.length, config.buffer_id);
171 }
172
173 if (config.buffer_queue_dirty) {
174 config.buffer_queue_dirty.Assign(0);
175 for (size_t i = 0; i < 4; i++) {
176 if (config.buffers_dirty & (1 << i)) {
177 const auto& b = config.buffers[i];
178 state.input_queue.emplace(Buffer{
179 b.physical_address,
180 b.length,
181 static_cast<u8>(b.adpcm_ps),
182 { b.adpcm_yn[0], b.adpcm_yn[1] },
183 b.adpcm_dirty != 0,
184 b.is_looping != 0,
185 b.buffer_id,
186 state.mono_or_stereo,
187 state.format,
188 true
189 });
190 LOG_TRACE(Audio_DSP, "enqueuing queued %zu addr=0x%08x len=%u id=%hu", i, b.physical_address, b.length, b.buffer_id);
191 }
192 }
193 config.buffers_dirty = 0;
194 }
195
196 if (config.dirty_raw) {
197 LOG_DEBUG(Audio_DSP, "source_id=%zu remaining_dirty=%x", source_id, config.dirty_raw);
198 }
199
200 config.dirty_raw = 0;
201}
202
203void Source::GenerateFrame() {
204 current_frame.fill({});
205
206 if (state.current_buffer.empty() && !DequeueBuffer()) {
207 state.enabled = false;
208 state.buffer_update = true;
209 state.current_buffer_id = 0;
210 return;
211 }
212
213 size_t frame_position = 0;
214
215 state.current_sample_number = state.next_sample_number;
216 while (frame_position < current_frame.size()) {
217 if (state.current_buffer.empty() && !DequeueBuffer()) {
218 break;
219 }
220
221 const size_t size_to_copy = std::min(state.current_buffer.size(), current_frame.size() - frame_position);
222
223 std::copy(state.current_buffer.begin(), state.current_buffer.begin() + size_to_copy, current_frame.begin() + frame_position);
224 state.current_buffer.erase(state.current_buffer.begin(), state.current_buffer.begin() + size_to_copy);
225
226 frame_position += size_to_copy;
227 state.next_sample_number += static_cast<u32>(size_to_copy);
228 }
229
230 state.filters.ProcessFrame(current_frame);
231}
232
233
234bool Source::DequeueBuffer() {
235 ASSERT_MSG(state.current_buffer.empty(), "Shouldn't dequeue; we still have data in current_buffer");
236
237 if (state.input_queue.empty())
238 return false;
239
240 const Buffer buf = state.input_queue.top();
241 state.input_queue.pop();
242
243 if (buf.adpcm_dirty) {
244 state.adpcm_state.yn1 = buf.adpcm_yn[0];
245 state.adpcm_state.yn2 = buf.adpcm_yn[1];
246 }
247
248 if (buf.is_looping) {
249 LOG_ERROR(Audio_DSP, "Looped buffers are unimplemented at the moment");
250 }
251
252 const u8* const memory = Memory::GetPhysicalPointer(buf.physical_address);
253 if (memory) {
254 const unsigned num_channels = buf.mono_or_stereo == MonoOrStereo::Stereo ? 2 : 1;
255 switch (buf.format) {
256 case Format::PCM8:
257 state.current_buffer = Codec::DecodePCM8(num_channels, memory, buf.length);
258 break;
259 case Format::PCM16:
260 state.current_buffer = Codec::DecodePCM16(num_channels, memory, buf.length);
261 break;
262 case Format::ADPCM:
263 DEBUG_ASSERT(num_channels == 1);
264 state.current_buffer = Codec::DecodeADPCM(memory, buf.length, state.adpcm_coeffs, state.adpcm_state);
265 break;
266 default:
267 UNIMPLEMENTED();
268 break;
269 }
270 } else {
271 LOG_WARNING(Audio_DSP, "source_id=%zu buffer_id=%hu length=%u: Invalid physical address 0x%08X",
272 source_id, buf.buffer_id, buf.length, buf.physical_address);
273 state.current_buffer.clear();
274 return true;
275 }
276
277 switch (state.interpolation_mode) {
278 case InterpolationMode::None:
279 state.current_buffer = AudioInterp::None(state.interp_state, state.current_buffer, state.rate_multiplier);
280 break;
281 case InterpolationMode::Linear:
282 state.current_buffer = AudioInterp::Linear(state.interp_state, state.current_buffer, state.rate_multiplier);
283 break;
284 case InterpolationMode::Polyphase:
285 // TODO(merry): Implement polyphase interpolation
286 state.current_buffer = AudioInterp::Linear(state.interp_state, state.current_buffer, state.rate_multiplier);
287 break;
288 default:
289 UNIMPLEMENTED();
290 break;
291 }
292
293 state.current_sample_number = 0;
294 state.next_sample_number = 0;
295 state.current_buffer_id = buf.buffer_id;
296 state.buffer_update = buf.from_queue;
297
298 LOG_TRACE(Audio_DSP, "source_id=%zu buffer_id=%hu from_queue=%s current_buffer.size()=%zu",
299 source_id, buf.buffer_id, buf.from_queue ? "true" : "false", state.current_buffer.size());
300 return true;
301}
302
303SourceStatus::Status Source::GetCurrentStatus() {
304 SourceStatus::Status ret;
305
306 // Applications depend on the correct emulation of
307 // current_buffer_id_dirty and current_buffer_id to synchronise
308 // audio with video.
309 ret.is_enabled = state.enabled;
310 ret.current_buffer_id_dirty = state.buffer_update ? 1 : 0;
311 state.buffer_update = false;
312 ret.current_buffer_id = state.current_buffer_id;
313 ret.buffer_position = state.current_sample_number;
314 ret.sync = state.sync;
315
316 return ret;
317}
318
319} // namespace HLE
320} // namespace DSP
diff --git a/src/audio_core/hle/source.h b/src/audio_core/hle/source.h
new file mode 100644
index 000000000..7ee08d424
--- /dev/null
+++ b/src/audio_core/hle/source.h
@@ -0,0 +1,144 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <queue>
9#include <vector>
10
11#include "audio_core/codec.h"
12#include "audio_core/hle/common.h"
13#include "audio_core/hle/dsp.h"
14#include "audio_core/hle/filter.h"
15#include "audio_core/interpolate.h"
16
17#include "common/common_types.h"
18
19namespace DSP {
20namespace HLE {
21
22/**
23 * This module performs:
24 * - Buffer management
25 * - Decoding of buffers
26 * - Buffer resampling and interpolation
27 * - Per-source filtering (SimpleFilter, BiquadFilter)
28 * - Per-source gain
29 * - Other per-source processing
30 */
31class Source final {
32public:
33 explicit Source(size_t source_id_) : source_id(source_id_) {
34 Reset();
35 }
36
37 /// Resets internal state.
38 void Reset();
39
40 /**
41 * This is called once every audio frame. This performs per-source processing every frame.
42 * @param config The new configuration we've got for this Source from the application.
43 * @param adpcm_coeffs ADPCM coefficients to use if config tells us to use them (may contain invalid values otherwise).
44 * @return The current status of this Source. This is given back to the emulated application via SharedMemory.
45 */
46 SourceStatus::Status Tick(SourceConfiguration::Configuration& config, const s16_le (&adpcm_coeffs)[16]);
47
48 /**
49 * Mix this source's output into dest, using the gains for the `intermediate_mix_id`-th intermediate mixer.
50 * @param dest The QuadFrame32 to mix into.
51 * @param intermediate_mix_id The id of the intermediate mix whose gains we are using.
52 */
53 void MixInto(QuadFrame32& dest, size_t intermediate_mix_id) const;
54
55private:
56 const size_t source_id;
57 StereoFrame16 current_frame;
58
59 using Format = SourceConfiguration::Configuration::Format;
60 using InterpolationMode = SourceConfiguration::Configuration::InterpolationMode;
61 using MonoOrStereo = SourceConfiguration::Configuration::MonoOrStereo;
62
63 /// Internal representation of a buffer for our buffer queue
64 struct Buffer {
65 PAddr physical_address;
66 u32 length;
67 u8 adpcm_ps;
68 std::array<u16, 2> adpcm_yn;
69 bool adpcm_dirty;
70 bool is_looping;
71 u16 buffer_id;
72
73 MonoOrStereo mono_or_stereo;
74 Format format;
75
76 bool from_queue;
77 };
78
79 struct BufferOrder {
80 bool operator() (const Buffer& a, const Buffer& b) const {
81 // Lower buffer_id comes first.
82 return a.buffer_id > b.buffer_id;
83 }
84 };
85
86 struct {
87
88 // State variables
89
90 bool enabled = false;
91 u16 sync = 0;
92
93 // Mixing
94
95 std::array<std::array<float, 4>, 3> gain = {};
96
97 // Buffer queue
98
99 std::priority_queue<Buffer, std::vector<Buffer>, BufferOrder> input_queue;
100 MonoOrStereo mono_or_stereo = MonoOrStereo::Mono;
101 Format format = Format::ADPCM;
102
103 // Current buffer
104
105 u32 current_sample_number = 0;
106 u32 next_sample_number = 0;
107 std::vector<std::array<s16, 2>> current_buffer;
108
109 // buffer_id state
110
111 bool buffer_update = false;
112 u32 current_buffer_id = 0;
113
114 // Decoding state
115
116 std::array<s16, 16> adpcm_coeffs = {};
117 Codec::ADPCMState adpcm_state = {};
118
119 // Resampling state
120
121 float rate_multiplier = 1.0;
122 InterpolationMode interpolation_mode = InterpolationMode::Polyphase;
123 AudioInterp::State interp_state = {};
124
125 // Filter state
126
127 SourceFilters filters;
128
129 } state;
130
131 // Internal functions
132
133 /// INTERNAL: Update our internal state based on the current config.
134 void ParseConfig(SourceConfiguration::Configuration& config, const s16_le (&adpcm_coeffs)[16]);
135 /// INTERNAL: Generate the current audio output for this frame based on our internal state.
136 void GenerateFrame();
137 /// INTERNAL: Dequeues a buffer and does preprocessing on it (decoding, resampling). Puts it into current_buffer.
138 bool DequeueBuffer();
139 /// INTERNAL: Generates a SourceStatus::Status based on our internal state.
140 SourceStatus::Status GetCurrentStatus();
141};
142
143} // namespace HLE
144} // namespace DSP
diff --git a/src/audio_core/interpolate.cpp b/src/audio_core/interpolate.cpp
new file mode 100644
index 000000000..fcd3aa066
--- /dev/null
+++ b/src/audio_core/interpolate.cpp
@@ -0,0 +1,85 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "audio_core/interpolate.h"
6
7#include "common/assert.h"
8#include "common/math_util.h"
9
10namespace AudioInterp {
11
12// Calculations are done in fixed point with 24 fractional bits.
13// (This is not verified. This was chosen for minimal error.)
14constexpr u64 scale_factor = 1 << 24;
15constexpr u64 scale_mask = scale_factor - 1;
16
17/// Here we step over the input in steps of rate_multiplier, until we consume all of the input.
18/// Three adjacent samples are passed to fn each step.
19template <typename Function>
20static StereoBuffer16 StepOverSamples(State& state, const StereoBuffer16& input, float rate_multiplier, Function fn) {
21 ASSERT(rate_multiplier > 0);
22
23 if (input.size() < 2)
24 return {};
25
26 StereoBuffer16 output;
27 output.reserve(static_cast<size_t>(input.size() / rate_multiplier));
28
29 u64 step_size = static_cast<u64>(rate_multiplier * scale_factor);
30
31 u64 fposition = 0;
32 const u64 max_fposition = input.size() * scale_factor;
33
34 while (fposition < 1 * scale_factor) {
35 u64 fraction = fposition & scale_mask;
36
37 output.push_back(fn(fraction, state.xn2, state.xn1, input[0]));
38
39 fposition += step_size;
40 }
41
42 while (fposition < 2 * scale_factor) {
43 u64 fraction = fposition & scale_mask;
44
45 output.push_back(fn(fraction, state.xn1, input[0], input[1]));
46
47 fposition += step_size;
48 }
49
50 while (fposition < max_fposition) {
51 u64 fraction = fposition & scale_mask;
52
53 size_t index = static_cast<size_t>(fposition / scale_factor);
54 output.push_back(fn(fraction, input[index - 2], input[index - 1], input[index]));
55
56 fposition += step_size;
57 }
58
59 state.xn2 = input[input.size() - 2];
60 state.xn1 = input[input.size() - 1];
61
62 return output;
63}
64
65StereoBuffer16 None(State& state, const StereoBuffer16& input, float rate_multiplier) {
66 return StepOverSamples(state, input, rate_multiplier, [](u64 fraction, const auto& x0, const auto& x1, const auto& x2) {
67 return x0;
68 });
69}
70
71StereoBuffer16 Linear(State& state, const StereoBuffer16& input, float rate_multiplier) {
72 // Note on accuracy: Some values that this produces are +/- 1 from the actual firmware.
73 return StepOverSamples(state, input, rate_multiplier, [](u64 fraction, const auto& x0, const auto& x1, const auto& x2) {
74 // This is a saturated subtraction. (Verified by black-box fuzzing.)
75 s64 delta0 = MathUtil::Clamp<s64>(x1[0] - x0[0], -32768, 32767);
76 s64 delta1 = MathUtil::Clamp<s64>(x1[1] - x0[1], -32768, 32767);
77
78 return std::array<s16, 2> {
79 static_cast<s16>(x0[0] + fraction * delta0 / scale_factor),
80 static_cast<s16>(x0[1] + fraction * delta1 / scale_factor)
81 };
82 });
83}
84
85} // namespace AudioInterp
diff --git a/src/audio_core/interpolate.h b/src/audio_core/interpolate.h
new file mode 100644
index 000000000..a4c0a453d
--- /dev/null
+++ b/src/audio_core/interpolate.h
@@ -0,0 +1,41 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <vector>
9
10#include "common/common_types.h"
11
12namespace AudioInterp {
13
14/// A variable length buffer of signed PCM16 stereo samples.
15using StereoBuffer16 = std::vector<std::array<s16, 2>>;
16
17struct State {
18 // Two historical samples.
19 std::array<s16, 2> xn1 = {}; ///< x[n-1]
20 std::array<s16, 2> xn2 = {}; ///< x[n-2]
21};
22
23/**
24 * No interpolation. This is equivalent to a zero-order hold. There is a two-sample predelay.
25 * @param input Input buffer.
26 * @param rate_multiplier Stretch factor. Must be a positive non-zero value.
27 * rate_multiplier > 1.0 performs decimation and rate_multipler < 1.0 performs upsampling.
28 * @return The resampled audio buffer.
29 */
30StereoBuffer16 None(State& state, const StereoBuffer16& input, float rate_multiplier);
31
32/**
33 * Linear interpolation. This is equivalent to a first-order hold. There is a two-sample predelay.
34 * @param input Input buffer.
35 * @param rate_multiplier Stretch factor. Must be a positive non-zero value.
36 * rate_multiplier > 1.0 performs decimation and rate_multipler < 1.0 performs upsampling.
37 * @return The resampled audio buffer.
38 */
39StereoBuffer16 Linear(State& state, const StereoBuffer16& input, float rate_multiplier);
40
41} // namespace AudioInterp
diff --git a/src/audio_core/null_sink.h b/src/audio_core/null_sink.h
new file mode 100644
index 000000000..faf0ee4e1
--- /dev/null
+++ b/src/audio_core/null_sink.h
@@ -0,0 +1,29 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstddef>
8
9#include "audio_core/audio_core.h"
10#include "audio_core/sink.h"
11
12namespace AudioCore {
13
14class NullSink final : public Sink {
15public:
16 ~NullSink() override = default;
17
18 unsigned int GetNativeSampleRate() const override {
19 return native_sample_rate;
20 }
21
22 void EnqueueSamples(const std::vector<s16>&) override {}
23
24 size_t SamplesInQueue() const override {
25 return 0;
26 }
27};
28
29} // namespace AudioCore
diff --git a/src/audio_core/sdl2_sink.cpp b/src/audio_core/sdl2_sink.cpp
new file mode 100644
index 000000000..dc75c04ee
--- /dev/null
+++ b/src/audio_core/sdl2_sink.cpp
@@ -0,0 +1,126 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <list>
6#include <vector>
7
8#include <SDL.h>
9
10#include "audio_core/audio_core.h"
11#include "audio_core/sdl2_sink.h"
12
13#include "common/assert.h"
14#include "common/logging/log.h"
15#include <numeric>
16
17namespace AudioCore {
18
19struct SDL2Sink::Impl {
20 unsigned int sample_rate = 0;
21
22 SDL_AudioDeviceID audio_device_id = 0;
23
24 std::list<std::vector<s16>> queue;
25
26 static void Callback(void* impl_, u8* buffer, int buffer_size_in_bytes);
27};
28
29SDL2Sink::SDL2Sink() : impl(std::make_unique<Impl>()) {
30 if (SDL_Init(SDL_INIT_AUDIO) < 0) {
31 LOG_CRITICAL(Audio_Sink, "SDL_Init(SDL_INIT_AUDIO) failed");
32 impl->audio_device_id = 0;
33 return;
34 }
35
36 SDL_AudioSpec desired_audiospec;
37 SDL_zero(desired_audiospec);
38 desired_audiospec.format = AUDIO_S16;
39 desired_audiospec.channels = 2;
40 desired_audiospec.freq = native_sample_rate;
41 desired_audiospec.samples = 1024;
42 desired_audiospec.userdata = impl.get();
43 desired_audiospec.callback = &Impl::Callback;
44
45 SDL_AudioSpec obtained_audiospec;
46 SDL_zero(obtained_audiospec);
47
48 impl->audio_device_id = SDL_OpenAudioDevice(nullptr, false, &desired_audiospec, &obtained_audiospec, 0);
49 if (impl->audio_device_id <= 0) {
50 LOG_CRITICAL(Audio_Sink, "SDL_OpenAudioDevice failed");
51 return;
52 }
53
54 impl->sample_rate = obtained_audiospec.freq;
55
56 // SDL2 audio devices start out paused, unpause it:
57 SDL_PauseAudioDevice(impl->audio_device_id, 0);
58}
59
60SDL2Sink::~SDL2Sink() {
61 if (impl->audio_device_id <= 0)
62 return;
63
64 SDL_CloseAudioDevice(impl->audio_device_id);
65}
66
67unsigned int SDL2Sink::GetNativeSampleRate() const {
68 if (impl->audio_device_id <= 0)
69 return native_sample_rate;
70
71 return impl->sample_rate;
72}
73
74void SDL2Sink::EnqueueSamples(const std::vector<s16>& samples) {
75 if (impl->audio_device_id <= 0)
76 return;
77
78 ASSERT_MSG(samples.size() % 2 == 0, "Samples must be in interleaved stereo PCM16 format (size must be a multiple of two)");
79
80 SDL_LockAudioDevice(impl->audio_device_id);
81 impl->queue.emplace_back(samples);
82 SDL_UnlockAudioDevice(impl->audio_device_id);
83}
84
85size_t SDL2Sink::SamplesInQueue() const {
86 if (impl->audio_device_id <= 0)
87 return 0;
88
89 SDL_LockAudioDevice(impl->audio_device_id);
90
91 size_t total_size = std::accumulate(impl->queue.begin(), impl->queue.end(), static_cast<size_t>(0),
92 [](size_t sum, const auto& buffer) {
93 // Division by two because each stereo sample is made of two s16.
94 return sum + buffer.size() / 2;
95 });
96
97 SDL_UnlockAudioDevice(impl->audio_device_id);
98
99 return total_size;
100}
101
102void SDL2Sink::Impl::Callback(void* impl_, u8* buffer, int buffer_size_in_bytes) {
103 Impl* impl = reinterpret_cast<Impl*>(impl_);
104
105 size_t remaining_size = static_cast<size_t>(buffer_size_in_bytes) / sizeof(s16); // Keep track of size in 16-bit increments.
106
107 while (remaining_size > 0 && !impl->queue.empty()) {
108 if (impl->queue.front().size() <= remaining_size) {
109 memcpy(buffer, impl->queue.front().data(), impl->queue.front().size() * sizeof(s16));
110 buffer += impl->queue.front().size() * sizeof(s16);
111 remaining_size -= impl->queue.front().size();
112 impl->queue.pop_front();
113 } else {
114 memcpy(buffer, impl->queue.front().data(), remaining_size * sizeof(s16));
115 buffer += remaining_size * sizeof(s16);
116 impl->queue.front().erase(impl->queue.front().begin(), impl->queue.front().begin() + remaining_size);
117 remaining_size = 0;
118 }
119 }
120
121 if (remaining_size > 0) {
122 memset(buffer, 0, remaining_size * sizeof(s16));
123 }
124}
125
126} // namespace AudioCore
diff --git a/src/audio_core/sdl2_sink.h b/src/audio_core/sdl2_sink.h
new file mode 100644
index 000000000..0f296b673
--- /dev/null
+++ b/src/audio_core/sdl2_sink.h
@@ -0,0 +1,30 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstddef>
8#include <memory>
9
10#include "audio_core/sink.h"
11
12namespace AudioCore {
13
14class SDL2Sink final : public Sink {
15public:
16 SDL2Sink();
17 ~SDL2Sink() override;
18
19 unsigned int GetNativeSampleRate() const override;
20
21 void EnqueueSamples(const std::vector<s16>& samples) override;
22
23 size_t SamplesInQueue() const override;
24
25private:
26 struct Impl;
27 std::unique_ptr<Impl> impl;
28};
29
30} // namespace AudioCore
diff --git a/src/audio_core/sink.h b/src/audio_core/sink.h
index cad21a85e..1c881c3d2 100644
--- a/src/audio_core/sink.h
+++ b/src/audio_core/sink.h
@@ -19,7 +19,7 @@ public:
19 virtual ~Sink() = default; 19 virtual ~Sink() = default;
20 20
21 /// The native rate of this sink. The sink expects to be fed samples that respect this. (Units: samples/sec) 21 /// The native rate of this sink. The sink expects to be fed samples that respect this. (Units: samples/sec)
22 virtual unsigned GetNativeSampleRate() const = 0; 22 virtual unsigned int GetNativeSampleRate() const = 0;
23 23
24 /** 24 /**
25 * Feed stereo samples to sink. 25 * Feed stereo samples to sink.
diff --git a/src/audio_core/sink_details.cpp b/src/audio_core/sink_details.cpp
new file mode 100644
index 000000000..ba5e83d17
--- /dev/null
+++ b/src/audio_core/sink_details.cpp
@@ -0,0 +1,25 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <memory>
6#include <vector>
7
8#include "audio_core/null_sink.h"
9#include "audio_core/sink_details.h"
10
11#ifdef HAVE_SDL2
12#include "audio_core/sdl2_sink.h"
13#endif
14
15namespace AudioCore {
16
17// g_sink_details is ordered in terms of desirability, with the best choice at the top.
18const std::vector<SinkDetails> g_sink_details = {
19#ifdef HAVE_SDL2
20 { "sdl2", []() { return std::make_unique<SDL2Sink>(); } },
21#endif
22 { "null", []() { return std::make_unique<NullSink>(); } },
23};
24
25} // namespace AudioCore
diff --git a/src/audio_core/sink_details.h b/src/audio_core/sink_details.h
new file mode 100644
index 000000000..4b30cf835
--- /dev/null
+++ b/src/audio_core/sink_details.h
@@ -0,0 +1,27 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <functional>
8#include <memory>
9#include <vector>
10
11namespace AudioCore {
12
13class Sink;
14
15struct SinkDetails {
16 SinkDetails(const char* id_, std::function<std::unique_ptr<Sink>()> factory_)
17 : id(id_), factory(factory_) {}
18
19 /// Name for this sink.
20 const char* id;
21 /// A method to call to construct an instance of this type of sink.
22 std::function<std::unique_ptr<Sink>()> factory;
23};
24
25extern const std::vector<SinkDetails> g_sink_details;
26
27} // namespace AudioCore
diff --git a/src/audio_core/time_stretch.cpp b/src/audio_core/time_stretch.cpp
new file mode 100644
index 000000000..ea38f40d0
--- /dev/null
+++ b/src/audio_core/time_stretch.cpp
@@ -0,0 +1,144 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <chrono>
6#include <cmath>
7#include <vector>
8
9#include <SoundTouch.h>
10
11#include "audio_core/audio_core.h"
12#include "audio_core/time_stretch.h"
13
14#include "common/common_types.h"
15#include "common/logging/log.h"
16#include "common/math_util.h"
17
18using steady_clock = std::chrono::steady_clock;
19
20namespace AudioCore {
21
22constexpr double MIN_RATIO = 0.1;
23constexpr double MAX_RATIO = 100.0;
24
25static double ClampRatio(double ratio) {
26 return MathUtil::Clamp(ratio, MIN_RATIO, MAX_RATIO);
27}
28
29constexpr double MIN_DELAY_TIME = 0.05; // Units: seconds
30constexpr double MAX_DELAY_TIME = 0.25; // Units: seconds
31constexpr size_t DROP_FRAMES_SAMPLE_DELAY = 16000; // Units: samples
32
33constexpr double SMOOTHING_FACTOR = 0.007;
34
35struct TimeStretcher::Impl {
36 soundtouch::SoundTouch soundtouch;
37
38 steady_clock::time_point frame_timer = steady_clock::now();
39 size_t samples_queued = 0;
40
41 double smoothed_ratio = 1.0;
42
43 double sample_rate = static_cast<double>(native_sample_rate);
44};
45
46std::vector<s16> TimeStretcher::Process(size_t samples_in_queue) {
47 // This is a very simple algorithm without any fancy control theory. It works and is stable.
48
49 double ratio = CalculateCurrentRatio();
50 ratio = CorrectForUnderAndOverflow(ratio, samples_in_queue);
51 impl->smoothed_ratio = (1.0 - SMOOTHING_FACTOR) * impl->smoothed_ratio + SMOOTHING_FACTOR * ratio;
52 impl->smoothed_ratio = ClampRatio(impl->smoothed_ratio);
53
54 // SoundTouch's tempo definition the inverse of our ratio definition.
55 impl->soundtouch.setTempo(1.0 / impl->smoothed_ratio);
56
57 std::vector<s16> samples = GetSamples();
58 if (samples_in_queue >= DROP_FRAMES_SAMPLE_DELAY) {
59 samples.clear();
60 LOG_DEBUG(Audio, "Dropping frames!");
61 }
62 return samples;
63}
64
65TimeStretcher::TimeStretcher() : impl(std::make_unique<Impl>()) {
66 impl->soundtouch.setPitch(1.0);
67 impl->soundtouch.setChannels(2);
68 impl->soundtouch.setSampleRate(native_sample_rate);
69 Reset();
70}
71
72TimeStretcher::~TimeStretcher() {
73 impl->soundtouch.clear();
74}
75
76void TimeStretcher::SetOutputSampleRate(unsigned int sample_rate) {
77 impl->sample_rate = static_cast<double>(sample_rate);
78 impl->soundtouch.setRate(static_cast<double>(native_sample_rate) / impl->sample_rate);
79}
80
81void TimeStretcher::AddSamples(const s16* buffer, size_t num_samples) {
82 impl->soundtouch.putSamples(buffer, static_cast<uint>(num_samples));
83 impl->samples_queued += num_samples;
84}
85
86void TimeStretcher::Flush() {
87 impl->soundtouch.flush();
88}
89
90void TimeStretcher::Reset() {
91 impl->soundtouch.setTempo(1.0);
92 impl->soundtouch.clear();
93 impl->smoothed_ratio = 1.0;
94 impl->frame_timer = steady_clock::now();
95 impl->samples_queued = 0;
96 SetOutputSampleRate(native_sample_rate);
97}
98
99double TimeStretcher::CalculateCurrentRatio() {
100 const steady_clock::time_point now = steady_clock::now();
101 const std::chrono::duration<double> duration = now - impl->frame_timer;
102
103 const double expected_time = static_cast<double>(impl->samples_queued) / static_cast<double>(native_sample_rate);
104 const double actual_time = duration.count();
105
106 double ratio;
107 if (expected_time != 0) {
108 ratio = ClampRatio(actual_time / expected_time);
109 } else {
110 ratio = impl->smoothed_ratio;
111 }
112
113 impl->frame_timer = now;
114 impl->samples_queued = 0;
115
116 return ratio;
117}
118
119double TimeStretcher::CorrectForUnderAndOverflow(double ratio, size_t sample_delay) const {
120 const size_t min_sample_delay = static_cast<size_t>(MIN_DELAY_TIME * impl->sample_rate);
121 const size_t max_sample_delay = static_cast<size_t>(MAX_DELAY_TIME * impl->sample_rate);
122
123 if (sample_delay < min_sample_delay) {
124 // Make the ratio bigger.
125 ratio = ratio > 1.0 ? ratio * ratio : sqrt(ratio);
126 } else if (sample_delay > max_sample_delay) {
127 // Make the ratio smaller.
128 ratio = ratio > 1.0 ? sqrt(ratio) : ratio * ratio;
129 }
130
131 return ClampRatio(ratio);
132}
133
134std::vector<s16> TimeStretcher::GetSamples() {
135 uint available = impl->soundtouch.numSamples();
136
137 std::vector<s16> output(static_cast<size_t>(available) * 2);
138
139 impl->soundtouch.receiveSamples(output.data(), available);
140
141 return output;
142}
143
144} // namespace AudioCore
diff --git a/src/audio_core/time_stretch.h b/src/audio_core/time_stretch.h
new file mode 100644
index 000000000..1fde3f72a
--- /dev/null
+++ b/src/audio_core/time_stretch.h
@@ -0,0 +1,57 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstddef>
6#include <memory>
7#include <vector>
8
9#include "common/common_types.h"
10
11namespace AudioCore {
12
13class TimeStretcher final {
14public:
15 TimeStretcher();
16 ~TimeStretcher();
17
18 /**
19 * Set sample rate for the samples that Process returns.
20 * @param sample_rate The sample rate.
21 */
22 void SetOutputSampleRate(unsigned int sample_rate);
23
24 /**
25 * Add samples to be processed.
26 * @param sample_buffer Buffer of samples in interleaved stereo PCM16 format.
27 * @param num_sample Number of samples.
28 */
29 void AddSamples(const s16* sample_buffer, size_t num_samples);
30
31 /// Flush audio remaining in internal buffers.
32 void Flush();
33
34 /// Resets internal state and clears buffers.
35 void Reset();
36
37 /**
38 * Does audio stretching and produces the time-stretched samples.
39 * Timer calculations use sample_delay to determine how much of a margin we have.
40 * @param sample_delay How many samples are buffered downstream of this module and haven't been played yet.
41 * @return Samples to play in interleaved stereo PCM16 format.
42 */
43 std::vector<s16> Process(size_t sample_delay);
44
45private:
46 struct Impl;
47 std::unique_ptr<Impl> impl;
48
49 /// INTERNAL: ratio = wallclock time / emulated time
50 double CalculateCurrentRatio();
51 /// INTERNAL: If we have too many or too few samples downstream, nudge ratio in the appropriate direction.
52 double CorrectForUnderAndOverflow(double ratio, size_t sample_delay) const;
53 /// INTERNAL: Gets the time-stretched samples from SoundTouch.
54 std::vector<s16> GetSamples();
55};
56
57} // namespace AudioCore
diff --git a/src/citra/CMakeLists.txt b/src/citra/CMakeLists.txt
index fa615deb9..43fa06b4e 100644
--- a/src/citra/CMakeLists.txt
+++ b/src/citra/CMakeLists.txt
@@ -21,7 +21,7 @@ target_link_libraries(citra ${SDL2_LIBRARY} ${OPENGL_gl_LIBRARY} inih glad)
21if (MSVC) 21if (MSVC)
22 target_link_libraries(citra getopt) 22 target_link_libraries(citra getopt)
23endif() 23endif()
24target_link_libraries(citra ${PLATFORM_LIBRARIES}) 24target_link_libraries(citra ${PLATFORM_LIBRARIES} Threads::Threads)
25 25
26if(${CMAKE_SYSTEM_NAME} MATCHES "Linux|FreeBSD|OpenBSD|NetBSD") 26if(${CMAKE_SYSTEM_NAME} MATCHES "Linux|FreeBSD|OpenBSD|NetBSD")
27 install(TARGETS citra RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/bin") 27 install(TARGETS citra RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/bin")
diff --git a/src/citra/citra.cpp b/src/citra/citra.cpp
index 3a1fbe3f7..b4501eb2e 100644
--- a/src/citra/citra.cpp
+++ b/src/citra/citra.cpp
@@ -20,6 +20,7 @@
20#include "common/logging/log.h" 20#include "common/logging/log.h"
21#include "common/logging/backend.h" 21#include "common/logging/backend.h"
22#include "common/logging/filter.h" 22#include "common/logging/filter.h"
23#include "common/scm_rev.h"
23#include "common/scope_exit.h" 24#include "common/scope_exit.h"
24 25
25#include "core/settings.h" 26#include "core/settings.h"
@@ -34,11 +35,17 @@
34#include "video_core/video_core.h" 35#include "video_core/video_core.h"
35 36
36 37
37static void PrintHelp() 38static void PrintHelp(const char *argv0)
38{ 39{
39 std::cout << "Usage: citra [options] <filename>" << std::endl; 40 std::cout << "Usage: " << argv0 << " [options] <filename>\n"
40 std::cout << "--help, -h Display this information" << std::endl; 41 "-g, --gdbport=NUMBER Enable gdb stub on port NUMBER\n"
41 std::cout << "--gdbport, -g number Enable gdb stub on port number" << std::endl; 42 "-h, --help Display this help and exit\n"
43 "-v, --version Output version information and exit\n";
44}
45
46static void PrintVersion()
47{
48 std::cout << "Citra " << Common::g_scm_branch << " " << Common::g_scm_desc << std::endl;
42} 49}
43 50
44/// Application entry point 51/// Application entry point
@@ -51,18 +58,16 @@ int main(int argc, char **argv) {
51 std::string boot_filename; 58 std::string boot_filename;
52 59
53 static struct option long_options[] = { 60 static struct option long_options[] = {
54 { "help", no_argument, 0, 'h' },
55 { "gdbport", required_argument, 0, 'g' }, 61 { "gdbport", required_argument, 0, 'g' },
62 { "help", no_argument, 0, 'h' },
63 { "version", no_argument, 0, 'v' },
56 { 0, 0, 0, 0 } 64 { 0, 0, 0, 0 }
57 }; 65 };
58 66
59 while (optind < argc) { 67 while (optind < argc) {
60 char arg = getopt_long(argc, argv, ":hg:", long_options, &option_index); 68 char arg = getopt_long(argc, argv, "g:hv", long_options, &option_index);
61 if (arg != -1) { 69 if (arg != -1) {
62 switch (arg) { 70 switch (arg) {
63 case 'h':
64 PrintHelp();
65 return 0;
66 case 'g': 71 case 'g':
67 errno = 0; 72 errno = 0;
68 gdb_port = strtoul(optarg, &endarg, 0); 73 gdb_port = strtoul(optarg, &endarg, 0);
@@ -73,6 +78,12 @@ int main(int argc, char **argv) {
73 exit(1); 78 exit(1);
74 } 79 }
75 break; 80 break;
81 case 'h':
82 PrintHelp(argv[0]);
83 return 0;
84 case 'v':
85 PrintVersion();
86 return 0;
76 } 87 }
77 } else { 88 } else {
78 boot_filename = argv[optind]; 89 boot_filename = argv[optind];
@@ -93,14 +104,13 @@ int main(int argc, char **argv) {
93 104
94 log_filter.ParseFilterString(Settings::values.log_filter); 105 log_filter.ParseFilterString(Settings::values.log_filter);
95 106
96 GDBStub::ToggleServer(use_gdbstub); 107 // Apply the command line arguments
97 GDBStub::SetServerPort(gdb_port); 108 Settings::values.gdbstub_port = gdb_port;
109 Settings::values.use_gdbstub = use_gdbstub;
110 Settings::Apply();
98 111
99 std::unique_ptr<EmuWindow_SDL2> emu_window = std::make_unique<EmuWindow_SDL2>(); 112 std::unique_ptr<EmuWindow_SDL2> emu_window = std::make_unique<EmuWindow_SDL2>();
100 113
101 VideoCore::g_hw_renderer_enabled = Settings::values.use_hw_renderer;
102 VideoCore::g_shader_jit_enabled = Settings::values.use_shader_jit;
103
104 System::Init(emu_window.get()); 114 System::Init(emu_window.get());
105 SCOPE_EXIT({ System::Shutdown(); }); 115 SCOPE_EXIT({ System::Shutdown(); });
106 116
diff --git a/src/citra/config.cpp b/src/citra/config.cpp
index ebea5f840..4d170dec8 100644
--- a/src/citra/config.cpp
+++ b/src/citra/config.cpp
@@ -65,11 +65,15 @@ void Config::ReadValues() {
65 // Renderer 65 // Renderer
66 Settings::values.use_hw_renderer = sdl2_config->GetBoolean("Renderer", "use_hw_renderer", false); 66 Settings::values.use_hw_renderer = sdl2_config->GetBoolean("Renderer", "use_hw_renderer", false);
67 Settings::values.use_shader_jit = sdl2_config->GetBoolean("Renderer", "use_shader_jit", true); 67 Settings::values.use_shader_jit = sdl2_config->GetBoolean("Renderer", "use_shader_jit", true);
68 Settings::values.use_scaled_resolution = sdl2_config->GetBoolean("Renderer", "use_scaled_resolution", false);
68 69
69 Settings::values.bg_red = (float)sdl2_config->GetReal("Renderer", "bg_red", 1.0); 70 Settings::values.bg_red = (float)sdl2_config->GetReal("Renderer", "bg_red", 1.0);
70 Settings::values.bg_green = (float)sdl2_config->GetReal("Renderer", "bg_green", 1.0); 71 Settings::values.bg_green = (float)sdl2_config->GetReal("Renderer", "bg_green", 1.0);
71 Settings::values.bg_blue = (float)sdl2_config->GetReal("Renderer", "bg_blue", 1.0); 72 Settings::values.bg_blue = (float)sdl2_config->GetReal("Renderer", "bg_blue", 1.0);
72 73
74 // Audio
75 Settings::values.sink_id = sdl2_config->Get("Audio", "output_engine", "auto");
76
73 // Data Storage 77 // Data Storage
74 Settings::values.use_virtual_sd = sdl2_config->GetBoolean("Data Storage", "use_virtual_sd", true); 78 Settings::values.use_virtual_sd = sdl2_config->GetBoolean("Data Storage", "use_virtual_sd", true);
75 79
@@ -82,7 +86,7 @@ void Config::ReadValues() {
82 86
83 // Debugging 87 // Debugging
84 Settings::values.use_gdbstub = sdl2_config->GetBoolean("Debugging", "use_gdbstub", false); 88 Settings::values.use_gdbstub = sdl2_config->GetBoolean("Debugging", "use_gdbstub", false);
85 Settings::values.gdbstub_port = sdl2_config->GetInteger("Debugging", "gdbstub_port", 24689); 89 Settings::values.gdbstub_port = static_cast<u16>(sdl2_config->GetInteger("Debugging", "gdbstub_port", 24689));
86} 90}
87 91
88void Config::Reload() { 92void Config::Reload() {
diff --git a/src/citra/default_ini.h b/src/citra/default_ini.h
index c9b490a00..49126356f 100644
--- a/src/citra/default_ini.h
+++ b/src/citra/default_ini.h
@@ -46,12 +46,21 @@ use_hw_renderer =
46# 0 : Interpreter (slow), 1 (default): JIT (fast) 46# 0 : Interpreter (slow), 1 (default): JIT (fast)
47use_shader_jit = 47use_shader_jit =
48 48
49# Whether to use native 3DS screen resolution or to scale rendering resolution to the displayed screen size.
50# 0 (default): Native, 1: Scaled
51use_scaled_resolution =
52
49# The clear color for the renderer. What shows up on the sides of the bottom screen. 53# The clear color for the renderer. What shows up on the sides of the bottom screen.
50# Must be in range of 0.0-1.0. Defaults to 1.0 for all. 54# Must be in range of 0.0-1.0. Defaults to 1.0 for all.
51bg_red = 55bg_red =
52bg_blue = 56bg_blue =
53bg_green = 57bg_green =
54 58
59[Audio]
60# Which audio output engine to use.
61# auto (default): Auto-select, null: No audio output, sdl2: SDL2 (if available)
62output_engine =
63
55[Data Storage] 64[Data Storage]
56# Whether to create a virtual SD card. 65# Whether to create a virtual SD card.
57# 1 (default): Yes, 0: No 66# 1 (default): Yes, 0: No
diff --git a/src/citra/emu_window/emu_window_sdl2.cpp b/src/citra/emu_window/emu_window_sdl2.cpp
index 924189f4c..12cdd9d95 100644
--- a/src/citra/emu_window/emu_window_sdl2.cpp
+++ b/src/citra/emu_window/emu_window_sdl2.cpp
@@ -9,6 +9,8 @@
9#define SDL_MAIN_HANDLED 9#define SDL_MAIN_HANDLED
10#include <SDL.h> 10#include <SDL.h>
11 11
12#include <glad/glad.h>
13
12#include "common/key_map.h" 14#include "common/key_map.h"
13#include "common/logging/log.h" 15#include "common/logging/log.h"
14#include "common/scm_rev.h" 16#include "common/scm_rev.h"
@@ -98,6 +100,11 @@ EmuWindow_SDL2::EmuWindow_SDL2() {
98 exit(1); 100 exit(1);
99 } 101 }
100 102
103 if (!gladLoadGLLoader(static_cast<GLADloadproc>(SDL_GL_GetProcAddress))) {
104 LOG_CRITICAL(Frontend, "Failed to initialize GL functions! Exiting...");
105 exit(1);
106 }
107
101 OnResize(); 108 OnResize();
102 OnMinimalClientAreaChangeRequest(GetActiveConfig().min_client_area_size); 109 OnMinimalClientAreaChangeRequest(GetActiveConfig().min_client_area_size);
103 SDL_PumpEvents(); 110 SDL_PumpEvents();
diff --git a/src/citra_qt/CMakeLists.txt b/src/citra_qt/CMakeLists.txt
index 9b3eb2cd6..0a5d4624b 100644
--- a/src/citra_qt/CMakeLists.txt
+++ b/src/citra_qt/CMakeLists.txt
@@ -17,12 +17,17 @@ set(SRCS
17 debugger/profiler.cpp 17 debugger/profiler.cpp
18 debugger/ramview.cpp 18 debugger/ramview.cpp
19 debugger/registers.cpp 19 debugger/registers.cpp
20 game_list.cpp
21 util/spinbox.cpp 20 util/spinbox.cpp
22 util/util.cpp 21 util/util.cpp
23 bootmanager.cpp 22 bootmanager.cpp
23 configure_audio.cpp
24 configure_debug.cpp
25 configure_dialog.cpp
26 configure_general.cpp
27 game_list.cpp
24 hotkeys.cpp 28 hotkeys.cpp
25 main.cpp 29 main.cpp
30 ui_settings.cpp
26 citra-qt.rc 31 citra-qt.rc
27 Info.plist 32 Info.plist
28 ) 33 )
@@ -44,12 +49,18 @@ set(HEADERS
44 debugger/profiler.h 49 debugger/profiler.h
45 debugger/ramview.h 50 debugger/ramview.h
46 debugger/registers.h 51 debugger/registers.h
47 game_list.h
48 util/spinbox.h 52 util/spinbox.h
49 util/util.h 53 util/util.h
50 bootmanager.h 54 bootmanager.h
55 configure_audio.h
56 configure_debug.h
57 configure_dialog.h
58 configure_general.h
59 game_list.h
60 game_list_p.h
51 hotkeys.h 61 hotkeys.h
52 main.h 62 main.h
63 ui_settings.h
53 version.h 64 version.h
54 ) 65 )
55 66
@@ -59,6 +70,10 @@ set(UIS
59 debugger/disassembler.ui 70 debugger/disassembler.ui
60 debugger/profiler.ui 71 debugger/profiler.ui
61 debugger/registers.ui 72 debugger/registers.ui
73 configure.ui
74 configure_audio.ui
75 configure_debug.ui
76 configure_general.ui
62 hotkeys.ui 77 hotkeys.ui
63 main.ui 78 main.ui
64 ) 79 )
@@ -81,7 +96,7 @@ else()
81endif() 96endif()
82target_link_libraries(citra-qt core video_core audio_core common qhexedit) 97target_link_libraries(citra-qt core video_core audio_core common qhexedit)
83target_link_libraries(citra-qt ${OPENGL_gl_LIBRARY} ${CITRA_QT_LIBS}) 98target_link_libraries(citra-qt ${OPENGL_gl_LIBRARY} ${CITRA_QT_LIBS})
84target_link_libraries(citra-qt ${PLATFORM_LIBRARIES}) 99target_link_libraries(citra-qt ${PLATFORM_LIBRARIES} Threads::Threads)
85 100
86if(${CMAKE_SYSTEM_NAME} MATCHES "Linux|FreeBSD|OpenBSD|NetBSD") 101if(${CMAKE_SYSTEM_NAME} MATCHES "Linux|FreeBSD|OpenBSD|NetBSD")
87 install(TARGETS citra-qt RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/bin") 102 install(TARGETS citra-qt RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/bin")
diff --git a/src/citra_qt/bootmanager.cpp b/src/citra_qt/bootmanager.cpp
index 8e60b9cad..01b81c11c 100644
--- a/src/citra_qt/bootmanager.cpp
+++ b/src/citra_qt/bootmanager.cpp
@@ -71,7 +71,9 @@ void EmuThread::run() {
71 // Shutdown the core emulation 71 // Shutdown the core emulation
72 System::Shutdown(); 72 System::Shutdown();
73 73
74#if MICROPROFILE_ENABLED
74 MicroProfileOnThreadExit(); 75 MicroProfileOnThreadExit();
76#endif
75 77
76 render_window->moveContext(); 78 render_window->moveContext();
77} 79}
diff --git a/src/citra_qt/config.cpp b/src/citra_qt/config.cpp
index 66271aa7b..f6e498128 100644
--- a/src/citra_qt/config.cpp
+++ b/src/citra_qt/config.cpp
@@ -7,12 +7,12 @@
7#include <QStringList> 7#include <QStringList>
8 8
9#include "citra_qt/config.h" 9#include "citra_qt/config.h"
10#include "citra_qt/ui_settings.h"
10 11
11#include "common/file_util.h" 12#include "common/file_util.h"
12#include "core/settings.h" 13#include "core/settings.h"
13 14
14Config::Config() { 15Config::Config() {
15
16 // TODO: Don't hardcode the path; let the frontend decide where to put the config files. 16 // TODO: Don't hardcode the path; let the frontend decide where to put the config files.
17 qt_config_loc = FileUtil::GetUserPath(D_CONFIG_IDX) + "qt-config.ini"; 17 qt_config_loc = FileUtil::GetUserPath(D_CONFIG_IDX) + "qt-config.ini";
18 FileUtil::CreateFullPath(qt_config_loc); 18 FileUtil::CreateFullPath(qt_config_loc);
@@ -45,12 +45,17 @@ void Config::ReadValues() {
45 qt_config->beginGroup("Renderer"); 45 qt_config->beginGroup("Renderer");
46 Settings::values.use_hw_renderer = qt_config->value("use_hw_renderer", false).toBool(); 46 Settings::values.use_hw_renderer = qt_config->value("use_hw_renderer", false).toBool();
47 Settings::values.use_shader_jit = qt_config->value("use_shader_jit", true).toBool(); 47 Settings::values.use_shader_jit = qt_config->value("use_shader_jit", true).toBool();
48 Settings::values.use_scaled_resolution = qt_config->value("use_scaled_resolution", false).toBool();
48 49
49 Settings::values.bg_red = qt_config->value("bg_red", 1.0).toFloat(); 50 Settings::values.bg_red = qt_config->value("bg_red", 1.0).toFloat();
50 Settings::values.bg_green = qt_config->value("bg_green", 1.0).toFloat(); 51 Settings::values.bg_green = qt_config->value("bg_green", 1.0).toFloat();
51 Settings::values.bg_blue = qt_config->value("bg_blue", 1.0).toFloat(); 52 Settings::values.bg_blue = qt_config->value("bg_blue", 1.0).toFloat();
52 qt_config->endGroup(); 53 qt_config->endGroup();
53 54
55 qt_config->beginGroup("Audio");
56 Settings::values.sink_id = qt_config->value("output_engine", "auto").toString().toStdString();
57 qt_config->endGroup();
58
54 qt_config->beginGroup("Data Storage"); 59 qt_config->beginGroup("Data Storage");
55 Settings::values.use_virtual_sd = qt_config->value("use_virtual_sd", true).toBool(); 60 Settings::values.use_virtual_sd = qt_config->value("use_virtual_sd", true).toBool();
56 qt_config->endGroup(); 61 qt_config->endGroup();
@@ -68,6 +73,51 @@ void Config::ReadValues() {
68 Settings::values.use_gdbstub = qt_config->value("use_gdbstub", false).toBool(); 73 Settings::values.use_gdbstub = qt_config->value("use_gdbstub", false).toBool();
69 Settings::values.gdbstub_port = qt_config->value("gdbstub_port", 24689).toInt(); 74 Settings::values.gdbstub_port = qt_config->value("gdbstub_port", 24689).toInt();
70 qt_config->endGroup(); 75 qt_config->endGroup();
76
77 qt_config->beginGroup("UI");
78
79 qt_config->beginGroup("UILayout");
80 UISettings::values.geometry = qt_config->value("geometry").toByteArray();
81 UISettings::values.state = qt_config->value("state").toByteArray();
82 UISettings::values.renderwindow_geometry = qt_config->value("geometryRenderWindow").toByteArray();
83 UISettings::values.gamelist_header_state = qt_config->value("gameListHeaderState").toByteArray();
84 UISettings::values.microprofile_geometry = qt_config->value("microProfileDialogGeometry").toByteArray();
85 UISettings::values.microprofile_visible = qt_config->value("microProfileDialogVisible", false).toBool();
86 qt_config->endGroup();
87
88 qt_config->beginGroup("Paths");
89 UISettings::values.roms_path = qt_config->value("romsPath").toString();
90 UISettings::values.symbols_path = qt_config->value("symbolsPath").toString();
91 UISettings::values.gamedir = qt_config->value("gameListRootDir", ".").toString();
92 UISettings::values.gamedir_deepscan = qt_config->value("gameListDeepScan", false).toBool();
93 UISettings::values.recent_files = qt_config->value("recentFiles").toStringList();
94 qt_config->endGroup();
95
96 qt_config->beginGroup("Shortcuts");
97 QStringList groups = qt_config->childGroups();
98 for (auto group : groups) {
99 qt_config->beginGroup(group);
100
101 QStringList hotkeys = qt_config->childGroups();
102 for (auto hotkey : hotkeys) {
103 qt_config->beginGroup(hotkey);
104 UISettings::values.shortcuts.emplace_back(
105 UISettings::Shortcut(group + "/" + hotkey,
106 UISettings::ContextualShortcut(qt_config->value("KeySeq").toString(),
107 qt_config->value("Context").toInt())));
108 qt_config->endGroup();
109 }
110
111 qt_config->endGroup();
112 }
113 qt_config->endGroup();
114
115 UISettings::values.single_window_mode = qt_config->value("singleWindowMode", true).toBool();
116 UISettings::values.display_titlebar = qt_config->value("displayTitleBars", true).toBool();
117 UISettings::values.confirm_before_closing = qt_config->value("confirmClose",true).toBool();
118 UISettings::values.first_start = qt_config->value("firstStart", true).toBool();
119
120 qt_config->endGroup();
71} 121}
72 122
73void Config::SaveValues() { 123void Config::SaveValues() {
@@ -85,6 +135,7 @@ void Config::SaveValues() {
85 qt_config->beginGroup("Renderer"); 135 qt_config->beginGroup("Renderer");
86 qt_config->setValue("use_hw_renderer", Settings::values.use_hw_renderer); 136 qt_config->setValue("use_hw_renderer", Settings::values.use_hw_renderer);
87 qt_config->setValue("use_shader_jit", Settings::values.use_shader_jit); 137 qt_config->setValue("use_shader_jit", Settings::values.use_shader_jit);
138 qt_config->setValue("use_scaled_resolution", Settings::values.use_scaled_resolution);
88 139
89 // Cast to double because Qt's written float values are not human-readable 140 // Cast to double because Qt's written float values are not human-readable
90 qt_config->setValue("bg_red", (double)Settings::values.bg_red); 141 qt_config->setValue("bg_red", (double)Settings::values.bg_red);
@@ -92,6 +143,10 @@ void Config::SaveValues() {
92 qt_config->setValue("bg_blue", (double)Settings::values.bg_blue); 143 qt_config->setValue("bg_blue", (double)Settings::values.bg_blue);
93 qt_config->endGroup(); 144 qt_config->endGroup();
94 145
146 qt_config->beginGroup("Audio");
147 qt_config->setValue("output_engine", QString::fromStdString(Settings::values.sink_id));
148 qt_config->endGroup();
149
95 qt_config->beginGroup("Data Storage"); 150 qt_config->beginGroup("Data Storage");
96 qt_config->setValue("use_virtual_sd", Settings::values.use_virtual_sd); 151 qt_config->setValue("use_virtual_sd", Settings::values.use_virtual_sd);
97 qt_config->endGroup(); 152 qt_config->endGroup();
@@ -109,10 +164,44 @@ void Config::SaveValues() {
109 qt_config->setValue("use_gdbstub", Settings::values.use_gdbstub); 164 qt_config->setValue("use_gdbstub", Settings::values.use_gdbstub);
110 qt_config->setValue("gdbstub_port", Settings::values.gdbstub_port); 165 qt_config->setValue("gdbstub_port", Settings::values.gdbstub_port);
111 qt_config->endGroup(); 166 qt_config->endGroup();
167
168 qt_config->beginGroup("UI");
169
170 qt_config->beginGroup("UILayout");
171 qt_config->setValue("geometry", UISettings::values.geometry);
172 qt_config->setValue("state", UISettings::values.state);
173 qt_config->setValue("geometryRenderWindow", UISettings::values.renderwindow_geometry);
174 qt_config->setValue("gameListHeaderState", UISettings::values.gamelist_header_state);
175 qt_config->setValue("microProfileDialogGeometry", UISettings::values.microprofile_geometry);
176 qt_config->setValue("microProfileDialogVisible", UISettings::values.microprofile_visible);
177 qt_config->endGroup();
178
179 qt_config->beginGroup("Paths");
180 qt_config->setValue("romsPath", UISettings::values.roms_path);
181 qt_config->setValue("symbolsPath", UISettings::values.symbols_path);
182 qt_config->setValue("gameListRootDir", UISettings::values.gamedir);
183 qt_config->setValue("gameListDeepScan", UISettings::values.gamedir_deepscan);
184 qt_config->setValue("recentFiles", UISettings::values.recent_files);
185 qt_config->endGroup();
186
187 qt_config->beginGroup("Shortcuts");
188 for (auto shortcut : UISettings::values.shortcuts ) {
189 qt_config->setValue(shortcut.first + "/KeySeq", shortcut.second.first);
190 qt_config->setValue(shortcut.first + "/Context", shortcut.second.second);
191 }
192 qt_config->endGroup();
193
194 qt_config->setValue("singleWindowMode", UISettings::values.single_window_mode);
195 qt_config->setValue("displayTitleBars", UISettings::values.display_titlebar);
196 qt_config->setValue("confirmClose", UISettings::values.confirm_before_closing);
197 qt_config->setValue("firstStart", UISettings::values.first_start);
198
199 qt_config->endGroup();
112} 200}
113 201
114void Config::Reload() { 202void Config::Reload() {
115 ReadValues(); 203 ReadValues();
204 Settings::Apply();
116} 205}
117 206
118void Config::Save() { 207void Config::Save() {
diff --git a/src/citra_qt/configure.ui b/src/citra_qt/configure.ui
new file mode 100644
index 000000000..e1624bbef
--- /dev/null
+++ b/src/citra_qt/configure.ui
@@ -0,0 +1,108 @@
1<?xml version="1.0" encoding="UTF-8"?>
2<ui version="4.0">
3 <class>ConfigureDialog</class>
4 <widget class="QDialog" name="ConfigureDialog">
5 <property name="geometry">
6 <rect>
7 <x>0</x>
8 <y>0</y>
9 <width>441</width>
10 <height>501</height>
11 </rect>
12 </property>
13 <property name="windowTitle">
14 <string>Citra Configuration</string>
15 </property>
16 <layout class="QVBoxLayout" name="verticalLayout">
17 <item>
18 <widget class="QTabWidget" name="tabWidget">
19 <property name="currentIndex">
20 <number>0</number>
21 </property>
22 <widget class="ConfigureGeneral" name="generalTab">
23 <attribute name="title">
24 <string>General</string>
25 </attribute>
26 </widget>
27 <widget class="QWidget" name="inputTab">
28 <attribute name="title">
29 <string>Input</string>
30 </attribute>
31 </widget>
32 <widget class="ConfigureAudio" name="audioTab">
33 <attribute name="title">
34 <string>Audio</string>
35 </attribute>
36 </widget>
37 <widget class="ConfigureDebug" name="debugTab">
38 <attribute name="title">
39 <string>Debug</string>
40 </attribute>
41 </widget>
42 </widget>
43 </item>
44 <item>
45 <widget class="QDialogButtonBox" name="buttonBox">
46 <property name="standardButtons">
47 <set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set>
48 </property>
49 </widget>
50 </item>
51 </layout>
52 </widget>
53 <customwidgets>
54 <customwidget>
55 <class>ConfigureGeneral</class>
56 <extends>QWidget</extends>
57 <header>configure_general.h</header>
58 <container>1</container>
59 </customwidget>
60 <customwidget>
61 <class>ConfigureAudio</class>
62 <extends>QWidget</extends>
63 <header>configure_audio.h</header>
64 <container>1</container>
65 </customwidget>
66 <customwidget>
67 <class>ConfigureDebug</class>
68 <extends>QWidget</extends>
69 <header>configure_debug.h</header>
70 <container>1</container>
71 </customwidget>
72 </customwidgets>
73 <resources/>
74 <connections>
75 <connection>
76 <sender>buttonBox</sender>
77 <signal>accepted()</signal>
78 <receiver>ConfigureDialog</receiver>
79 <slot>accept()</slot>
80 <hints>
81 <hint type="sourcelabel">
82 <x>220</x>
83 <y>380</y>
84 </hint>
85 <hint type="destinationlabel">
86 <x>220</x>
87 <y>200</y>
88 </hint>
89 </hints>
90 </connection>
91 <connection>
92 <sender>buttonBox</sender>
93 <signal>rejected()</signal>
94 <receiver>ConfigureDialog</receiver>
95 <slot>reject()</slot>
96 <hints>
97 <hint type="sourcelabel">
98 <x>220</x>
99 <y>380</y>
100 </hint>
101 <hint type="destinationlabel">
102 <x>220</x>
103 <y>200</y>
104 </hint>
105 </hints>
106 </connection>
107 </connections>
108</ui>
diff --git a/src/citra_qt/configure_audio.cpp b/src/citra_qt/configure_audio.cpp
new file mode 100644
index 000000000..cedfa2f2a
--- /dev/null
+++ b/src/citra_qt/configure_audio.cpp
@@ -0,0 +1,44 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "audio_core/sink_details.h"
6
7#include "citra_qt/configure_audio.h"
8#include "ui_configure_audio.h"
9
10#include "core/settings.h"
11
12ConfigureAudio::ConfigureAudio(QWidget* parent) :
13 QWidget(parent),
14 ui(std::make_unique<Ui::ConfigureAudio>())
15{
16 ui->setupUi(this);
17
18 ui->output_sink_combo_box->clear();
19 ui->output_sink_combo_box->addItem("auto");
20 for (const auto& sink_detail : AudioCore::g_sink_details) {
21 ui->output_sink_combo_box->addItem(sink_detail.id);
22 }
23
24 this->setConfiguration();
25}
26
27ConfigureAudio::~ConfigureAudio() {
28}
29
30void ConfigureAudio::setConfiguration() {
31 int new_sink_index = 0;
32 for (int index = 0; index < ui->output_sink_combo_box->count(); index++) {
33 if (ui->output_sink_combo_box->itemText(index).toStdString() == Settings::values.sink_id) {
34 new_sink_index = index;
35 break;
36 }
37 }
38 ui->output_sink_combo_box->setCurrentIndex(new_sink_index);
39}
40
41void ConfigureAudio::applyConfiguration() {
42 Settings::values.sink_id = ui->output_sink_combo_box->itemText(ui->output_sink_combo_box->currentIndex()).toStdString();
43 Settings::Apply();
44}
diff --git a/src/citra_qt/configure_audio.h b/src/citra_qt/configure_audio.h
new file mode 100644
index 000000000..51df2e27b
--- /dev/null
+++ b/src/citra_qt/configure_audio.h
@@ -0,0 +1,27 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <QWidget>
9
10namespace Ui {
11class ConfigureAudio;
12}
13
14class ConfigureAudio : public QWidget {
15 Q_OBJECT
16
17public:
18 explicit ConfigureAudio(QWidget* parent = nullptr);
19 ~ConfigureAudio();
20
21 void applyConfiguration();
22
23private:
24 void setConfiguration();
25
26 std::unique_ptr<Ui::ConfigureAudio> ui;
27};
diff --git a/src/citra_qt/configure_audio.ui b/src/citra_qt/configure_audio.ui
new file mode 100644
index 000000000..d7f6946ca
--- /dev/null
+++ b/src/citra_qt/configure_audio.ui
@@ -0,0 +1,48 @@
1<?xml version="1.0" encoding="utf-8"?>
2
3<ui version="4.0">
4 <class>ConfigureAudio</class>
5 <widget class="QWidget" name="ConfigureAudio">
6 <layout class="QVBoxLayout">
7 <item>
8 <widget class="QGroupBox">
9 <property name="title">
10 <string>Audio</string>
11 </property>
12 <layout class="QVBoxLayout">
13 <item>
14 <layout class="QHBoxLayout">
15 <item>
16 <widget class="QLabel">
17 <property name="text">
18 <string>Output Engine:</string>
19 </property>
20 </widget>
21 </item>
22 <item>
23 <widget class="QComboBox" name="output_sink_combo_box">
24 </widget>
25 </item>
26 </layout>
27 </item>
28 </layout>
29 </widget>
30 </item>
31 <item>
32 <spacer>
33 <property name="orientation">
34 <enum>Qt::Vertical</enum>
35 </property>
36 <property name="sizeHint" stdset="0">
37 <size>
38 <width>20</width>
39 <height>40</height>
40 </size>
41 </property>
42 </spacer>
43 </item>
44 </layout>
45 </widget>
46 <resources />
47 <connections />
48</ui>
diff --git a/src/citra_qt/configure_debug.cpp b/src/citra_qt/configure_debug.cpp
new file mode 100644
index 000000000..dc3d7b906
--- /dev/null
+++ b/src/citra_qt/configure_debug.cpp
@@ -0,0 +1,31 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "citra_qt/configure_debug.h"
6#include "ui_configure_debug.h"
7
8#include "core/settings.h"
9
10ConfigureDebug::ConfigureDebug(QWidget *parent) :
11 QWidget(parent),
12 ui(new Ui::ConfigureDebug)
13{
14 ui->setupUi(this);
15 this->setConfiguration();
16}
17
18ConfigureDebug::~ConfigureDebug() {
19}
20
21void ConfigureDebug::setConfiguration() {
22 ui->toogle_gdbstub->setChecked(Settings::values.use_gdbstub);
23 ui->gdbport_spinbox->setEnabled(Settings::values.use_gdbstub);
24 ui->gdbport_spinbox->setValue(Settings::values.gdbstub_port);
25}
26
27void ConfigureDebug::applyConfiguration() {
28 Settings::values.use_gdbstub = ui->toogle_gdbstub->isChecked();
29 Settings::values.gdbstub_port = ui->gdbport_spinbox->value();
30 Settings::Apply();
31}
diff --git a/src/citra_qt/configure_debug.h b/src/citra_qt/configure_debug.h
new file mode 100644
index 000000000..ab58ebbdc
--- /dev/null
+++ b/src/citra_qt/configure_debug.h
@@ -0,0 +1,29 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <QWidget>
9
10namespace Ui {
11class ConfigureDebug;
12}
13
14class ConfigureDebug : public QWidget
15{
16 Q_OBJECT
17
18public:
19 explicit ConfigureDebug(QWidget *parent = nullptr);
20 ~ConfigureDebug();
21
22 void applyConfiguration();
23
24private:
25 void setConfiguration();
26
27private:
28 std::unique_ptr<Ui::ConfigureDebug> ui;
29};
diff --git a/src/citra_qt/configure_debug.ui b/src/citra_qt/configure_debug.ui
new file mode 100644
index 000000000..3ba7f44da
--- /dev/null
+++ b/src/citra_qt/configure_debug.ui
@@ -0,0 +1,102 @@
1<?xml version="1.0" encoding="UTF-8"?>
2<ui version="4.0">
3 <class>ConfigureDebug</class>
4 <widget class="QWidget" name="ConfigureDebug">
5 <property name="geometry">
6 <rect>
7 <x>0</x>
8 <y>0</y>
9 <width>400</width>
10 <height>300</height>
11 </rect>
12 </property>
13 <property name="windowTitle">
14 <string>Form</string>
15 </property>
16 <layout class="QVBoxLayout" name="verticalLayout">
17 <item>
18 <layout class="QVBoxLayout" name="verticalLayout_3">
19 <item>
20 <widget class="QGroupBox" name="groupBox">
21 <property name="title">
22 <string>GDB</string>
23 </property>
24 <layout class="QVBoxLayout" name="verticalLayout_2">
25 <item>
26 <layout class="QHBoxLayout" name="horizontalLayout_3">
27 <item>
28 <widget class="QCheckBox" name="toogle_gdbstub">
29 <property name="text">
30 <string>Enable GDB Stub</string>
31 </property>
32 </widget>
33 </item>
34 <item>
35 <spacer name="horizontalSpacer">
36 <property name="orientation">
37 <enum>Qt::Horizontal</enum>
38 </property>
39 <property name="sizeHint" stdset="0">
40 <size>
41 <width>40</width>
42 <height>20</height>
43 </size>
44 </property>
45 </spacer>
46 </item>
47 <item>
48 <widget class="QLabel" name="label">
49 <property name="text">
50 <string>Port:</string>
51 </property>
52 </widget>
53 </item>
54 <item>
<widget class="QSpinBox" name="gdbport_spinbox">
 <!-- Highest valid TCP port number is 65535; 65536 is out of range. -->
 <property name="maximum">
  <number>65535</number>
 </property>
</widget>
60 </item>
61 </layout>
62 </item>
63 </layout>
64 </widget>
65 </item>
66 </layout>
67 </item>
68 <item>
69 <spacer name="verticalSpacer">
70 <property name="orientation">
71 <enum>Qt::Vertical</enum>
72 </property>
73 <property name="sizeHint" stdset="0">
74 <size>
75 <width>20</width>
76 <height>40</height>
77 </size>
78 </property>
79 </spacer>
80 </item>
81 </layout>
82 </widget>
83 <resources/>
84 <connections>
85 <connection>
86 <sender>toogle_gdbstub</sender>
87 <signal>toggled(bool)</signal>
88 <receiver>gdbport_spinbox</receiver>
89 <slot>setEnabled(bool)</slot>
90 <hints>
91 <hint type="sourcelabel">
92 <x>84</x>
93 <y>157</y>
94 </hint>
95 <hint type="destinationlabel">
96 <x>342</x>
97 <y>158</y>
98 </hint>
99 </hints>
100 </connection>
101 </connections>
102</ui>
diff --git a/src/citra_qt/configure_dialog.cpp b/src/citra_qt/configure_dialog.cpp
new file mode 100644
index 000000000..2f0317fe0
--- /dev/null
+++ b/src/citra_qt/configure_dialog.cpp
@@ -0,0 +1,30 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "citra_qt/config.h"
6#include "citra_qt/configure_dialog.h"
7#include "ui_configure.h"
8
9
10#include "core/settings.h"
11
12ConfigureDialog::ConfigureDialog(QWidget *parent) :
13 QDialog(parent),
14 ui(new Ui::ConfigureDialog)
15{
16 ui->setupUi(this);
17 this->setConfiguration();
18}
19
20ConfigureDialog::~ConfigureDialog() {
21}
22
23void ConfigureDialog::setConfiguration() {
24}
25
26void ConfigureDialog::applyConfiguration() {
27 ui->generalTab->applyConfiguration();
28 ui->audioTab->applyConfiguration();
29 ui->debugTab->applyConfiguration();
30}
diff --git a/src/citra_qt/configure_dialog.h b/src/citra_qt/configure_dialog.h
new file mode 100644
index 000000000..89020eeb4
--- /dev/null
+++ b/src/citra_qt/configure_dialog.h
@@ -0,0 +1,29 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <QDialog>
9
10namespace Ui {
11class ConfigureDialog;
12}
13
14class ConfigureDialog : public QDialog
15{
16 Q_OBJECT
17
18public:
19 explicit ConfigureDialog(QWidget *parent = nullptr);
20 ~ConfigureDialog();
21
22 void applyConfiguration();
23
24private:
25 void setConfiguration();
26
27private:
28 std::unique_ptr<Ui::ConfigureDialog> ui;
29};
diff --git a/src/citra_qt/configure_general.cpp b/src/citra_qt/configure_general.cpp
new file mode 100644
index 000000000..62648e665
--- /dev/null
+++ b/src/citra_qt/configure_general.cpp
@@ -0,0 +1,39 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "citra_qt/configure_general.h"
6#include "citra_qt/ui_settings.h"
7#include "ui_configure_general.h"
8
9#include "core/settings.h"
10
11ConfigureGeneral::ConfigureGeneral(QWidget *parent) :
12 QWidget(parent),
13 ui(new Ui::ConfigureGeneral)
14{
15 ui->setupUi(this);
16 this->setConfiguration();
17}
18
19ConfigureGeneral::~ConfigureGeneral() {
20}
21
22void ConfigureGeneral::setConfiguration() {
23 ui->toogle_deepscan->setChecked(UISettings::values.gamedir_deepscan);
24 ui->toogle_check_exit->setChecked(UISettings::values.confirm_before_closing);
25 ui->region_combobox->setCurrentIndex(Settings::values.region_value);
26 ui->toogle_hw_renderer->setChecked(Settings::values.use_hw_renderer);
27 ui->toogle_shader_jit->setChecked(Settings::values.use_shader_jit);
28 ui->toogle_scaled_resolution->setChecked(Settings::values.use_scaled_resolution);
29}
30
31void ConfigureGeneral::applyConfiguration() {
32 UISettings::values.gamedir_deepscan = ui->toogle_deepscan->isChecked();
33 UISettings::values.confirm_before_closing = ui->toogle_check_exit->isChecked();
34 Settings::values.region_value = ui->region_combobox->currentIndex();
35 Settings::values.use_hw_renderer = ui->toogle_hw_renderer->isChecked();
36 Settings::values.use_shader_jit = ui->toogle_shader_jit->isChecked();
37 Settings::values.use_scaled_resolution = ui->toogle_scaled_resolution->isChecked();
38 Settings::Apply();
39}
diff --git a/src/citra_qt/configure_general.h b/src/citra_qt/configure_general.h
new file mode 100644
index 000000000..a6c68e62d
--- /dev/null
+++ b/src/citra_qt/configure_general.h
@@ -0,0 +1,29 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <QWidget>
9
10namespace Ui {
11class ConfigureGeneral;
12}
13
14class ConfigureGeneral : public QWidget
15{
16 Q_OBJECT
17
18public:
19 explicit ConfigureGeneral(QWidget *parent = nullptr);
20 ~ConfigureGeneral();
21
22 void applyConfiguration();
23
24private:
25 void setConfiguration();
26
27private:
28 std::unique_ptr<Ui::ConfigureGeneral> ui;
29};
diff --git a/src/citra_qt/configure_general.ui b/src/citra_qt/configure_general.ui
new file mode 100644
index 000000000..5eb309793
--- /dev/null
+++ b/src/citra_qt/configure_general.ui
@@ -0,0 +1,173 @@
1<?xml version="1.0" encoding="UTF-8"?>
2<ui version="4.0">
3 <class>ConfigureGeneral</class>
4 <widget class="QWidget" name="ConfigureGeneral">
5 <property name="geometry">
6 <rect>
7 <x>0</x>
8 <y>0</y>
9 <width>300</width>
10 <height>377</height>
11 </rect>
12 </property>
13 <property name="windowTitle">
14 <string>Form</string>
15 </property>
16 <layout class="QHBoxLayout" name="horizontalLayout">
17 <item>
18 <layout class="QVBoxLayout" name="verticalLayout">
19 <item>
20 <widget class="QGroupBox" name="groupBox">
21 <property name="title">
22 <string>General</string>
23 </property>
24 <layout class="QHBoxLayout" name="horizontalLayout_3">
25 <item>
26 <layout class="QVBoxLayout" name="verticalLayout_2">
27 <item>
28 <widget class="QCheckBox" name="toogle_deepscan">
29 <property name="text">
30 <string>Recursive scan for game folder</string>
31 </property>
32 </widget>
33 </item>
34 <item>
35 <widget class="QCheckBox" name="toogle_check_exit">
36 <property name="text">
37 <string>Confirm exit while emulation is running</string>
38 </property>
39 </widget>
40 </item>
41 </layout>
42 </item>
43 </layout>
44 </widget>
45 </item>
46 <item>
47 <widget class="QGroupBox" name="groupBox_4">
48 <property name="title">
49 <string>Emulation</string>
50 </property>
51 <layout class="QHBoxLayout" name="horizontalLayout_5">
52 <item>
53 <layout class="QVBoxLayout" name="verticalLayout_6">
54 <item>
55 <layout class="QHBoxLayout" name="horizontalLayout_6">
56 <item>
57 <widget class="QLabel" name="label">
58 <property name="text">
59 <string>Region:</string>
60 </property>
61 </widget>
62 </item>
63 <item>
64 <widget class="QComboBox" name="region_combobox">
65 <item>
66 <property name="text">
67 <string notr="true">JPN</string>
68 </property>
69 </item>
70 <item>
71 <property name="text">
72 <string notr="true">USA</string>
73 </property>
74 </item>
75 <item>
76 <property name="text">
77 <string notr="true">EUR</string>
78 </property>
79 </item>
80 <item>
81 <property name="text">
82 <string notr="true">AUS</string>
83 </property>
84 </item>
85 <item>
86 <property name="text">
87 <string notr="true">CHN</string>
88 </property>
89 </item>
90 <item>
91 <property name="text">
92 <string notr="true">KOR</string>
93 </property>
94 </item>
95 <item>
96 <property name="text">
97 <string notr="true">TWN</string>
98 </property>
99 </item>
100 </widget>
101 </item>
102 </layout>
103 </item>
104 </layout>
105 </item>
106 </layout>
107 </widget>
108 </item>
109 <item>
110 <widget class="QGroupBox" name="groupBox_2">
111 <property name="title">
112 <string>Performance</string>
113 </property>
114 <layout class="QHBoxLayout" name="horizontalLayout_2">
115 <item>
116 <layout class="QVBoxLayout" name="verticalLayout_3">
117 <item>
118 <widget class="QCheckBox" name="toogle_hw_renderer">
119 <property name="text">
120 <string>Enable hardware renderer</string>
121 </property>
122 </widget>
123 </item>
124 <item>
125 <widget class="QCheckBox" name="toogle_shader_jit">
126 <property name="text">
127 <string>Enable shader JIT</string>
128 </property>
129 </widget>
130 </item>
131 <item>
132 <widget class="QCheckBox" name="toogle_scaled_resolution">
133 <property name="text">
134 <string>Enable scaled resolution</string>
135 </property>
136 </widget>
137 </item>
138 </layout>
139 </item>
140 </layout>
141 </widget>
142 </item>
143 <item>
144 <widget class="QGroupBox" name="groupBox_3">
145 <property name="title">
146 <string>Hotkeys</string>
147 </property>
148 <layout class="QHBoxLayout" name="horizontalLayout_4">
149 <item>
150 <layout class="QVBoxLayout" name="verticalLayout_4">
151 <item>
152 <widget class="GHotkeysDialog" name="widget" native="true"/>
153 </item>
154 </layout>
155 </item>
156 </layout>
157 </widget>
158 </item>
159 </layout>
160 </item>
161 </layout>
162 </widget>
163 <customwidgets>
164 <customwidget>
165 <class>GHotkeysDialog</class>
166 <extends>QWidget</extends>
167 <header>hotkeys.h</header>
168 <container>1</container>
169 </customwidget>
170 </customwidgets>
171 <resources/>
172 <connections/>
173</ui>
diff --git a/src/citra_qt/debugger/graphics_breakpoints.cpp b/src/citra_qt/debugger/graphics_breakpoints.cpp
index 819ec7707..fe66918a8 100644
--- a/src/citra_qt/debugger/graphics_breakpoints.cpp
+++ b/src/citra_qt/debugger/graphics_breakpoints.cpp
@@ -44,7 +44,7 @@ QVariant BreakPointModel::data(const QModelIndex& index, int role) const
44 { Pica::DebugContext::Event::PicaCommandProcessed, tr("Pica command processed") }, 44 { Pica::DebugContext::Event::PicaCommandProcessed, tr("Pica command processed") },
45 { Pica::DebugContext::Event::IncomingPrimitiveBatch, tr("Incoming primitive batch") }, 45 { Pica::DebugContext::Event::IncomingPrimitiveBatch, tr("Incoming primitive batch") },
46 { Pica::DebugContext::Event::FinishedPrimitiveBatch, tr("Finished primitive batch") }, 46 { Pica::DebugContext::Event::FinishedPrimitiveBatch, tr("Finished primitive batch") },
47 { Pica::DebugContext::Event::VertexLoaded, tr("Vertex loaded") }, 47 { Pica::DebugContext::Event::VertexShaderInvocation, tr("Vertex shader invocation") },
48 { Pica::DebugContext::Event::IncomingDisplayTransfer, tr("Incoming display transfer") }, 48 { Pica::DebugContext::Event::IncomingDisplayTransfer, tr("Incoming display transfer") },
49 { Pica::DebugContext::Event::GSPCommandProcessed, tr("GSP command processed") }, 49 { Pica::DebugContext::Event::GSPCommandProcessed, tr("GSP command processed") },
50 { Pica::DebugContext::Event::BufferSwapped, tr("Buffers swapped") } 50 { Pica::DebugContext::Event::BufferSwapped, tr("Buffers swapped") }
@@ -75,7 +75,7 @@ QVariant BreakPointModel::data(const QModelIndex& index, int role) const
75 case Role_IsEnabled: 75 case Role_IsEnabled:
76 { 76 {
77 auto context = context_weak.lock(); 77 auto context = context_weak.lock();
78 return context && context->breakpoints[event].enabled; 78 return context && context->breakpoints[(int)event].enabled;
79 } 79 }
80 80
81 default: 81 default:
@@ -110,7 +110,7 @@ bool BreakPointModel::setData(const QModelIndex& index, const QVariant& value, i
110 if (!context) 110 if (!context)
111 return false; 111 return false;
112 112
113 context->breakpoints[event].enabled = value == Qt::Checked; 113 context->breakpoints[(int)event].enabled = value == Qt::Checked;
114 QModelIndex changed_index = createIndex(index.row(), 0); 114 QModelIndex changed_index = createIndex(index.row(), 0);
115 emit dataChanged(changed_index, changed_index); 115 emit dataChanged(changed_index, changed_index);
116 return true; 116 return true;
diff --git a/src/citra_qt/debugger/graphics_framebuffer.cpp b/src/citra_qt/debugger/graphics_framebuffer.cpp
index c30e75933..68cff78b2 100644
--- a/src/citra_qt/debugger/graphics_framebuffer.cpp
+++ b/src/citra_qt/debugger/graphics_framebuffer.cpp
@@ -346,5 +346,11 @@ u32 GraphicsFramebufferWidget::BytesPerPixel(GraphicsFramebufferWidget::Format f
346 case Format::RGBA4: 346 case Format::RGBA4:
347 case Format::D16: 347 case Format::D16:
348 return 2; 348 return 2;
349 default:
350 UNREACHABLE_MSG("GraphicsFramebufferWidget::BytesPerPixel: this "
351 "should not be reached as this function should "
352 "be given a format which is in "
353 "GraphicsFramebufferWidget::Format. Instead got %i",
354 static_cast<int>(format));
349 } 355 }
350} 356}
diff --git a/src/citra_qt/debugger/graphics_tracing.cpp b/src/citra_qt/debugger/graphics_tracing.cpp
index e06498744..9c80f7ec9 100644
--- a/src/citra_qt/debugger/graphics_tracing.cpp
+++ b/src/citra_qt/debugger/graphics_tracing.cpp
@@ -2,6 +2,9 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
6#include <array>
7#include <iterator>
5#include <memory> 8#include <memory>
6 9
7#include <boost/range/algorithm/copy.hpp> 10#include <boost/range/algorithm/copy.hpp>
@@ -18,6 +21,7 @@
18 21
19#include "core/hw/gpu.h" 22#include "core/hw/gpu.h"
20#include "core/hw/lcd.h" 23#include "core/hw/lcd.h"
24#include "core/tracer/recorder.h"
21 25
22#include "nihstro/float24.h" 26#include "nihstro/float24.h"
23 27
@@ -70,7 +74,7 @@ void GraphicsTracingWidget::StartRecording() {
70 std::array<u32, 4 * 16> default_attributes; 74 std::array<u32, 4 * 16> default_attributes;
71 for (unsigned i = 0; i < 16; ++i) { 75 for (unsigned i = 0; i < 16; ++i) {
72 for (unsigned comp = 0; comp < 3; ++comp) { 76 for (unsigned comp = 0; comp < 3; ++comp) {
73 default_attributes[4 * i + comp] = nihstro::to_float24(Pica::g_state.vs.default_attributes[i][comp].ToFloat32()); 77 default_attributes[4 * i + comp] = nihstro::to_float24(Pica::g_state.vs_default_attributes[i][comp].ToFloat32());
74 } 78 }
75 } 79 }
76 80
diff --git a/src/citra_qt/debugger/graphics_vertex_shader.cpp b/src/citra_qt/debugger/graphics_vertex_shader.cpp
index d648d4640..391666d35 100644
--- a/src/citra_qt/debugger/graphics_vertex_shader.cpp
+++ b/src/citra_qt/debugger/graphics_vertex_shader.cpp
@@ -365,7 +365,7 @@ GraphicsVertexShaderWidget::GraphicsVertexShaderWidget(std::shared_ptr< Pica::De
365 input_data[i]->setValidator(new QDoubleValidator(input_data[i])); 365 input_data[i]->setValidator(new QDoubleValidator(input_data[i]));
366 } 366 }
367 367
368 breakpoint_warning = new QLabel(tr("(data only available at VertexLoaded breakpoints)")); 368 breakpoint_warning = new QLabel(tr("(data only available at vertex shader invocation breakpoints)"));
369 369
370 // TODO: Add some button for jumping to the shader entry point 370 // TODO: Add some button for jumping to the shader entry point
371 371
@@ -454,7 +454,7 @@ GraphicsVertexShaderWidget::GraphicsVertexShaderWidget(std::shared_ptr< Pica::De
454 454
455void GraphicsVertexShaderWidget::OnBreakPointHit(Pica::DebugContext::Event event, void* data) { 455void GraphicsVertexShaderWidget::OnBreakPointHit(Pica::DebugContext::Event event, void* data) {
456 auto input = static_cast<Pica::Shader::InputVertex*>(data); 456 auto input = static_cast<Pica::Shader::InputVertex*>(data);
457 if (event == Pica::DebugContext::Event::VertexLoaded) { 457 if (event == Pica::DebugContext::Event::VertexShaderInvocation) {
458 Reload(true, data); 458 Reload(true, data);
459 } else { 459 } else {
460 // No vertex data is retrievable => invalidate currently stored vertex data 460 // No vertex data is retrievable => invalidate currently stored vertex data
@@ -501,7 +501,7 @@ void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_d
501 info.labels.insert({ entry_point, "main" }); 501 info.labels.insert({ entry_point, "main" });
502 502
503 // Generate debug information 503 // Generate debug information
504 debug_data = Pica::Shader::ProduceDebugInfo(input_vertex, num_attributes, shader_config, shader_setup); 504 debug_data = Pica::g_state.vs.ProduceDebugInfo(input_vertex, num_attributes, shader_config, shader_setup);
505 505
506 // Reload widget state 506 // Reload widget state
507 for (int attr = 0; attr < num_attributes; ++attr) { 507 for (int attr = 0; attr < num_attributes; ++attr) {
@@ -515,7 +515,7 @@ void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_d
515 } 515 }
516 516
517 // Initialize debug info text for current cycle count 517 // Initialize debug info text for current cycle count
518 cycle_index->setMaximum(debug_data.records.size() - 1); 518 cycle_index->setMaximum(static_cast<int>(debug_data.records.size() - 1));
519 OnCycleIndexChanged(cycle_index->value()); 519 OnCycleIndexChanged(cycle_index->value());
520 520
521 model->endResetModel(); 521 model->endResetModel();
diff --git a/src/citra_qt/debugger/profiler.cpp b/src/citra_qt/debugger/profiler.cpp
index 4f6ba0e1f..585ac049a 100644
--- a/src/citra_qt/debugger/profiler.cpp
+++ b/src/citra_qt/debugger/profiler.cpp
@@ -9,13 +9,16 @@
9#include "citra_qt/debugger/profiler.h" 9#include "citra_qt/debugger/profiler.h"
10#include "citra_qt/util/util.h" 10#include "citra_qt/util/util.h"
11 11
12#include "common/common_types.h"
12#include "common/microprofile.h" 13#include "common/microprofile.h"
13#include "common/profiler_reporting.h" 14#include "common/profiler_reporting.h"
14 15
15// Include the implementation of the UI in this file. This isn't in microprofile.cpp because the 16// Include the implementation of the UI in this file. This isn't in microprofile.cpp because the
16// non-Qt frontends don't need it (and don't implement the UI drawing hooks either). 17// non-Qt frontends don't need it (and don't implement the UI drawing hooks either).
18#if MICROPROFILE_ENABLED
17#define MICROPROFILEUI_IMPL 1 19#define MICROPROFILEUI_IMPL 1
18#include "common/microprofileui.h" 20#include "common/microprofileui.h"
21#endif
19 22
20using namespace Common::Profiling; 23using namespace Common::Profiling;
21 24
@@ -34,21 +37,9 @@ static QVariant GetDataForColumn(int col, const AggregatedDuration& duration)
34 } 37 }
35} 38}
36 39
37static const TimingCategoryInfo* GetCategoryInfo(int id)
38{
39 const auto& categories = GetProfilingManager().GetTimingCategoriesInfo();
40 if ((size_t)id >= categories.size()) {
41 return nullptr;
42 } else {
43 return &categories[id];
44 }
45}
46
47ProfilerModel::ProfilerModel(QObject* parent) : QAbstractItemModel(parent) 40ProfilerModel::ProfilerModel(QObject* parent) : QAbstractItemModel(parent)
48{ 41{
49 updateProfilingInfo(); 42 updateProfilingInfo();
50 const auto& categories = GetProfilingManager().GetTimingCategoriesInfo();
51 results.time_per_category.resize(categories.size());
52} 43}
53 44
54QVariant ProfilerModel::headerData(int section, Qt::Orientation orientation, int role) const 45QVariant ProfilerModel::headerData(int section, Qt::Orientation orientation, int role) const
@@ -85,7 +76,7 @@ int ProfilerModel::rowCount(const QModelIndex& parent) const
85 if (parent.isValid()) { 76 if (parent.isValid()) {
86 return 0; 77 return 0;
87 } else { 78 } else {
88 return static_cast<int>(results.time_per_category.size() + 2); 79 return 2;
89 } 80 }
90} 81}
91 82
@@ -104,17 +95,6 @@ QVariant ProfilerModel::data(const QModelIndex& index, int role) const
104 } else { 95 } else {
105 return GetDataForColumn(index.column(), results.interframe_time); 96 return GetDataForColumn(index.column(), results.interframe_time);
106 } 97 }
107 } else {
108 if (index.column() == 0) {
109 const TimingCategoryInfo* info = GetCategoryInfo(index.row() - 2);
110 return info != nullptr ? QString(info->name) : QVariant();
111 } else {
112 if (index.row() - 2 < (int)results.time_per_category.size()) {
113 return GetDataForColumn(index.column(), results.time_per_category[index.row() - 2]);
114 } else {
115 return QVariant();
116 }
117 }
118 } 98 }
119 } 99 }
120 100
@@ -148,6 +128,8 @@ void ProfilerWidget::setProfilingInfoUpdateEnabled(bool enable)
148 } 128 }
149} 129}
150 130
131#if MICROPROFILE_ENABLED
132
151class MicroProfileWidget : public QWidget { 133class MicroProfileWidget : public QWidget {
152public: 134public:
153 MicroProfileWidget(QWidget* parent = nullptr); 135 MicroProfileWidget(QWidget* parent = nullptr);
@@ -169,8 +151,12 @@ private:
169 /// This timer is used to redraw the widget's contents continuously. To save resources, it only 151 /// This timer is used to redraw the widget's contents continuously. To save resources, it only
170 /// runs while the widget is visible. 152 /// runs while the widget is visible.
171 QTimer update_timer; 153 QTimer update_timer;
154 /// Scale the coordinate system appropriately when physical DPI != logical DPI.
155 qreal x_scale, y_scale;
172}; 156};
173 157
158#endif
159
174MicroProfileDialog::MicroProfileDialog(QWidget* parent) 160MicroProfileDialog::MicroProfileDialog(QWidget* parent)
175 : QWidget(parent, Qt::Dialog) 161 : QWidget(parent, Qt::Dialog)
176{ 162{
@@ -180,6 +166,8 @@ MicroProfileDialog::MicroProfileDialog(QWidget* parent)
180 // Remove the "?" button from the titlebar and enable the maximize button 166 // Remove the "?" button from the titlebar and enable the maximize button
181 setWindowFlags(windowFlags() & ~Qt::WindowContextHelpButtonHint | Qt::WindowMaximizeButtonHint); 167 setWindowFlags(windowFlags() & ~Qt::WindowContextHelpButtonHint | Qt::WindowMaximizeButtonHint);
182 168
169#if MICROPROFILE_ENABLED
170
183 MicroProfileWidget* widget = new MicroProfileWidget(this); 171 MicroProfileWidget* widget = new MicroProfileWidget(this);
184 172
185 QLayout* layout = new QVBoxLayout(this); 173 QLayout* layout = new QVBoxLayout(this);
@@ -191,6 +179,7 @@ MicroProfileDialog::MicroProfileDialog(QWidget* parent)
191 setFocusProxy(widget); 179 setFocusProxy(widget);
192 widget->setFocusPolicy(Qt::StrongFocus); 180 widget->setFocusPolicy(Qt::StrongFocus);
193 widget->setFocus(); 181 widget->setFocus();
182#endif
194} 183}
195 184
196QAction* MicroProfileDialog::toggleViewAction() { 185QAction* MicroProfileDialog::toggleViewAction() {
@@ -218,6 +207,9 @@ void MicroProfileDialog::hideEvent(QHideEvent* ev) {
218 QWidget::hideEvent(ev); 207 QWidget::hideEvent(ev);
219} 208}
220 209
210
211#if MICROPROFILE_ENABLED
212
221/// There's no way to pass a user pointer to MicroProfile, so this variable is used to make the 213/// There's no way to pass a user pointer to MicroProfile, so this variable is used to make the
222/// QPainter available inside the drawing callbacks. 214/// QPainter available inside the drawing callbacks.
223static QPainter* mp_painter = nullptr; 215static QPainter* mp_painter = nullptr;
@@ -230,11 +222,17 @@ MicroProfileWidget::MicroProfileWidget(QWidget* parent) : QWidget(parent) {
230 MicroProfileInitUI(); 222 MicroProfileInitUI();
231 223
232 connect(&update_timer, SIGNAL(timeout()), SLOT(update())); 224 connect(&update_timer, SIGNAL(timeout()), SLOT(update()));
225
226 QPainter painter(this);
227 x_scale = qreal(painter.device()->physicalDpiX()) / qreal(painter.device()->logicalDpiX());
228 y_scale = qreal(painter.device()->physicalDpiY()) / qreal(painter.device()->logicalDpiY());
233} 229}
234 230
235void MicroProfileWidget::paintEvent(QPaintEvent* ev) { 231void MicroProfileWidget::paintEvent(QPaintEvent* ev) {
236 QPainter painter(this); 232 QPainter painter(this);
237 233
234 painter.scale(x_scale, y_scale);
235
238 painter.setBackground(Qt::black); 236 painter.setBackground(Qt::black);
239 painter.eraseRect(rect()); 237 painter.eraseRect(rect());
240 238
@@ -258,24 +256,24 @@ void MicroProfileWidget::hideEvent(QHideEvent* ev) {
258} 256}
259 257
260void MicroProfileWidget::mouseMoveEvent(QMouseEvent* ev) { 258void MicroProfileWidget::mouseMoveEvent(QMouseEvent* ev) {
261 MicroProfileMousePosition(ev->x(), ev->y(), 0); 259 MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, 0);
262 ev->accept(); 260 ev->accept();
263} 261}
264 262
265void MicroProfileWidget::mousePressEvent(QMouseEvent* ev) { 263void MicroProfileWidget::mousePressEvent(QMouseEvent* ev) {
266 MicroProfileMousePosition(ev->x(), ev->y(), 0); 264 MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, 0);
267 MicroProfileMouseButton(ev->buttons() & Qt::LeftButton, ev->buttons() & Qt::RightButton); 265 MicroProfileMouseButton(ev->buttons() & Qt::LeftButton, ev->buttons() & Qt::RightButton);
268 ev->accept(); 266 ev->accept();
269} 267}
270 268
271void MicroProfileWidget::mouseReleaseEvent(QMouseEvent* ev) { 269void MicroProfileWidget::mouseReleaseEvent(QMouseEvent* ev) {
272 MicroProfileMousePosition(ev->x(), ev->y(), 0); 270 MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, 0);
273 MicroProfileMouseButton(ev->buttons() & Qt::LeftButton, ev->buttons() & Qt::RightButton); 271 MicroProfileMouseButton(ev->buttons() & Qt::LeftButton, ev->buttons() & Qt::RightButton);
274 ev->accept(); 272 ev->accept();
275} 273}
276 274
277void MicroProfileWidget::wheelEvent(QWheelEvent* ev) { 275void MicroProfileWidget::wheelEvent(QWheelEvent* ev) {
278 MicroProfileMousePosition(ev->x(), ev->y(), ev->delta() / 120); 276 MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, ev->delta() / 120);
279 ev->accept(); 277 ev->accept();
280} 278}
281 279
@@ -337,3 +335,4 @@ void MicroProfileDrawLine2D(u32 vertices_length, float* vertices, u32 hex_color)
337 mp_painter->drawPolyline(point_buf.data(), vertices_length); 335 mp_painter->drawPolyline(point_buf.data(), vertices_length);
338 point_buf.clear(); 336 point_buf.clear();
339} 337}
338#endif
diff --git a/src/citra_qt/debugger/profiler.h b/src/citra_qt/debugger/profiler.h
index 036054740..3b38ed8ec 100644
--- a/src/citra_qt/debugger/profiler.h
+++ b/src/citra_qt/debugger/profiler.h
@@ -7,8 +7,10 @@
7#include <QAbstractItemModel> 7#include <QAbstractItemModel>
8#include <QDockWidget> 8#include <QDockWidget>
9#include <QTimer> 9#include <QTimer>
10
10#include "ui_profiler.h" 11#include "ui_profiler.h"
11 12
13#include "common/microprofile.h"
12#include "common/profiler_reporting.h" 14#include "common/profiler_reporting.h"
13 15
14class ProfilerModel : public QAbstractItemModel 16class ProfilerModel : public QAbstractItemModel
@@ -49,6 +51,7 @@ private:
49 QTimer update_timer; 51 QTimer update_timer;
50}; 52};
51 53
54
52class MicroProfileDialog : public QWidget { 55class MicroProfileDialog : public QWidget {
53 Q_OBJECT 56 Q_OBJECT
54 57
diff --git a/src/citra_qt/game_list.cpp b/src/citra_qt/game_list.cpp
index ffcab1f03..d4ac9c96e 100644
--- a/src/citra_qt/game_list.cpp
+++ b/src/citra_qt/game_list.cpp
@@ -8,6 +8,7 @@
8 8
9#include "game_list.h" 9#include "game_list.h"
10#include "game_list_p.h" 10#include "game_list_p.h"
11#include "ui_settings.h"
11 12
12#include "core/loader/loader.h" 13#include "core/loader/loader.h"
13 14
@@ -33,8 +34,8 @@ GameList::GameList(QWidget* parent)
33 tree_view->setUniformRowHeights(true); 34 tree_view->setUniformRowHeights(true);
34 35
35 item_model->insertColumns(0, COLUMN_COUNT); 36 item_model->insertColumns(0, COLUMN_COUNT);
36 item_model->setHeaderData(COLUMN_FILE_TYPE, Qt::Horizontal, "File type");
37 item_model->setHeaderData(COLUMN_NAME, Qt::Horizontal, "Name"); 37 item_model->setHeaderData(COLUMN_NAME, Qt::Horizontal, "Name");
38 item_model->setHeaderData(COLUMN_FILE_TYPE, Qt::Horizontal, "File type");
38 item_model->setHeaderData(COLUMN_SIZE, Qt::Horizontal, "Size"); 39 item_model->setHeaderData(COLUMN_SIZE, Qt::Horizontal, "Size");
39 40
40 connect(tree_view, SIGNAL(activated(const QModelIndex&)), this, SLOT(ValidateEntry(const QModelIndex&))); 41 connect(tree_view, SIGNAL(activated(const QModelIndex&)), this, SLOT(ValidateEntry(const QModelIndex&)));
@@ -100,19 +101,19 @@ void GameList::PopulateAsync(const QString& dir_path, bool deep_scan)
100 current_worker = std::move(worker); 101 current_worker = std::move(worker);
101} 102}
102 103
103void GameList::SaveInterfaceLayout(QSettings& settings) 104void GameList::SaveInterfaceLayout()
104{ 105{
105 settings.beginGroup("UILayout"); 106 UISettings::values.gamelist_header_state = tree_view->header()->saveState();
106 settings.setValue("gameListHeaderState", tree_view->header()->saveState());
107 settings.endGroup();
108} 107}
109 108
110void GameList::LoadInterfaceLayout(QSettings& settings) 109void GameList::LoadInterfaceLayout()
111{ 110{
112 auto header = tree_view->header(); 111 auto header = tree_view->header();
113 settings.beginGroup("UILayout"); 112 if (!header->restoreState(UISettings::values.gamelist_header_state)) {
114 header->restoreState(settings.value("gameListHeaderState").toByteArray()); 113 // We are using the name column to display icons and titles
115 settings.endGroup(); 114 // so make it as large as possible as default.
115 header->resizeSection(COLUMN_NAME, header->width());
116 }
116 117
117 item_model->sort(header->sortIndicatorSection(), header->sortIndicatorOrder()); 118 item_model->sort(header->sortIndicatorSection(), header->sortIndicatorOrder());
118} 119}
@@ -146,9 +147,15 @@ void GameListWorker::AddFstEntriesToGameList(const std::string& dir_path, bool d
146 LOG_WARNING(Frontend, "Filetype and extension of file %s do not match.", physical_name.c_str()); 147 LOG_WARNING(Frontend, "Filetype and extension of file %s do not match.", physical_name.c_str());
147 } 148 }
148 149
150 std::vector<u8> smdh;
151 std::unique_ptr<Loader::AppLoader> loader = Loader::GetLoader(FileUtil::IOFile(physical_name, "rb"), filetype, filename_filename, physical_name);
152
153 if (loader)
154 loader->ReadIcon(smdh);
155
149 emit EntryReady({ 156 emit EntryReady({
157 new GameListItemPath(QString::fromStdString(physical_name), smdh),
150 new GameListItem(QString::fromStdString(Loader::GetFileTypeString(filetype))), 158 new GameListItem(QString::fromStdString(Loader::GetFileTypeString(filetype))),
151 new GameListItemPath(QString::fromStdString(physical_name)),
152 new GameListItemSize(FileUtil::GetSize(physical_name)), 159 new GameListItemSize(FileUtil::GetSize(physical_name)),
153 }); 160 });
154 } 161 }
diff --git a/src/citra_qt/game_list.h b/src/citra_qt/game_list.h
index 0950d9622..198674f04 100644
--- a/src/citra_qt/game_list.h
+++ b/src/citra_qt/game_list.h
@@ -20,8 +20,8 @@ class GameList : public QWidget {
20 20
21public: 21public:
22 enum { 22 enum {
23 COLUMN_FILE_TYPE,
24 COLUMN_NAME, 23 COLUMN_NAME,
24 COLUMN_FILE_TYPE,
25 COLUMN_SIZE, 25 COLUMN_SIZE,
26 COLUMN_COUNT, // Number of columns 26 COLUMN_COUNT, // Number of columns
27 }; 27 };
@@ -31,8 +31,8 @@ public:
31 31
32 void PopulateAsync(const QString& dir_path, bool deep_scan); 32 void PopulateAsync(const QString& dir_path, bool deep_scan);
33 33
34 void SaveInterfaceLayout(QSettings& settings); 34 void SaveInterfaceLayout();
35 void LoadInterfaceLayout(QSettings& settings); 35 void LoadInterfaceLayout();
36 36
37public slots: 37public slots:
38 void AddEntry(QList<QStandardItem*> entry_items); 38 void AddEntry(QList<QStandardItem*> entry_items);
diff --git a/src/citra_qt/game_list_p.h b/src/citra_qt/game_list_p.h
index 820012bce..284f5da81 100644
--- a/src/citra_qt/game_list_p.h
+++ b/src/citra_qt/game_list_p.h
@@ -6,13 +6,85 @@
6 6
7#include <atomic> 7#include <atomic>
8 8
9#include <QImage>
9#include <QRunnable> 10#include <QRunnable>
10#include <QStandardItem> 11#include <QStandardItem>
11#include <QString> 12#include <QString>
12 13
13#include "citra_qt/util/util.h" 14#include "citra_qt/util/util.h"
14#include "common/string_util.h" 15#include "common/string_util.h"
16#include "common/color.h"
15 17
18#include "core/loader/loader.h"
19
20#include "video_core/utils.h"
21
22/**
23 * Tests if data is a valid SMDH by its length and magic number.
24 * @param smdh_data data buffer to test
25 * @return bool test result
26 */
27static bool IsValidSMDH(const std::vector<u8>& smdh_data) {
28 if (smdh_data.size() < sizeof(Loader::SMDH))
29 return false;
30
31 u32 magic;
32 memcpy(&magic, smdh_data.data(), 4);
33
34 return Loader::MakeMagic('S', 'M', 'D', 'H') == magic;
35}
36
37/**
38 * Gets game icon from SMDH
39 * @param sdmh SMDH data
40 * @param large If true, returns large icon (48x48), otherwise returns small icon (24x24)
41 * @return QPixmap game icon
42 */
43static QPixmap GetIconFromSMDH(const Loader::SMDH& smdh, bool large) {
44 u32 size;
45 const u8* icon_data;
46
47 if (large) {
48 size = 48;
49 icon_data = smdh.large_icon.data();
50 } else {
51 size = 24;
52 icon_data = smdh.small_icon.data();
53 }
54
55 QImage icon(size, size, QImage::Format::Format_RGB888);
56 for (u32 x = 0; x < size; ++x) {
57 for (u32 y = 0; y < size; ++y) {
58 u32 coarse_y = y & ~7;
59 auto v = Color::DecodeRGB565(
60 icon_data + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * size * 2);
61 icon.setPixel(x, y, qRgb(v.r(), v.g(), v.b()));
62 }
63 }
64 return QPixmap::fromImage(icon);
65}
66
67/**
68 * Gets the default icon (for games without valid SMDH)
69 * @param large If true, returns large icon (48x48), otherwise returns small icon (24x24)
70 * @return QPixmap default icon
71 */
72static QPixmap GetDefaultIcon(bool large) {
73 int size = large ? 48 : 24;
74 QPixmap icon(size, size);
75 icon.fill(Qt::transparent);
76 return icon;
77}
78
79/**
80 * Gets the short game title fromn SMDH
81 * @param sdmh SMDH data
82 * @param language title language
83 * @return QString short title
84 */
85static QString GetShortTitleFromSMDH(const Loader::SMDH& smdh, Loader::SMDH::TitleLanguage language) {
86 return QString::fromUtf16(smdh.titles[static_cast<int>(language)].short_title.data());
87}
16 88
17class GameListItem : public QStandardItem { 89class GameListItem : public QStandardItem {
18 90
@@ -27,29 +99,43 @@ public:
27 * A specialization of GameListItem for path values. 99 * A specialization of GameListItem for path values.
28 * This class ensures that for every full path value it holds, a correct string representation 100 * This class ensures that for every full path value it holds, a correct string representation
29 * of just the filename (with no extension) will be displayed to the user. 101 * of just the filename (with no extension) will be displayed to the user.
102 * If this class receives valid SMDH data, it will also display game icons and titles.
30 */ 103 */
31class GameListItemPath : public GameListItem { 104class GameListItemPath : public GameListItem {
32 105
33public: 106public:
34 static const int FullPathRole = Qt::UserRole + 1; 107 static const int FullPathRole = Qt::UserRole + 1;
108 static const int TitleRole = Qt::UserRole + 2;
35 109
36 GameListItemPath(): GameListItem() {} 110 GameListItemPath(): GameListItem() {}
37 GameListItemPath(const QString& game_path): GameListItem() 111 GameListItemPath(const QString& game_path, const std::vector<u8>& smdh_data): GameListItem()
38 { 112 {
39 setData(game_path, FullPathRole); 113 setData(game_path, FullPathRole);
114
115 if (!IsValidSMDH(smdh_data)) {
116 // SMDH is not valid, set a default icon
117 setData(GetDefaultIcon(true), Qt::DecorationRole);
118 return;
119 }
120
121 Loader::SMDH smdh;
122 memcpy(&smdh, smdh_data.data(), sizeof(Loader::SMDH));
123
124 // Get icon from SMDH
125 setData(GetIconFromSMDH(smdh, true), Qt::DecorationRole);
126
127 // Get title form SMDH
128 setData(GetShortTitleFromSMDH(smdh, Loader::SMDH::TitleLanguage::English), TitleRole);
40 } 129 }
41 130
42 void setData(const QVariant& value, int role) override 131 QVariant data(int role) const override {
43 { 132 if (role == Qt::DisplayRole) {
44 // By specializing setData for FullPathRole, we can ensure that the two string
45 // representations of the data are always accurate and in the correct format.
46 if (role == FullPathRole) {
47 std::string filename; 133 std::string filename;
48 Common::SplitPath(value.toString().toStdString(), nullptr, &filename, nullptr); 134 Common::SplitPath(data(FullPathRole).toString().toStdString(), nullptr, &filename, nullptr);
49 GameListItem::setData(QString::fromStdString(filename), Qt::DisplayRole); 135 QString title = data(TitleRole).toString();
50 GameListItem::setData(value, FullPathRole); 136 return QString::fromStdString(filename) + (title.isEmpty() ? "" : "\n " + title);
51 } else { 137 } else {
52 GameListItem::setData(value, role); 138 return GameListItem::data(role);
53 } 139 }
54 } 140 }
55}; 141};
diff --git a/src/citra_qt/hotkeys.cpp b/src/citra_qt/hotkeys.cpp
index ed6b12fc4..41f95c63d 100644
--- a/src/citra_qt/hotkeys.cpp
+++ b/src/citra_qt/hotkeys.cpp
@@ -4,11 +4,12 @@
4 4
5#include <map> 5#include <map>
6 6
7#include <QtGlobal>
7#include <QKeySequence> 8#include <QKeySequence>
8#include <QSettings>
9#include <QShortcut> 9#include <QShortcut>
10 10
11#include "citra_qt/hotkeys.h" 11#include "citra_qt/hotkeys.h"
12#include "citra_qt/ui_settings.h"
12 13
13struct Hotkey 14struct Hotkey
14{ 15{
@@ -24,54 +25,39 @@ typedef std::map<QString, HotkeyMap> HotkeyGroupMap;
24 25
25HotkeyGroupMap hotkey_groups; 26HotkeyGroupMap hotkey_groups;
26 27
27void SaveHotkeys(QSettings& settings) 28void SaveHotkeys()
28{ 29{
29 settings.beginGroup("Shortcuts"); 30 UISettings::values.shortcuts.clear();
30
31 for (auto group : hotkey_groups) 31 for (auto group : hotkey_groups)
32 { 32 {
33 settings.beginGroup(group.first);
34 for (auto hotkey : group.second) 33 for (auto hotkey : group.second)
35 { 34 {
36 settings.beginGroup(hotkey.first); 35 UISettings::values.shortcuts.emplace_back(
37 settings.setValue(QString("KeySeq"), hotkey.second.keyseq.toString()); 36 UISettings::Shortcut(group.first + "/" + hotkey.first,
38 settings.setValue(QString("Context"), hotkey.second.context); 37 UISettings::ContextualShortcut(hotkey.second.keyseq.toString(),
39 settings.endGroup(); 38 hotkey.second.context)));
40 } 39 }
41 settings.endGroup();
42 } 40 }
43 settings.endGroup();
44} 41}
45 42
46void LoadHotkeys(QSettings& settings) 43void LoadHotkeys()
47{ 44{
48 settings.beginGroup("Shortcuts");
49
50 // Make sure NOT to use a reference here because it would become invalid once we call beginGroup() 45 // Make sure NOT to use a reference here because it would become invalid once we call beginGroup()
51 QStringList groups = settings.childGroups(); 46 for (auto shortcut : UISettings::values.shortcuts)
52 for (auto group : groups)
53 { 47 {
54 settings.beginGroup(group); 48 QStringList cat = shortcut.first.split("/");
49 Q_ASSERT(cat.size() >= 2);
55 50
56 QStringList hotkeys = settings.childGroups(); 51 // RegisterHotkey assigns default keybindings, so use old values as default parameters
57 for (auto hotkey : hotkeys) 52 Hotkey& hk = hotkey_groups[cat[0]][cat[1]];
53 if (!shortcut.second.first.isEmpty())
58 { 54 {
59 settings.beginGroup(hotkey); 55 hk.keyseq = QKeySequence::fromString(shortcut.second.first);
60 56 hk.context = (Qt::ShortcutContext)shortcut.second.second;
61 // RegisterHotkey assigns default keybindings, so use old values as default parameters
62 Hotkey& hk = hotkey_groups[group][hotkey];
63 hk.keyseq = QKeySequence::fromString(settings.value("KeySeq", hk.keyseq.toString()).toString());
64 hk.context = (Qt::ShortcutContext)settings.value("Context", hk.context).toInt();
65 if (hk.shortcut)
66 hk.shortcut->setKey(hk.keyseq);
67
68 settings.endGroup();
69 } 57 }
70 58 if (hk.shortcut)
71 settings.endGroup(); 59 hk.shortcut->setKey(hk.keyseq);
72 } 60 }
73
74 settings.endGroup();
75} 61}
76 62
77void RegisterHotkey(const QString& group, const QString& action, const QKeySequence& default_keyseq, Qt::ShortcutContext default_context) 63void RegisterHotkey(const QString& group, const QString& action, const QKeySequence& default_keyseq, Qt::ShortcutContext default_context)
@@ -94,7 +80,7 @@ QShortcut* GetHotkey(const QString& group, const QString& action, QWidget* widge
94} 80}
95 81
96 82
97GHotkeysDialog::GHotkeysDialog(QWidget* parent): QDialog(parent) 83GHotkeysDialog::GHotkeysDialog(QWidget* parent): QWidget(parent)
98{ 84{
99 ui.setupUi(this); 85 ui.setupUi(this);
100 86
diff --git a/src/citra_qt/hotkeys.h b/src/citra_qt/hotkeys.h
index 2fe635882..38aa5f012 100644
--- a/src/citra_qt/hotkeys.h
+++ b/src/citra_qt/hotkeys.h
@@ -2,6 +2,8 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#pragma once
6
5#include "ui_hotkeys.h" 7#include "ui_hotkeys.h"
6 8
7class QDialog; 9class QDialog;
@@ -33,16 +35,16 @@ QShortcut* GetHotkey(const QString& group, const QString& action, QWidget* widge
33 * 35 *
34 * @note Each hotkey group will be stored a settings group; For each hotkey inside that group, a settings group will be created to store the key sequence and the hotkey context. 36 * @note Each hotkey group will be stored a settings group; For each hotkey inside that group, a settings group will be created to store the key sequence and the hotkey context.
35 */ 37 */
36void SaveHotkeys(QSettings& settings); 38void SaveHotkeys();
37 39
38/** 40/**
39 * Loads hotkeys from the settings file. 41 * Loads hotkeys from the settings file.
40 * 42 *
41 * @note Yet unregistered hotkeys which are present in the settings will automatically be registered. 43 * @note Yet unregistered hotkeys which are present in the settings will automatically be registered.
42 */ 44 */
43void LoadHotkeys(QSettings& settings); 45void LoadHotkeys();
44 46
45class GHotkeysDialog : public QDialog 47class GHotkeysDialog : public QWidget
46{ 48{
47 Q_OBJECT 49 Q_OBJECT
48 50
diff --git a/src/citra_qt/hotkeys.ui b/src/citra_qt/hotkeys.ui
index 38a9a14d1..050fe064e 100644
--- a/src/citra_qt/hotkeys.ui
+++ b/src/citra_qt/hotkeys.ui
@@ -1,7 +1,7 @@
1<?xml version="1.0" encoding="UTF-8"?> 1<?xml version="1.0" encoding="UTF-8"?>
2<ui version="4.0"> 2<ui version="4.0">
3 <class>hotkeys</class> 3 <class>hotkeys</class>
4 <widget class="QDialog" name="hotkeys"> 4 <widget class="QWidget" name="hotkeys">
5 <property name="geometry"> 5 <property name="geometry">
6 <rect> 6 <rect>
7 <x>0</x> 7 <x>0</x>
@@ -39,51 +39,8 @@
39 </column> 39 </column>
40 </widget> 40 </widget>
41 </item> 41 </item>
42 <item>
43 <widget class="QDialogButtonBox" name="buttonBox">
44 <property name="orientation">
45 <enum>Qt::Horizontal</enum>
46 </property>
47 <property name="standardButtons">
48 <set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok|QDialogButtonBox::Reset</set>
49 </property>
50 </widget>
51 </item>
52 </layout> 42 </layout>
53 </widget> 43 </widget>
54 <resources/> 44 <resources/>
55 <connections> 45 <connections/>
56 <connection>
57 <sender>buttonBox</sender>
58 <signal>accepted()</signal>
59 <receiver>hotkeys</receiver>
60 <slot>accept()</slot>
61 <hints>
62 <hint type="sourcelabel">
63 <x>248</x>
64 <y>254</y>
65 </hint>
66 <hint type="destinationlabel">
67 <x>157</x>
68 <y>274</y>
69 </hint>
70 </hints>
71 </connection>
72 <connection>
73 <sender>buttonBox</sender>
74 <signal>rejected()</signal>
75 <receiver>hotkeys</receiver>
76 <slot>reject()</slot>
77 <hints>
78 <hint type="sourcelabel">
79 <x>316</x>
80 <y>260</y>
81 </hint>
82 <hint type="destinationlabel">
83 <x>286</x>
84 <y>274</y>
85 </hint>
86 </hints>
87 </connection>
88 </connections>
89</ui> 46</ui>
diff --git a/src/citra_qt/main.cpp b/src/citra_qt/main.cpp
index ca0ae6f7b..a85c94a4b 100644
--- a/src/citra_qt/main.cpp
+++ b/src/citra_qt/main.cpp
@@ -6,6 +6,9 @@
6#include <memory> 6#include <memory>
7#include <thread> 7#include <thread>
8 8
9#include <glad/glad.h>
10
11#define QT_NO_OPENGL
9#include <QDesktopWidget> 12#include <QDesktopWidget>
10#include <QtGui> 13#include <QtGui>
11#include <QFileDialog> 14#include <QFileDialog>
@@ -14,9 +17,11 @@
14 17
15#include "citra_qt/bootmanager.h" 18#include "citra_qt/bootmanager.h"
16#include "citra_qt/config.h" 19#include "citra_qt/config.h"
20#include "citra_qt/configure_dialog.h"
17#include "citra_qt/game_list.h" 21#include "citra_qt/game_list.h"
18#include "citra_qt/hotkeys.h" 22#include "citra_qt/hotkeys.h"
19#include "citra_qt/main.h" 23#include "citra_qt/main.h"
24#include "citra_qt/ui_settings.h"
20 25
21// Debugger 26// Debugger
22#include "citra_qt/debugger/callstack.h" 27#include "citra_qt/debugger/callstack.h"
@@ -50,12 +55,10 @@
50 55
51#include "video_core/video_core.h" 56#include "video_core/video_core.h"
52 57
53GMainWindow::GMainWindow() : emu_thread(nullptr) 58GMainWindow::GMainWindow() : config(new Config()), emu_thread(nullptr)
54{ 59{
55 Pica::g_debug_context = Pica::DebugContext::Construct(); 60 Pica::g_debug_context = Pica::DebugContext::Construct();
56 61
57 Config config;
58
59 ui.setupUi(this); 62 ui.setupUi(this);
60 statusBar()->hide(); 63 statusBar()->hide();
61 64
@@ -69,8 +72,10 @@ GMainWindow::GMainWindow() : emu_thread(nullptr)
69 addDockWidget(Qt::BottomDockWidgetArea, profilerWidget); 72 addDockWidget(Qt::BottomDockWidgetArea, profilerWidget);
70 profilerWidget->hide(); 73 profilerWidget->hide();
71 74
75#if MICROPROFILE_ENABLED
72 microProfileDialog = new MicroProfileDialog(this); 76 microProfileDialog = new MicroProfileDialog(this);
73 microProfileDialog->hide(); 77 microProfileDialog->hide();
78#endif
74 79
75 disasmWidget = new DisassemblerWidget(this, emu_thread.get()); 80 disasmWidget = new DisassemblerWidget(this, emu_thread.get());
76 addDockWidget(Qt::BottomDockWidgetArea, disasmWidget); 81 addDockWidget(Qt::BottomDockWidgetArea, disasmWidget);
@@ -110,7 +115,9 @@ GMainWindow::GMainWindow() : emu_thread(nullptr)
110 115
111 QMenu* debug_menu = ui.menu_View->addMenu(tr("Debugging")); 116 QMenu* debug_menu = ui.menu_View->addMenu(tr("Debugging"));
112 debug_menu->addAction(profilerWidget->toggleViewAction()); 117 debug_menu->addAction(profilerWidget->toggleViewAction());
118#if MICROPROFILE_ENABLED
113 debug_menu->addAction(microProfileDialog->toggleViewAction()); 119 debug_menu->addAction(microProfileDialog->toggleViewAction());
120#endif
114 debug_menu->addAction(disasmWidget->toggleViewAction()); 121 debug_menu->addAction(disasmWidget->toggleViewAction());
115 debug_menu->addAction(registersWidget->toggleViewAction()); 122 debug_menu->addAction(registersWidget->toggleViewAction());
116 debug_menu->addAction(callstackWidget->toggleViewAction()); 123 debug_menu->addAction(callstackWidget->toggleViewAction());
@@ -133,33 +140,20 @@ GMainWindow::GMainWindow() : emu_thread(nullptr)
133 setGeometry(x, y, w, h); 140 setGeometry(x, y, w, h);
134 141
135 // Restore UI state 142 // Restore UI state
136 QSettings settings; 143 restoreGeometry(UISettings::values.geometry);
137 144 restoreState(UISettings::values.state);
138 settings.beginGroup("UILayout"); 145 render_window->restoreGeometry(UISettings::values.renderwindow_geometry);
139 restoreGeometry(settings.value("geometry").toByteArray()); 146#if MICROPROFILE_ENABLED
140 restoreState(settings.value("state").toByteArray()); 147 microProfileDialog->restoreGeometry(UISettings::values.microprofile_geometry);
141 render_window->restoreGeometry(settings.value("geometryRenderWindow").toByteArray()); 148 microProfileDialog->setVisible(UISettings::values.microprofile_visible);
142 microProfileDialog->restoreGeometry(settings.value("microProfileDialogGeometry").toByteArray()); 149#endif
143 microProfileDialog->setVisible(settings.value("microProfileDialogVisible").toBool());
144 settings.endGroup();
145
146 game_list->LoadInterfaceLayout(settings);
147
148 ui.action_Use_Gdbstub->setChecked(Settings::values.use_gdbstub);
149 SetGdbstubEnabled(ui.action_Use_Gdbstub->isChecked());
150
151 GDBStub::SetServerPort(static_cast<u32>(Settings::values.gdbstub_port));
152
153 ui.action_Use_Hardware_Renderer->setChecked(Settings::values.use_hw_renderer);
154 SetHardwareRendererEnabled(ui.action_Use_Hardware_Renderer->isChecked());
155 150
156 ui.action_Use_Shader_JIT->setChecked(Settings::values.use_shader_jit); 151 game_list->LoadInterfaceLayout();
157 SetShaderJITEnabled(ui.action_Use_Shader_JIT->isChecked());
158 152
159 ui.action_Single_Window_Mode->setChecked(settings.value("singleWindowMode", true).toBool()); 153 ui.action_Single_Window_Mode->setChecked(UISettings::values.single_window_mode);
160 ToggleWindowMode(); 154 ToggleWindowMode();
161 155
162 ui.actionDisplay_widget_title_bars->setChecked(settings.value("displayTitleBars", true).toBool()); 156 ui.actionDisplay_widget_title_bars->setChecked(UISettings::values.display_titlebar);
163 OnDisplayTitleBars(ui.actionDisplay_widget_title_bars->isChecked()); 157 OnDisplayTitleBars(ui.actionDisplay_widget_title_bars->isChecked());
164 158
165 // Prepare actions for recent files 159 // Prepare actions for recent files
@@ -172,21 +166,16 @@ GMainWindow::GMainWindow() : emu_thread(nullptr)
172 } 166 }
173 UpdateRecentFiles(); 167 UpdateRecentFiles();
174 168
175 confirm_before_closing = settings.value("confirmClose", true).toBool();
176
177 // Setup connections 169 // Setup connections
178 connect(game_list, SIGNAL(GameChosen(QString)), this, SLOT(OnGameListLoadFile(QString))); 170 connect(game_list, SIGNAL(GameChosen(QString)), this, SLOT(OnGameListLoadFile(QString)), Qt::DirectConnection);
179 connect(ui.action_Load_File, SIGNAL(triggered()), this, SLOT(OnMenuLoadFile())); 171 connect(ui.action_Configure, SIGNAL(triggered()), this, SLOT(OnConfigure()));
172 connect(ui.action_Load_File, SIGNAL(triggered()), this, SLOT(OnMenuLoadFile()),Qt::DirectConnection);
180 connect(ui.action_Load_Symbol_Map, SIGNAL(triggered()), this, SLOT(OnMenuLoadSymbolMap())); 173 connect(ui.action_Load_Symbol_Map, SIGNAL(triggered()), this, SLOT(OnMenuLoadSymbolMap()));
181 connect(ui.action_Select_Game_List_Root, SIGNAL(triggered()), this, SLOT(OnMenuSelectGameListRoot())); 174 connect(ui.action_Select_Game_List_Root, SIGNAL(triggered()), this, SLOT(OnMenuSelectGameListRoot()));
182 connect(ui.action_Start, SIGNAL(triggered()), this, SLOT(OnStartGame())); 175 connect(ui.action_Start, SIGNAL(triggered()), this, SLOT(OnStartGame()));
183 connect(ui.action_Pause, SIGNAL(triggered()), this, SLOT(OnPauseGame())); 176 connect(ui.action_Pause, SIGNAL(triggered()), this, SLOT(OnPauseGame()));
184 connect(ui.action_Stop, SIGNAL(triggered()), this, SLOT(OnStopGame())); 177 connect(ui.action_Stop, SIGNAL(triggered()), this, SLOT(OnStopGame()));
185 connect(ui.action_Use_Hardware_Renderer, SIGNAL(triggered(bool)), this, SLOT(SetHardwareRendererEnabled(bool)));
186 connect(ui.action_Use_Shader_JIT, SIGNAL(triggered(bool)), this, SLOT(SetShaderJITEnabled(bool)));
187 connect(ui.action_Use_Gdbstub, SIGNAL(triggered(bool)), this, SLOT(SetGdbstubEnabled(bool)));
188 connect(ui.action_Single_Window_Mode, SIGNAL(triggered(bool)), this, SLOT(ToggleWindowMode())); 178 connect(ui.action_Single_Window_Mode, SIGNAL(triggered(bool)), this, SLOT(ToggleWindowMode()));
189 connect(ui.action_Hotkeys, SIGNAL(triggered()), this, SLOT(OnOpenHotkeysDialog()));
190 179
191 connect(this, SIGNAL(EmulationStarting(EmuThread*)), disasmWidget, SLOT(OnEmulationStarting(EmuThread*))); 180 connect(this, SIGNAL(EmulationStarting(EmuThread*)), disasmWidget, SLOT(OnEmulationStarting(EmuThread*)));
192 connect(this, SIGNAL(EmulationStopping()), disasmWidget, SLOT(OnEmulationStopping())); 181 connect(this, SIGNAL(EmulationStopping()), disasmWidget, SLOT(OnEmulationStopping()));
@@ -201,7 +190,7 @@ GMainWindow::GMainWindow() : emu_thread(nullptr)
201 // Setup hotkeys 190 // Setup hotkeys
202 RegisterHotkey("Main Window", "Load File", QKeySequence::Open); 191 RegisterHotkey("Main Window", "Load File", QKeySequence::Open);
203 RegisterHotkey("Main Window", "Start Emulation"); 192 RegisterHotkey("Main Window", "Start Emulation");
204 LoadHotkeys(settings); 193 LoadHotkeys();
205 194
206 connect(GetHotkey("Main Window", "Load File", this), SIGNAL(activated()), this, SLOT(OnMenuLoadFile())); 195 connect(GetHotkey("Main Window", "Load File", this), SIGNAL(activated()), this, SLOT(OnMenuLoadFile()));
207 connect(GetHotkey("Main Window", "Start Emulation", this), SIGNAL(activated()), this, SLOT(OnStartGame())); 196 connect(GetHotkey("Main Window", "Start Emulation", this), SIGNAL(activated()), this, SLOT(OnStartGame()));
@@ -211,7 +200,7 @@ GMainWindow::GMainWindow() : emu_thread(nullptr)
211 200
212 show(); 201 show();
213 202
214 game_list->PopulateAsync(settings.value("gameListRootDir", ".").toString(), settings.value("gameListDeepScan", false).toBool()); 203 game_list->PopulateAsync(UISettings::values.gamedir, UISettings::values.gamedir_deepscan);
215 204
216 QStringList args = QApplication::arguments(); 205 QStringList args = QApplication::arguments();
217 if (args.length() >= 2) { 206 if (args.length() >= 2) {
@@ -254,6 +243,14 @@ bool GMainWindow::InitializeSystem() {
254 if (emu_thread != nullptr) 243 if (emu_thread != nullptr)
255 ShutdownGame(); 244 ShutdownGame();
256 245
246 render_window->MakeCurrent();
247 if (!gladLoadGL()) {
248 QMessageBox::critical(this, tr("Error while starting Citra!"),
249 tr("Failed to initialize the video core!\n\n"
250 "Please ensure that your GPU supports OpenGL 3.3 and that you have the latest graphics driver."));
251 return false;
252 }
253
257 // Initialize the core emulation 254 // Initialize the core emulation
258 System::Result system_result = System::Init(render_window); 255 System::Result system_result = System::Init(render_window);
259 if (System::Result::Success != system_result) { 256 if (System::Result::Success != system_result) {
@@ -375,32 +372,24 @@ void GMainWindow::ShutdownGame() {
375 emulation_running = false; 372 emulation_running = false;
376} 373}
377 374
378void GMainWindow::StoreRecentFile(const std::string& filename) 375void GMainWindow::StoreRecentFile(const std::string& filename) {
379{ 376 UISettings::values.recent_files.prepend(QString::fromStdString(filename));
380 QSettings settings; 377 UISettings::values.recent_files.removeDuplicates();
381 QStringList recent_files = settings.value("recentFiles").toStringList(); 378 while (UISettings::values.recent_files.size() > max_recent_files_item) {
382 recent_files.prepend(QString::fromStdString(filename)); 379 UISettings::values.recent_files.removeLast();
383 recent_files.removeDuplicates();
384 while (recent_files.size() > max_recent_files_item) {
385 recent_files.removeLast();
386 } 380 }
387 381
388 settings.setValue("recentFiles", recent_files);
389
390 UpdateRecentFiles(); 382 UpdateRecentFiles();
391} 383}
392 384
393void GMainWindow::UpdateRecentFiles() { 385void GMainWindow::UpdateRecentFiles() {
394 QSettings settings; 386 unsigned int num_recent_files = std::min(UISettings::values.recent_files.size(), static_cast<int>(max_recent_files_item));
395 QStringList recent_files = settings.value("recentFiles").toStringList();
396
397 unsigned int num_recent_files = std::min(recent_files.size(), static_cast<int>(max_recent_files_item));
398 387
399 for (unsigned int i = 0; i < num_recent_files; i++) { 388 for (unsigned int i = 0; i < num_recent_files; i++) {
400 QString text = QString("&%1. %2").arg(i + 1).arg(QFileInfo(recent_files[i]).fileName()); 389 QString text = QString("&%1. %2").arg(i + 1).arg(QFileInfo(UISettings::values.recent_files[i]).fileName());
401 actions_recent_files[i]->setText(text); 390 actions_recent_files[i]->setText(text);
402 actions_recent_files[i]->setData(recent_files[i]); 391 actions_recent_files[i]->setData(UISettings::values.recent_files[i]);
403 actions_recent_files[i]->setToolTip(recent_files[i]); 392 actions_recent_files[i]->setToolTip(UISettings::values.recent_files[i]);
404 actions_recent_files[i]->setVisible(true); 393 actions_recent_files[i]->setVisible(true);
405 } 394 }
406 395
@@ -421,36 +410,28 @@ void GMainWindow::OnGameListLoadFile(QString game_path) {
421} 410}
422 411
423void GMainWindow::OnMenuLoadFile() { 412void GMainWindow::OnMenuLoadFile() {
424 QSettings settings; 413 QString filename = QFileDialog::getOpenFileName(this, tr("Load File"), UISettings::values.roms_path, tr("3DS executable (*.3ds *.3dsx *.elf *.axf *.cci *.cxi)"));
425 QString rom_path = settings.value("romsPath", QString()).toString();
426
427 QString filename = QFileDialog::getOpenFileName(this, tr("Load File"), rom_path, tr("3DS executable (*.3ds *.3dsx *.elf *.axf *.cci *.cxi)"));
428 if (!filename.isEmpty()) { 414 if (!filename.isEmpty()) {
429 settings.setValue("romsPath", QFileInfo(filename).path()); 415 UISettings::values.roms_path = QFileInfo(filename).path();
430 416
431 BootGame(filename.toStdString()); 417 BootGame(filename.toStdString());
432 } 418 }
433} 419}
434 420
435void GMainWindow::OnMenuLoadSymbolMap() { 421void GMainWindow::OnMenuLoadSymbolMap() {
436 QSettings settings; 422 QString filename = QFileDialog::getOpenFileName(this, tr("Load Symbol Map"), UISettings::values.symbols_path, tr("Symbol map (*)"));
437 QString symbol_path = settings.value("symbolsPath", QString()).toString();
438
439 QString filename = QFileDialog::getOpenFileName(this, tr("Load Symbol Map"), symbol_path, tr("Symbol map (*)"));
440 if (!filename.isEmpty()) { 423 if (!filename.isEmpty()) {
441 settings.setValue("symbolsPath", QFileInfo(filename).path()); 424 UISettings::values.symbols_path = QFileInfo(filename).path();
442 425
443 LoadSymbolMap(filename.toStdString()); 426 LoadSymbolMap(filename.toStdString());
444 } 427 }
445} 428}
446 429
447void GMainWindow::OnMenuSelectGameListRoot() { 430void GMainWindow::OnMenuSelectGameListRoot() {
448 QSettings settings;
449
450 QString dir_path = QFileDialog::getExistingDirectory(this, tr("Select Directory")); 431 QString dir_path = QFileDialog::getExistingDirectory(this, tr("Select Directory"));
451 if (!dir_path.isEmpty()) { 432 if (!dir_path.isEmpty()) {
452 settings.setValue("gameListRootDir", dir_path); 433 UISettings::values.gamedir = dir_path;
453 game_list->PopulateAsync(dir_path, settings.value("gameListDeepScan").toBool()); 434 game_list->PopulateAsync(dir_path, UISettings::values.gamedir_deepscan);
454 } 435 }
455} 436}
456 437
@@ -466,10 +447,7 @@ void GMainWindow::OnMenuRecentFile() {
466 // Display an error message and remove the file from the list. 447 // Display an error message and remove the file from the list.
467 QMessageBox::information(this, tr("File not found"), tr("File \"%1\" not found").arg(filename)); 448 QMessageBox::information(this, tr("File not found"), tr("File \"%1\" not found").arg(filename));
468 449
469 QSettings settings; 450 UISettings::values.recent_files.removeOne(filename);
470 QStringList recent_files = settings.value("recentFiles").toStringList();
471 recent_files.removeOne(filename);
472 settings.setValue("recentFiles", recent_files);
473 UpdateRecentFiles(); 451 UpdateRecentFiles();
474 } 452 }
475} 453}
@@ -496,31 +474,6 @@ void GMainWindow::OnStopGame() {
496 ShutdownGame(); 474 ShutdownGame();
497} 475}
498 476
499void GMainWindow::OnOpenHotkeysDialog() {
500 GHotkeysDialog dialog(this);
501 dialog.exec();
502}
503
504void GMainWindow::SetHardwareRendererEnabled(bool enabled) {
505 VideoCore::g_hw_renderer_enabled = enabled;
506
507 Config config;
508 Settings::values.use_hw_renderer = enabled;
509 config.Save();
510}
511
512void GMainWindow::SetGdbstubEnabled(bool enabled) {
513 GDBStub::ToggleServer(enabled);
514}
515
516void GMainWindow::SetShaderJITEnabled(bool enabled) {
517 VideoCore::g_shader_jit_enabled = enabled;
518
519 Config config;
520 Settings::values.use_shader_jit = enabled;
521 config.Save();
522}
523
524void GMainWindow::ToggleWindowMode() { 477void GMainWindow::ToggleWindowMode() {
525 if (ui.action_Single_Window_Mode->isChecked()) { 478 if (ui.action_Single_Window_Mode->isChecked()) {
526 // Render in the main window... 479 // Render in the main window...
@@ -547,11 +500,17 @@ void GMainWindow::ToggleWindowMode() {
547} 500}
548 501
549void GMainWindow::OnConfigure() { 502void GMainWindow::OnConfigure() {
550 //GControllerConfigDialog* dialog = new GControllerConfigDialog(controller_ports, this); 503 ConfigureDialog configureDialog(this);
504 auto result = configureDialog.exec();
505 if (result == QDialog::Accepted)
506 {
507 configureDialog.applyConfiguration();
508 config->Save();
509 }
551} 510}
552 511
553bool GMainWindow::ConfirmClose() { 512bool GMainWindow::ConfirmClose() {
554 if (emu_thread == nullptr || !confirm_before_closing) 513 if (emu_thread == nullptr || !UISettings::values.confirm_before_closing)
555 return true; 514 return true;
556 515
557 auto answer = QMessageBox::question(this, tr("Citra"), 516 auto answer = QMessageBox::question(this, tr("Citra"),
@@ -566,23 +525,19 @@ void GMainWindow::closeEvent(QCloseEvent* event) {
566 return; 525 return;
567 } 526 }
568 527
569 // Save window layout 528 UISettings::values.geometry = saveGeometry();
570 QSettings settings(QSettings::IniFormat, QSettings::UserScope, "Citra team", "Citra"); 529 UISettings::values.state = saveState();
571 530 UISettings::values.renderwindow_geometry = render_window->saveGeometry();
572 settings.beginGroup("UILayout"); 531#if MICROPROFILE_ENABLED
573 settings.setValue("geometry", saveGeometry()); 532 UISettings::values.microprofile_geometry = microProfileDialog->saveGeometry();
574 settings.setValue("state", saveState()); 533 UISettings::values.microprofile_visible = microProfileDialog->isVisible();
575 settings.setValue("geometryRenderWindow", render_window->saveGeometry()); 534#endif
576 settings.setValue("microProfileDialogGeometry", microProfileDialog->saveGeometry()); 535 UISettings::values.single_window_mode = ui.action_Single_Window_Mode->isChecked();
577 settings.setValue("microProfileDialogVisible", microProfileDialog->isVisible()); 536 UISettings::values.display_titlebar = ui.actionDisplay_widget_title_bars->isChecked();
578 settings.endGroup(); 537 UISettings::values.first_start = false;
579 538
580 settings.setValue("singleWindowMode", ui.action_Single_Window_Mode->isChecked()); 539 game_list->SaveInterfaceLayout();
581 settings.setValue("displayTitleBars", ui.actionDisplay_widget_title_bars->isChecked()); 540 SaveHotkeys();
582 settings.setValue("firstStart", false);
583 settings.setValue("confirmClose", confirm_before_closing);
584 game_list->SaveInterfaceLayout(settings);
585 SaveHotkeys(settings);
586 541
587 // Shutdown session if the emu thread is active... 542 // Shutdown session if the emu thread is active...
588 if (emu_thread != nullptr) 543 if (emu_thread != nullptr)
@@ -607,7 +562,6 @@ int main(int argc, char* argv[]) {
607 }); 562 });
608 563
609 // Init settings params 564 // Init settings params
610 QSettings::setDefaultFormat(QSettings::IniFormat);
611 QCoreApplication::setOrganizationName("Citra team"); 565 QCoreApplication::setOrganizationName("Citra team");
612 QCoreApplication::setApplicationName("Citra"); 566 QCoreApplication::setApplicationName("Citra");
613 567
diff --git a/src/citra_qt/main.h b/src/citra_qt/main.h
index 6e4e56689..477db5c5c 100644
--- a/src/citra_qt/main.h
+++ b/src/citra_qt/main.h
@@ -10,6 +10,7 @@
10 10
11#include "ui_main.h" 11#include "ui_main.h"
12 12
13class Config;
13class GameList; 14class GameList;
14class GImageInfo; 15class GImageInfo;
15class GRenderWindow; 16class GRenderWindow;
@@ -104,12 +105,8 @@ private slots:
104 /// Called whenever a user selects the "File->Select Game List Root" menu item 105 /// Called whenever a user selects the "File->Select Game List Root" menu item
105 void OnMenuSelectGameListRoot(); 106 void OnMenuSelectGameListRoot();
106 void OnMenuRecentFile(); 107 void OnMenuRecentFile();
107 void OnOpenHotkeysDialog();
108 void OnConfigure(); 108 void OnConfigure();
109 void OnDisplayTitleBars(bool); 109 void OnDisplayTitleBars(bool);
110 void SetHardwareRendererEnabled(bool);
111 void SetGdbstubEnabled(bool);
112 void SetShaderJITEnabled(bool);
113 void ToggleWindowMode(); 110 void ToggleWindowMode();
114 111
115private: 112private:
@@ -118,6 +115,8 @@ private:
118 GRenderWindow* render_window; 115 GRenderWindow* render_window;
119 GameList* game_list; 116 GameList* game_list;
120 117
118 std::unique_ptr<Config> config;
119
121 // Whether emulation is currently running in Citra. 120 // Whether emulation is currently running in Citra.
122 bool emulation_running = false; 121 bool emulation_running = false;
123 std::unique_ptr<EmuThread> emu_thread; 122 std::unique_ptr<EmuThread> emu_thread;
@@ -131,7 +130,6 @@ private:
131 GPUCommandListWidget* graphicsCommandsWidget; 130 GPUCommandListWidget* graphicsCommandsWidget;
132 131
133 QAction* actions_recent_files[max_recent_files_item]; 132 QAction* actions_recent_files[max_recent_files_item];
134 bool confirm_before_closing;
135}; 133};
136 134
137#endif // _CITRA_QT_MAIN_HXX_ 135#endif // _CITRA_QT_MAIN_HXX_
diff --git a/src/citra_qt/main.ui b/src/citra_qt/main.ui
index 1e8a07cfb..441e0b81e 100644
--- a/src/citra_qt/main.ui
+++ b/src/citra_qt/main.ui
@@ -45,7 +45,7 @@
45 <x>0</x> 45 <x>0</x>
46 <y>0</y> 46 <y>0</y>
47 <width>1081</width> 47 <width>1081</width>
48 <height>22</height> 48 <height>19</height>
49 </rect> 49 </rect>
50 </property> 50 </property>
51 <widget class="QMenu" name="menu_File"> 51 <widget class="QMenu" name="menu_File">
@@ -73,9 +73,6 @@
73 <addaction name="action_Pause"/> 73 <addaction name="action_Pause"/>
74 <addaction name="action_Stop"/> 74 <addaction name="action_Stop"/>
75 <addaction name="separator"/> 75 <addaction name="separator"/>
76 <addaction name="action_Use_Hardware_Renderer"/>
77 <addaction name="action_Use_Shader_JIT"/>
78 <addaction name="action_Use_Gdbstub"/>
79 <addaction name="action_Configure"/> 76 <addaction name="action_Configure"/>
80 </widget> 77 </widget>
81 <widget class="QMenu" name="menu_View"> 78 <widget class="QMenu" name="menu_View">
@@ -84,7 +81,6 @@
84 </property> 81 </property>
85 <addaction name="action_Single_Window_Mode"/> 82 <addaction name="action_Single_Window_Mode"/>
86 <addaction name="actionDisplay_widget_title_bars"/> 83 <addaction name="actionDisplay_widget_title_bars"/>
87 <addaction name="action_Hotkeys"/>
88 </widget> 84 </widget>
89 <widget class="QMenu" name="menu_Help"> 85 <widget class="QMenu" name="menu_Help">
90 <property name="title"> 86 <property name="title">
@@ -150,35 +146,6 @@
150 <string>Single Window Mode</string> 146 <string>Single Window Mode</string>
151 </property> 147 </property>
152 </action> 148 </action>
153 <action name="action_Hotkeys">
154 <property name="text">
155 <string>Configure &amp;Hotkeys ...</string>
156 </property>
157 </action>
158 <action name="action_Use_Hardware_Renderer">
159 <property name="checkable">
160 <bool>true</bool>
161 </property>
162 <property name="text">
163 <string>Use Hardware Renderer</string>
164 </property>
165 </action>
166 <action name="action_Use_Shader_JIT">
167 <property name="checkable">
168 <bool>true</bool>
169 </property>
170 <property name="text">
171 <string>Use Shader JIT</string>
172 </property>
173 </action>
174 <action name="action_Use_Gdbstub">
175 <property name="checkable">
176 <bool>true</bool>
177 </property>
178 <property name="text">
179 <string>Use Gdbstub</string>
180 </property>
181 </action>
182 <action name="action_Configure"> 149 <action name="action_Configure">
183 <property name="text"> 150 <property name="text">
184 <string>Configure ...</string> 151 <string>Configure ...</string>
@@ -220,22 +187,6 @@
220 </hints> 187 </hints>
221 </connection> 188 </connection>
222 <connection> 189 <connection>
223 <sender>action_Configure</sender>
224 <signal>triggered()</signal>
225 <receiver>MainWindow</receiver>
226 <slot>OnConfigure()</slot>
227 <hints>
228 <hint type="sourcelabel">
229 <x>-1</x>
230 <y>-1</y>
231 </hint>
232 <hint type="destinationlabel">
233 <x>540</x>
234 <y>364</y>
235 </hint>
236 </hints>
237 </connection>
238 <connection>
239 <sender>actionDisplay_widget_title_bars</sender> 190 <sender>actionDisplay_widget_title_bars</sender>
240 <signal>triggered(bool)</signal> 191 <signal>triggered(bool)</signal>
241 <receiver>MainWindow</receiver> 192 <receiver>MainWindow</receiver>
diff --git a/src/citra_qt/ui_settings.cpp b/src/citra_qt/ui_settings.cpp
new file mode 100644
index 000000000..5f2215899
--- /dev/null
+++ b/src/citra_qt/ui_settings.cpp
@@ -0,0 +1,11 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "ui_settings.h"
6
7namespace UISettings {
8
9Values values = {};
10
11}
diff --git a/src/citra_qt/ui_settings.h b/src/citra_qt/ui_settings.h
new file mode 100644
index 000000000..62db4a73e
--- /dev/null
+++ b/src/citra_qt/ui_settings.h
@@ -0,0 +1,47 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <QByteArray>
8#include <QStringList>
9#include <QString>
10
11#include <vector>
12
13namespace UISettings {
14
15using ContextualShortcut = std::pair<QString, int> ;
16using Shortcut = std::pair<QString, ContextualShortcut>;
17
18struct Values {
19 QByteArray geometry;
20 QByteArray state;
21
22 QByteArray renderwindow_geometry;
23
24 QByteArray gamelist_header_state;
25
26 QByteArray microprofile_geometry;
27 bool microprofile_visible;
28
29 bool single_window_mode;
30 bool display_titlebar;
31
32 bool confirm_before_closing;
33 bool first_start;
34
35 QString roms_path;
36 QString symbols_path;
37 QString gamedir;
38 bool gamedir_deepscan;
39 QStringList recent_files;
40
41 // Shortcut name <Shortcut, context>
42 std::vector<Shortcut> shortcuts;
43};
44
45extern Values values;
46
47}
diff --git a/src/citra_qt/util/util.cpp b/src/citra_qt/util/util.cpp
index 8734a8efd..2f9beb5cc 100644
--- a/src/citra_qt/util/util.cpp
+++ b/src/citra_qt/util/util.cpp
@@ -19,7 +19,7 @@ QString ReadableByteSize(qulonglong size) {
19 static const std::array<const char*, 6> units = { "B", "KiB", "MiB", "GiB", "TiB", "PiB" }; 19 static const std::array<const char*, 6> units = { "B", "KiB", "MiB", "GiB", "TiB", "PiB" };
20 if (size == 0) 20 if (size == 0)
21 return "0"; 21 return "0";
22 int digit_groups = std::min<int>((int)(std::log10(size) / std::log10(1024)), units.size()); 22 int digit_groups = std::min<int>(static_cast<int>(std::log10(size) / std::log10(1024)), static_cast<int>(units.size()));
23 return QString("%L1 %2").arg(size / std::pow(1024, digit_groups), 0, 'f', 1) 23 return QString("%L1 %2").arg(size / std::pow(1024, digit_groups), 0, 'f', 1)
24 .arg(units[digit_groups]); 24 .arg(units[digit_groups]);
25} 25}
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index c839ce173..aa6eee2a3 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -47,7 +47,6 @@ set(HEADERS
47 microprofile.h 47 microprofile.h
48 microprofileui.h 48 microprofileui.h
49 platform.h 49 platform.h
50 profiler.h
51 profiler_reporting.h 50 profiler_reporting.h
52 scm_rev.h 51 scm_rev.h
53 scope_exit.h 52 scope_exit.h
diff --git a/src/common/assert.h b/src/common/assert.h
index 6849778b7..cd9b819a9 100644
--- a/src/common/assert.h
+++ b/src/common/assert.h
@@ -39,6 +39,7 @@ static void assert_noinline_call(const Fn& fn) {
39 }); } while (0) 39 }); } while (0)
40 40
41#define UNREACHABLE() ASSERT_MSG(false, "Unreachable code!") 41#define UNREACHABLE() ASSERT_MSG(false, "Unreachable code!")
42#define UNREACHABLE_MSG(...) ASSERT_MSG(false, __VA_ARGS__)
42 43
43#ifdef _DEBUG 44#ifdef _DEBUG
44#define DEBUG_ASSERT(_a_) ASSERT(_a_) 45#define DEBUG_ASSERT(_a_) ASSERT(_a_)
@@ -49,3 +50,4 @@ static void assert_noinline_call(const Fn& fn) {
49#endif 50#endif
50 51
51#define UNIMPLEMENTED() DEBUG_ASSERT_MSG(false, "Unimplemented code!") 52#define UNIMPLEMENTED() DEBUG_ASSERT_MSG(false, "Unimplemented code!")
53#define UNIMPLEMENTED_MSG(_a_, ...) ASSERT_MSG(false, _a_, __VA_ARGS__) \ No newline at end of file
diff --git a/src/common/bit_field.h b/src/common/bit_field.h
index 371eb17a1..4748999ed 100644
--- a/src/common/bit_field.h
+++ b/src/common/bit_field.h
@@ -186,5 +186,5 @@ private:
186#pragma pack() 186#pragma pack()
187 187
188#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER) 188#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER)
189static_assert(std::is_trivially_copyable<BitField<0, 1, u32>>::value, "BitField must be trivially copyable"); 189static_assert(std::is_trivially_copyable<BitField<0, 1, unsigned>>::value, "BitField must be trivially copyable");
190#endif 190#endif
diff --git a/src/common/bit_set.h b/src/common/bit_set.h
index 85f91e786..7f5de8df2 100644
--- a/src/common/bit_set.h
+++ b/src/common/bit_set.h
@@ -7,6 +7,7 @@
7#include <intrin.h> 7#include <intrin.h>
8#endif 8#endif
9#include <initializer_list> 9#include <initializer_list>
10#include <new>
10#include <type_traits> 11#include <type_traits>
11#include "common/common_types.h" 12#include "common/common_types.h"
12 13
@@ -186,4 +187,4 @@ public:
186typedef Common::BitSet<u8> BitSet8; 187typedef Common::BitSet<u8> BitSet8;
187typedef Common::BitSet<u16> BitSet16; 188typedef Common::BitSet<u16> BitSet16;
188typedef Common::BitSet<u32> BitSet32; 189typedef Common::BitSet<u32> BitSet32;
189typedef Common::BitSet<u64> BitSet64; \ No newline at end of file 190typedef Common::BitSet<u64> BitSet64;
diff --git a/src/common/code_block.h b/src/common/code_block.h
index 9ef7296d3..2fa4a0090 100644
--- a/src/common/code_block.h
+++ b/src/common/code_block.h
@@ -4,8 +4,10 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "common_types.h" 7#include <cstddef>
8#include "memory_util.h" 8
9#include "common/common_types.h"
10#include "common/memory_util.h"
9 11
10// Everything that needs to generate code should inherit from this. 12// Everything that needs to generate code should inherit from this.
11// You get memory management for free, plus, you can use all emitter functions without 13// You get memory management for free, plus, you can use all emitter functions without
diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h
index aa6aff7b9..ab3515683 100644
--- a/src/common/common_funcs.h
+++ b/src/common/common_funcs.h
@@ -4,6 +4,10 @@
4 4
5#pragma once 5#pragma once
6 6
7#if !defined(ARCHITECTURE_x86_64) && !defined(_M_ARM)
8#include <cstdlib> // for exit
9#endif
10
7#include "common_types.h" 11#include "common_types.h"
8 12
9#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) 13#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
diff --git a/src/common/file_util.cpp b/src/common/file_util.cpp
index 9ada09f8a..6e2867658 100644
--- a/src/common/file_util.cpp
+++ b/src/common/file_util.cpp
@@ -69,9 +69,10 @@ static void StripTailDirSlashes(std::string &fname)
69{ 69{
70 if (fname.length() > 1) 70 if (fname.length() > 1)
71 { 71 {
72 size_t i = fname.length() - 1; 72 size_t i = fname.length();
73 while (fname[i] == DIR_SEP_CHR) 73 while (i > 0 && fname[i - 1] == DIR_SEP_CHR)
74 fname[i--] = '\0'; 74 --i;
75 fname.resize(i);
75 } 76 }
76 return; 77 return;
77} 78}
@@ -85,6 +86,10 @@ bool Exists(const std::string &filename)
85 StripTailDirSlashes(copy); 86 StripTailDirSlashes(copy);
86 87
87#ifdef _WIN32 88#ifdef _WIN32
89 // Windows needs a slash to identify a driver root
90 if (copy.size() != 0 && copy.back() == ':')
91 copy += DIR_SEP_CHR;
92
88 int result = _wstat64(Common::UTF8ToUTF16W(copy).c_str(), &file_info); 93 int result = _wstat64(Common::UTF8ToUTF16W(copy).c_str(), &file_info);
89#else 94#else
90 int result = stat64(copy.c_str(), &file_info); 95 int result = stat64(copy.c_str(), &file_info);
@@ -102,6 +107,10 @@ bool IsDirectory(const std::string &filename)
102 StripTailDirSlashes(copy); 107 StripTailDirSlashes(copy);
103 108
104#ifdef _WIN32 109#ifdef _WIN32
110 // Windows needs a slash to identify a driver root
111 if (copy.size() != 0 && copy.back() == ':')
112 copy += DIR_SEP_CHR;
113
105 int result = _wstat64(Common::UTF8ToUTF16W(copy).c_str(), &file_info); 114 int result = _wstat64(Common::UTF8ToUTF16W(copy).c_str(), &file_info);
106#else 115#else
107 int result = stat64(copy.c_str(), &file_info); 116 int result = stat64(copy.c_str(), &file_info);
@@ -824,13 +833,12 @@ size_t WriteStringToFile(bool text_file, const std::string &str, const char *fil
824 833
825size_t ReadFileToString(bool text_file, const char *filename, std::string &str) 834size_t ReadFileToString(bool text_file, const char *filename, std::string &str)
826{ 835{
827 FileUtil::IOFile file(filename, text_file ? "r" : "rb"); 836 IOFile file(filename, text_file ? "r" : "rb");
828 auto const f = file.GetHandle();
829 837
830 if (!f) 838 if (!file)
831 return false; 839 return false;
832 840
833 str.resize(static_cast<u32>(GetSize(f))); 841 str.resize(static_cast<u32>(file.GetSize()));
834 return file.ReadArray(&str[0], str.size()); 842 return file.ReadArray(&str[0], str.size());
835} 843}
836 844
@@ -877,15 +885,10 @@ void SplitFilename83(const std::string& filename, std::array<char, 9>& short_nam
877} 885}
878 886
879IOFile::IOFile() 887IOFile::IOFile()
880 : m_file(nullptr), m_good(true) 888{
881{} 889}
882
883IOFile::IOFile(std::FILE* file)
884 : m_file(file), m_good(true)
885{}
886 890
887IOFile::IOFile(const std::string& filename, const char openmode[]) 891IOFile::IOFile(const std::string& filename, const char openmode[])
888 : m_file(nullptr), m_good(true)
889{ 892{
890 Open(filename, openmode); 893 Open(filename, openmode);
891} 894}
@@ -896,7 +899,6 @@ IOFile::~IOFile()
896} 899}
897 900
898IOFile::IOFile(IOFile&& other) 901IOFile::IOFile(IOFile&& other)
899 : m_file(nullptr), m_good(true)
900{ 902{
901 Swap(other); 903 Swap(other);
902} 904}
@@ -935,26 +937,12 @@ bool IOFile::Close()
935 return m_good; 937 return m_good;
936} 938}
937 939
938std::FILE* IOFile::ReleaseHandle() 940u64 IOFile::GetSize() const
939{
940 std::FILE* const ret = m_file;
941 m_file = nullptr;
942 return ret;
943}
944
945void IOFile::SetHandle(std::FILE* file)
946{
947 Close();
948 Clear();
949 m_file = file;
950}
951
952u64 IOFile::GetSize()
953{ 941{
954 if (IsOpen()) 942 if (IsOpen())
955 return FileUtil::GetSize(m_file); 943 return FileUtil::GetSize(m_file);
956 else 944
957 return 0; 945 return 0;
958} 946}
959 947
960bool IOFile::Seek(s64 off, int origin) 948bool IOFile::Seek(s64 off, int origin)
@@ -965,12 +953,12 @@ bool IOFile::Seek(s64 off, int origin)
965 return m_good; 953 return m_good;
966} 954}
967 955
968u64 IOFile::Tell() 956u64 IOFile::Tell() const
969{ 957{
970 if (IsOpen()) 958 if (IsOpen())
971 return ftello(m_file); 959 return ftello(m_file);
972 else 960
973 return -1; 961 return -1;
974} 962}
975 963
976bool IOFile::Flush() 964bool IOFile::Flush()
diff --git a/src/common/file_util.h b/src/common/file_util.h
index a85121aa6..c6a8694ce 100644
--- a/src/common/file_util.h
+++ b/src/common/file_util.h
@@ -7,13 +7,17 @@
7#include <array> 7#include <array>
8#include <fstream> 8#include <fstream>
9#include <functional> 9#include <functional>
10#include <cstddef>
11#include <cstdio> 10#include <cstdio>
12#include <string> 11#include <string>
12#include <type_traits>
13#include <vector> 13#include <vector>
14 14
15#include "common/common_types.h" 15#include "common/common_types.h"
16 16
17#ifdef _MSC_VER
18#include "common/string_util.h"
19#endif
20
17// User directory indices for GetUserPath 21// User directory indices for GetUserPath
18enum { 22enum {
19 D_USER_IDX, 23 D_USER_IDX,
@@ -172,7 +176,6 @@ class IOFile : public NonCopyable
172{ 176{
173public: 177public:
174 IOFile(); 178 IOFile();
175 IOFile(std::FILE* file);
176 IOFile(const std::string& filename, const char openmode[]); 179 IOFile(const std::string& filename, const char openmode[]);
177 180
178 ~IOFile(); 181 ~IOFile();
@@ -188,6 +191,11 @@ public:
188 template <typename T> 191 template <typename T>
189 size_t ReadArray(T* data, size_t length) 192 size_t ReadArray(T* data, size_t length)
190 { 193 {
194 static_assert(std::is_standard_layout<T>(), "Given array does not consist of standard layout objects");
195#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER)
196 static_assert(std::is_trivially_copyable<T>(), "Given array does not consist of trivially copyable objects");
197#endif
198
191 if (!IsOpen()) { 199 if (!IsOpen()) {
192 m_good = false; 200 m_good = false;
193 return -1; 201 return -1;
@@ -203,9 +211,10 @@ public:
203 template <typename T> 211 template <typename T>
204 size_t WriteArray(const T* data, size_t length) 212 size_t WriteArray(const T* data, size_t length)
205 { 213 {
206 static_assert(std::is_standard_layout<T>::value, "Given array does not consist of standard layout objects"); 214 static_assert(std::is_standard_layout<T>(), "Given array does not consist of standard layout objects");
207 // TODO: gcc 4.8 does not support is_trivially_copyable, but we really should check for it here. 215#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER)
208 //static_assert(std::is_trivially_copyable<T>::value, "Given array does not consist of trivially copyable objects"); 216 static_assert(std::is_trivially_copyable<T>(), "Given array does not consist of trivially copyable objects");
217#endif
209 218
210 if (!IsOpen()) { 219 if (!IsOpen()) {
211 m_good = false; 220 m_good = false;
@@ -235,32 +244,24 @@ public:
235 return WriteArray(&object, 1); 244 return WriteArray(&object, 1);
236 } 245 }
237 246
238 bool IsOpen() { return nullptr != m_file; } 247 bool IsOpen() const { return nullptr != m_file; }
239 248
240 // m_good is set to false when a read, write or other function fails 249 // m_good is set to false when a read, write or other function fails
241 bool IsGood() { return m_good; } 250 bool IsGood() const { return m_good; }
242 operator void*() { return m_good ? m_file : nullptr; } 251 explicit operator bool() const { return IsGood(); }
243
244 std::FILE* ReleaseHandle();
245
246 std::FILE* GetHandle() { return m_file; }
247
248 void SetHandle(std::FILE* file);
249 252
250 bool Seek(s64 off, int origin); 253 bool Seek(s64 off, int origin);
251 u64 Tell(); 254 u64 Tell() const;
252 u64 GetSize(); 255 u64 GetSize() const;
253 bool Resize(u64 size); 256 bool Resize(u64 size);
254 bool Flush(); 257 bool Flush();
255 258
256 // clear error state 259 // clear error state
257 void Clear() { m_good = true; std::clearerr(m_file); } 260 void Clear() { m_good = true; std::clearerr(m_file); }
258 261
259 std::FILE* m_file;
260 bool m_good;
261private: 262private:
262 IOFile(IOFile&); 263 std::FILE* m_file = nullptr;
263 IOFile& operator=(IOFile& other); 264 bool m_good = true;
264}; 265};
265 266
266} // namespace 267} // namespace
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index 3d39f94d5..d7008fc66 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -65,6 +65,7 @@ namespace Log {
65 SUB(Render, OpenGL) \ 65 SUB(Render, OpenGL) \
66 CLS(Audio) \ 66 CLS(Audio) \
67 SUB(Audio, DSP) \ 67 SUB(Audio, DSP) \
68 SUB(Audio, Sink) \
68 CLS(Loader) 69 CLS(Loader)
69 70
70// GetClassName is a macro defined by Windows.h, grrr... 71// GetClassName is a macro defined by Windows.h, grrr...
diff --git a/src/common/logging/log.h b/src/common/logging/log.h
index 521362317..c6910b1c7 100644
--- a/src/common/logging/log.h
+++ b/src/common/logging/log.h
@@ -78,8 +78,9 @@ enum class Class : ClassType {
78 Render, ///< Emulator video output and hardware acceleration 78 Render, ///< Emulator video output and hardware acceleration
79 Render_Software, ///< Software renderer backend 79 Render_Software, ///< Software renderer backend
80 Render_OpenGL, ///< OpenGL backend 80 Render_OpenGL, ///< OpenGL backend
81 Audio, ///< Emulator audio output 81 Audio, ///< Audio emulation
82 Audio_DSP, ///< The HLE implementation of the DSP 82 Audio_DSP, ///< The HLE implementation of the DSP
83 Audio_Sink, ///< Emulator audio output backend
83 Loader, ///< ROM loader 84 Loader, ///< ROM loader
84 85
85 Count ///< Total number of logging classes 86 Count ///< Total number of logging classes
diff --git a/src/common/microprofile.h b/src/common/microprofile.h
index d3b6cb97c..ef312c6e1 100644
--- a/src/common/microprofile.h
+++ b/src/common/microprofile.h
@@ -4,6 +4,10 @@
4 4
5#pragma once 5#pragma once
6 6
7// Uncomment this to disable microprofile. This will get you cleaner profiles when using
8// external sampling profilers like "Very Sleepy", and will improve performance somewhat.
9// #define MICROPROFILE_ENABLED 0
10
7// Customized Citra settings. 11// Customized Citra settings.
8// This file wraps the MicroProfile header so that these are consistent everywhere. 12// This file wraps the MicroProfile header so that these are consistent everywhere.
9#define MICROPROFILE_WEBSERVER 0 13#define MICROPROFILE_WEBSERVER 0
diff --git a/src/common/microprofileui.h b/src/common/microprofileui.h
index 97c369bd9..41abe6b75 100644
--- a/src/common/microprofileui.h
+++ b/src/common/microprofileui.h
@@ -13,4 +13,7 @@
13#define MICROPROFILE_HELP_ALT "Right-Click" 13#define MICROPROFILE_HELP_ALT "Right-Click"
14#define MICROPROFILE_HELP_MOD "Ctrl" 14#define MICROPROFILE_HELP_MOD "Ctrl"
15 15
16// This isn't included by microprofileui.h :(
17#include <cstdlib> // For std::abs
18
16#include <microprofileui.h> 19#include <microprofileui.h>
diff --git a/src/common/profiler.cpp b/src/common/profiler.cpp
index 7792edd2f..49eb3f40c 100644
--- a/src/common/profiler.cpp
+++ b/src/common/profiler.cpp
@@ -7,71 +7,16 @@
7#include <vector> 7#include <vector>
8 8
9#include "common/assert.h" 9#include "common/assert.h"
10#include "common/profiler.h"
11#include "common/profiler_reporting.h" 10#include "common/profiler_reporting.h"
12#include "common/synchronized_wrapper.h" 11#include "common/synchronized_wrapper.h"
13 12
14#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013.
15 #define WIN32_LEAN_AND_MEAN
16 #include <Windows.h> // For QueryPerformanceCounter/Frequency
17#endif
18
19namespace Common { 13namespace Common {
20namespace Profiling { 14namespace Profiling {
21 15
22#if ENABLE_PROFILING
23thread_local Timer* Timer::current_timer = nullptr;
24#endif
25
26#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013
27QPCClock::time_point QPCClock::now() {
28 static LARGE_INTEGER freq;
29 // Use this dummy local static to ensure this gets initialized once.
30 static BOOL dummy = QueryPerformanceFrequency(&freq);
31
32 LARGE_INTEGER ticks;
33 QueryPerformanceCounter(&ticks);
34
35 // This is prone to overflow when multiplying, which is why I'm using micro instead of nano. The
36 // correct way to approach this would be to just return ticks as a time_point and then subtract
37 // and do this conversion when creating a duration from two time_points, however, as far as I
38 // could tell the C++ requirements for these types are incompatible with this approach.
39 return time_point(duration(ticks.QuadPart * std::micro::den / freq.QuadPart));
40}
41#endif
42
43TimingCategory::TimingCategory(const char* name, TimingCategory* parent)
44 : accumulated_duration(0) {
45
46 ProfilingManager& manager = GetProfilingManager();
47 category_id = manager.RegisterTimingCategory(this, name);
48 if (parent != nullptr)
49 manager.SetTimingCategoryParent(category_id, parent->category_id);
50}
51
52ProfilingManager::ProfilingManager() 16ProfilingManager::ProfilingManager()
53 : last_frame_end(Clock::now()), this_frame_start(Clock::now()) { 17 : last_frame_end(Clock::now()), this_frame_start(Clock::now()) {
54} 18}
55 19
56unsigned int ProfilingManager::RegisterTimingCategory(TimingCategory* category, const char* name) {
57 TimingCategoryInfo info;
58 info.category = category;
59 info.name = name;
60 info.parent = TimingCategoryInfo::NO_PARENT;
61
62 unsigned int id = (unsigned int)timing_categories.size();
63 timing_categories.push_back(std::move(info));
64
65 return id;
66}
67
68void ProfilingManager::SetTimingCategoryParent(unsigned int category, unsigned int parent) {
69 ASSERT(category < timing_categories.size());
70 ASSERT(parent < timing_categories.size());
71
72 timing_categories[category].parent = parent;
73}
74
75void ProfilingManager::BeginFrame() { 20void ProfilingManager::BeginFrame() {
76 this_frame_start = Clock::now(); 21 this_frame_start = Clock::now();
77} 22}
@@ -82,11 +27,6 @@ void ProfilingManager::FinishFrame() {
82 results.interframe_time = now - last_frame_end; 27 results.interframe_time = now - last_frame_end;
83 results.frame_time = now - this_frame_start; 28 results.frame_time = now - this_frame_start;
84 29
85 results.time_per_category.resize(timing_categories.size());
86 for (size_t i = 0; i < timing_categories.size(); ++i) {
87 results.time_per_category[i] = timing_categories[i].category->GetAccumulatedTime();
88 }
89
90 last_frame_end = now; 30 last_frame_end = now;
91} 31}
92 32
@@ -100,26 +40,9 @@ void TimingResultsAggregator::Clear() {
100 window_size = cursor = 0; 40 window_size = cursor = 0;
101} 41}
102 42
103void TimingResultsAggregator::SetNumberOfCategories(size_t n) {
104 size_t old_size = times_per_category.size();
105 if (n == old_size)
106 return;
107
108 times_per_category.resize(n);
109
110 for (size_t i = old_size; i < n; ++i) {
111 times_per_category[i].resize(max_window_size, Duration::zero());
112 }
113}
114
115void TimingResultsAggregator::AddFrame(const ProfilingFrameResult& frame_result) { 43void TimingResultsAggregator::AddFrame(const ProfilingFrameResult& frame_result) {
116 SetNumberOfCategories(frame_result.time_per_category.size());
117
118 interframe_times[cursor] = frame_result.interframe_time; 44 interframe_times[cursor] = frame_result.interframe_time;
119 frame_times[cursor] = frame_result.frame_time; 45 frame_times[cursor] = frame_result.frame_time;
120 for (size_t i = 0; i < frame_result.time_per_category.size(); ++i) {
121 times_per_category[i][cursor] = frame_result.time_per_category[i];
122 }
123 46
124 ++cursor; 47 ++cursor;
125 if (cursor == max_window_size) 48 if (cursor == max_window_size)
@@ -162,11 +85,6 @@ AggregatedFrameResult TimingResultsAggregator::GetAggregatedResults() const {
162 result.fps = 0.0f; 85 result.fps = 0.0f;
163 } 86 }
164 87
165 result.time_per_category.resize(times_per_category.size());
166 for (size_t i = 0; i < times_per_category.size(); ++i) {
167 result.time_per_category[i] = AggregateField(times_per_category[i], window_size);
168 }
169
170 return result; 88 return result;
171} 89}
172 90
diff --git a/src/common/profiler.h b/src/common/profiler.h
deleted file mode 100644
index 3e967b4bc..000000000
--- a/src/common/profiler.h
+++ /dev/null
@@ -1,152 +0,0 @@
1// Copyright 2015 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <atomic>
8#include <chrono>
9
10#include "common/assert.h"
11#include "common/thread.h"
12
13namespace Common {
14namespace Profiling {
15
16// If this is defined to 0, it turns all Timers into no-ops.
17#ifndef ENABLE_PROFILING
18#define ENABLE_PROFILING 1
19#endif
20
21#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013
22// MSVC up to 2013 doesn't use QueryPerformanceCounter for high_resolution_clock, so it has bad
23// precision. We manually implement a clock based on QPC to get good results.
24
25struct QPCClock {
26 using duration = std::chrono::microseconds;
27 using time_point = std::chrono::time_point<QPCClock>;
28 using rep = duration::rep;
29 using period = duration::period;
30 static const bool is_steady = false;
31
32 static time_point now();
33};
34
35using Clock = QPCClock;
36#else
37using Clock = std::chrono::high_resolution_clock;
38#endif
39
40using Duration = Clock::duration;
41
42/**
43 * Represents a timing category that measured time can be accounted towards. Should be declared as a
44 * global variable and passed to Timers.
45 */
46class TimingCategory final {
47public:
48 TimingCategory(const char* name, TimingCategory* parent = nullptr);
49
50 unsigned int GetCategoryId() const {
51 return category_id;
52 }
53
54 /// Adds some time to this category. Can safely be called from multiple threads at the same time.
55 void AddTime(Duration amount) {
56 std::atomic_fetch_add_explicit(
57 &accumulated_duration, amount.count(),
58 std::memory_order_relaxed);
59 }
60
61 /**
62 * Atomically retrieves the accumulated measured time for this category and resets the counter
63 * to zero. Can be safely called concurrently with AddTime.
64 */
65 Duration GetAccumulatedTime() {
66 return Duration(std::atomic_exchange_explicit(
67 &accumulated_duration, (Duration::rep)0,
68 std::memory_order_relaxed));
69 }
70
71private:
72 unsigned int category_id;
73 std::atomic<Duration::rep> accumulated_duration;
74};
75
76/**
77 * Measures time elapsed between a call to Start and a call to Stop and attributes it to the given
78 * TimingCategory. Start/Stop can be called multiple times on the same timer, but each call must be
79 * appropriately paired.
80 *
81 * When a Timer is started, it automatically pauses a previously running timer on the same thread,
82 * which is resumed when it is stopped. As such, no special action needs to be taken to avoid
83 * double-accounting of time on two categories.
84 */
85class Timer {
86public:
87 Timer(TimingCategory& category) : category(category) {
88 }
89
90 void Start() {
91#if ENABLE_PROFILING
92 ASSERT(!running);
93 previous_timer = current_timer;
94 current_timer = this;
95 if (previous_timer != nullptr)
96 previous_timer->StopTiming();
97
98 StartTiming();
99#endif
100 }
101
102 void Stop() {
103#if ENABLE_PROFILING
104 ASSERT(running);
105 StopTiming();
106
107 if (previous_timer != nullptr)
108 previous_timer->StartTiming();
109 current_timer = previous_timer;
110#endif
111 }
112
113private:
114#if ENABLE_PROFILING
115 void StartTiming() {
116 start = Clock::now();
117 running = true;
118 }
119
120 void StopTiming() {
121 auto duration = Clock::now() - start;
122 running = false;
123 category.AddTime(std::chrono::duration_cast<Duration>(duration));
124 }
125
126 Clock::time_point start;
127 bool running = false;
128
129 Timer* previous_timer;
130 static thread_local Timer* current_timer;
131#endif
132
133 TimingCategory& category;
134};
135
136/**
137 * A Timer that automatically starts timing when created and stops at the end of the scope. Should
138 * be used in the majority of cases.
139 */
140class ScopeTimer : public Timer {
141public:
142 ScopeTimer(TimingCategory& category) : Timer(category) {
143 Start();
144 }
145
146 ~ScopeTimer() {
147 Stop();
148 }
149};
150
151} // namespace Profiling
152} // namespace Common
diff --git a/src/common/profiler_reporting.h b/src/common/profiler_reporting.h
index df98e05b7..fa1ac883f 100644
--- a/src/common/profiler_reporting.h
+++ b/src/common/profiler_reporting.h
@@ -4,22 +4,17 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <chrono>
7#include <cstddef> 8#include <cstddef>
8#include <vector> 9#include <vector>
9 10
10#include "common/profiler.h"
11#include "common/synchronized_wrapper.h" 11#include "common/synchronized_wrapper.h"
12 12
13namespace Common { 13namespace Common {
14namespace Profiling { 14namespace Profiling {
15 15
16struct TimingCategoryInfo { 16using Clock = std::chrono::high_resolution_clock;
17 static const unsigned int NO_PARENT = -1; 17using Duration = Clock::duration;
18
19 TimingCategory* category;
20 const char* name;
21 unsigned int parent;
22};
23 18
24struct ProfilingFrameResult { 19struct ProfilingFrameResult {
25 /// Time since the last delivered frame 20 /// Time since the last delivered frame
@@ -27,22 +22,12 @@ struct ProfilingFrameResult {
27 22
28 /// Time spent processing a frame, excluding VSync 23 /// Time spent processing a frame, excluding VSync
29 Duration frame_time; 24 Duration frame_time;
30
31 /// Total amount of time spent inside each category in this frame. Indexed by the category id
32 std::vector<Duration> time_per_category;
33}; 25};
34 26
35class ProfilingManager final { 27class ProfilingManager final {
36public: 28public:
37 ProfilingManager(); 29 ProfilingManager();
38 30
39 unsigned int RegisterTimingCategory(TimingCategory* category, const char* name);
40 void SetTimingCategoryParent(unsigned int category, unsigned int parent);
41
42 const std::vector<TimingCategoryInfo>& GetTimingCategoriesInfo() const {
43 return timing_categories;
44 }
45
46 /// This should be called after swapping screen buffers. 31 /// This should be called after swapping screen buffers.
47 void BeginFrame(); 32 void BeginFrame();
48 /// This should be called before swapping screen buffers. 33 /// This should be called before swapping screen buffers.
@@ -54,7 +39,6 @@ public:
54 } 39 }
55 40
56private: 41private:
57 std::vector<TimingCategoryInfo> timing_categories;
58 Clock::time_point last_frame_end; 42 Clock::time_point last_frame_end;
59 Clock::time_point this_frame_start; 43 Clock::time_point this_frame_start;
60 44
@@ -73,9 +57,6 @@ struct AggregatedFrameResult {
73 AggregatedDuration frame_time; 57 AggregatedDuration frame_time;
74 58
75 float fps; 59 float fps;
76
77 /// Total amount of time spent inside each category in this frame. Indexed by the category id
78 std::vector<AggregatedDuration> time_per_category;
79}; 60};
80 61
81class TimingResultsAggregator final { 62class TimingResultsAggregator final {
@@ -83,7 +64,6 @@ public:
83 TimingResultsAggregator(size_t window_size); 64 TimingResultsAggregator(size_t window_size);
84 65
85 void Clear(); 66 void Clear();
86 void SetNumberOfCategories(size_t n);
87 67
88 void AddFrame(const ProfilingFrameResult& frame_result); 68 void AddFrame(const ProfilingFrameResult& frame_result);
89 69
@@ -95,7 +75,6 @@ public:
95 75
96 std::vector<Duration> interframe_times; 76 std::vector<Duration> interframe_times;
97 std::vector<Duration> frame_times; 77 std::vector<Duration> frame_times;
98 std::vector<std::vector<Duration>> times_per_category;
99}; 78};
100 79
101ProfilingManager& GetProfilingManager(); 80ProfilingManager& GetProfilingManager();
diff --git a/src/common/swap.h b/src/common/swap.h
index a7c37bc44..1749bd7a4 100644
--- a/src/common/swap.h
+++ b/src/common/swap.h
@@ -25,6 +25,8 @@
25 #include <sys/endian.h> 25 #include <sys/endian.h>
26#endif 26#endif
27 27
28#include <cstring>
29
28#include "common/common_types.h" 30#include "common/common_types.h"
29 31
30// GCC 4.6+ 32// GCC 4.6+
@@ -58,9 +60,6 @@
58 60
59namespace Common { 61namespace Common {
60 62
61inline u8 swap8(u8 _data) {return _data;}
62inline u32 swap24(const u8* _data) {return (_data[0] << 16) | (_data[1] << 8) | _data[2];}
63
64#ifdef _MSC_VER 63#ifdef _MSC_VER
65inline u16 swap16(u16 _data) {return _byteswap_ushort(_data);} 64inline u16 swap16(u16 _data) {return _byteswap_ushort(_data);}
66inline u32 swap32(u32 _data) {return _byteswap_ulong (_data);} 65inline u32 swap32(u32 _data) {return _byteswap_ulong (_data);}
@@ -92,52 +91,29 @@ inline u64 swap64(u64 data) {return ((u64)swap32(data) << 32) | swap32(data >> 3
92#endif 91#endif
93 92
94inline float swapf(float f) { 93inline float swapf(float f) {
95 union { 94 static_assert(sizeof(u32) == sizeof(float),
96 float f; 95 "float must be the same size as uint32_t.");
97 unsigned int u32;
98 } dat1, dat2;
99
100 dat1.f = f;
101 dat2.u32 = swap32(dat1.u32);
102 96
103 return dat2.f; 97 u32 value;
104} 98 std::memcpy(&value, &f, sizeof(u32));
105
106inline double swapd(double f) {
107 union {
108 double f;
109 unsigned long long u64;
110 } dat1, dat2;
111 99
112 dat1.f = f; 100 value = swap32(value);
113 dat2.u64 = swap64(dat1.u64); 101 std::memcpy(&f, &value, sizeof(u32));
114 102
115 return dat2.f; 103 return f;
116} 104}
117 105
118inline u16 swap16(const u8* _pData) {return swap16(*(const u16*)_pData);} 106inline double swapd(double f) {
119inline u32 swap32(const u8* _pData) {return swap32(*(const u32*)_pData);} 107 static_assert(sizeof(u64) == sizeof(double),
120inline u64 swap64(const u8* _pData) {return swap64(*(const u64*)_pData);} 108 "double must be the same size as uint64_t.");
121
122template <int count>
123void swap(u8*);
124 109
125template <> 110 u64 value;
126inline void swap<1>(u8* data) { } 111 std::memcpy(&value, &f, sizeof(u64));
127 112
128template <> 113 value = swap64(value);
129inline void swap<2>(u8* data) { 114 std::memcpy(&f, &value, sizeof(u64));
130 *reinterpret_cast<u16*>(data) = swap16(data);
131}
132
133template <>
134inline void swap<4>(u8* data) {
135 *reinterpret_cast<u32*>(data) = swap32(data);
136}
137 115
138template <> 116 return f;
139inline void swap<8>(u8* data) {
140 *reinterpret_cast<u64*>(data) = swap64(data);
141} 117}
142 118
143} // Namespace Common 119} // Namespace Common
@@ -534,35 +510,35 @@ bool operator==(const S &p, const swap_struct_t<T, F> v) {
534template <typename T> 510template <typename T>
535struct swap_64_t { 511struct swap_64_t {
536 static T swap(T x) { 512 static T swap(T x) {
537 return (T)Common::swap64(*(u64 *)&x); 513 return static_cast<T>(Common::swap64(x));
538 } 514 }
539}; 515};
540 516
541template <typename T> 517template <typename T>
542struct swap_32_t { 518struct swap_32_t {
543 static T swap(T x) { 519 static T swap(T x) {
544 return (T)Common::swap32(*(u32 *)&x); 520 return static_cast<T>(Common::swap32(x));
545 } 521 }
546}; 522};
547 523
548template <typename T> 524template <typename T>
549struct swap_16_t { 525struct swap_16_t {
550 static T swap(T x) { 526 static T swap(T x) {
551 return (T)Common::swap16(*(u16 *)&x); 527 return static_cast<T>(Common::swap16(x));
552 } 528 }
553}; 529};
554 530
555template <typename T> 531template <typename T>
556struct swap_float_t { 532struct swap_float_t {
557 static T swap(T x) { 533 static T swap(T x) {
558 return (T)Common::swapf(*(float *)&x); 534 return static_cast<T>(Common::swapf(x));
559 } 535 }
560}; 536};
561 537
562template <typename T> 538template <typename T>
563struct swap_double_t { 539struct swap_double_t {
564 static T swap(T x) { 540 static T swap(T x) {
565 return (T)Common::swapd(*(double *)&x); 541 return static_cast<T>(Common::swapd(x));
566 } 542 }
567}; 543};
568 544
diff --git a/src/common/thread.h b/src/common/thread.h
index 8255ee6d3..bbfa8befa 100644
--- a/src/common/thread.h
+++ b/src/common/thread.h
@@ -30,8 +30,7 @@
30# endif 30# endif
31#endif 31#endif
32 32
33namespace Common 33namespace Common {
34{
35 34
36int CurrentThreadId(); 35int CurrentThreadId();
37 36
@@ -43,55 +42,55 @@ public:
43 Event() : is_set(false) {} 42 Event() : is_set(false) {}
44 43
45 void Set() { 44 void Set() {
46 std::lock_guard<std::mutex> lk(m_mutex); 45 std::lock_guard<std::mutex> lk(mutex);
47 if (!is_set) { 46 if (!is_set) {
48 is_set = true; 47 is_set = true;
49 m_condvar.notify_one(); 48 condvar.notify_one();
50 } 49 }
51 } 50 }
52 51
53 void Wait() { 52 void Wait() {
54 std::unique_lock<std::mutex> lk(m_mutex); 53 std::unique_lock<std::mutex> lk(mutex);
55 m_condvar.wait(lk, [&]{ return is_set; }); 54 condvar.wait(lk, [&]{ return is_set; });
56 is_set = false; 55 is_set = false;
57 } 56 }
58 57
59 void Reset() { 58 void Reset() {
60 std::unique_lock<std::mutex> lk(m_mutex); 59 std::unique_lock<std::mutex> lk(mutex);
61 // no other action required, since wait loops on the predicate and any lingering signal will get cleared on the first iteration 60 // no other action required, since wait loops on the predicate and any lingering signal will get cleared on the first iteration
62 is_set = false; 61 is_set = false;
63 } 62 }
64 63
65private: 64private:
66 bool is_set; 65 bool is_set;
67 std::condition_variable m_condvar; 66 std::condition_variable condvar;
68 std::mutex m_mutex; 67 std::mutex mutex;
69}; 68};
70 69
71class Barrier { 70class Barrier {
72public: 71public:
73 Barrier(size_t count) : m_count(count), m_waiting(0) {} 72 explicit Barrier(size_t count_) : count(count_), waiting(0), generation(0) {}
74 73
75 /// Blocks until all "count" threads have called Sync() 74 /// Blocks until all "count" threads have called Sync()
76 void Sync() { 75 void Sync() {
77 std::unique_lock<std::mutex> lk(m_mutex); 76 std::unique_lock<std::mutex> lk(mutex);
77 const size_t current_generation = generation;
78 78
79 // TODO: broken when next round of Sync()s 79 if (++waiting == count) {
80 // is entered before all waiting threads return from the notify_all 80 generation++;
81 81 waiting = 0;
82 if (++m_waiting == m_count) { 82 condvar.notify_all();
83 m_waiting = 0;
84 m_condvar.notify_all();
85 } else { 83 } else {
86 m_condvar.wait(lk, [&]{ return m_waiting == 0; }); 84 condvar.wait(lk, [this, current_generation]{ return current_generation != generation; });
87 } 85 }
88 } 86 }
89 87
90private: 88private:
91 std::condition_variable m_condvar; 89 std::condition_variable condvar;
92 std::mutex m_mutex; 90 std::mutex mutex;
93 const size_t m_count; 91 const size_t count;
94 size_t m_waiting; 92 size_t waiting;
93 size_t generation; // Incremented once each time the barrier is used
95}; 94};
96 95
97void SleepCurrentThread(int ms); 96void SleepCurrentThread(int ms);
@@ -100,8 +99,7 @@ void SwitchCurrentThread(); // On Linux, this is equal to sleep 1ms
100// Use this function during a spin-wait to make the current thread 99// Use this function during a spin-wait to make the current thread
101// relax while another thread is working. This may be more efficient 100// relax while another thread is working. This may be more efficient
102// than using events because event functions use kernel calls. 101// than using events because event functions use kernel calls.
103inline void YieldCPU() 102inline void YieldCPU() {
104{
105 std::this_thread::yield(); 103 std::this_thread::yield();
106} 104}
107 105
diff --git a/src/common/x64/emitter.cpp b/src/common/x64/emitter.cpp
index 1dcf2416c..5662f7f86 100644
--- a/src/common/x64/emitter.cpp
+++ b/src/common/x64/emitter.cpp
@@ -455,6 +455,18 @@ void XEmitter::CALL(const void* fnptr)
455 Write32(u32(distance)); 455 Write32(u32(distance));
456} 456}
457 457
458FixupBranch XEmitter::CALL()
459{
460 FixupBranch branch;
461 branch.type = 1;
462 branch.ptr = code + 5;
463
464 Write8(0xE8);
465 Write32(0);
466
467 return branch;
468}
469
458FixupBranch XEmitter::J(bool force5bytes) 470FixupBranch XEmitter::J(bool force5bytes)
459{ 471{
460 FixupBranch branch; 472 FixupBranch branch;
@@ -531,6 +543,22 @@ void XEmitter::SetJumpTarget(const FixupBranch& branch)
531 } 543 }
532} 544}
533 545
546void XEmitter::SetJumpTarget(const FixupBranch& branch, const u8* target)
547{
548 if (branch.type == 0)
549 {
550 s64 distance = (s64)(target - branch.ptr);
551 ASSERT_MSG(distance >= -0x80 && distance < 0x80, "Jump target too far away, needs force5Bytes = true");
552 branch.ptr[-1] = (u8)(s8)distance;
553 }
554 else if (branch.type == 1)
555 {
556 s64 distance = (s64)(target - branch.ptr);
557 ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL, "Jump target too far away, needs indirect register");
558 ((s32*)branch.ptr)[-1] = (s32)distance;
559 }
560}
561
534//Single byte opcodes 562//Single byte opcodes
535//There is no PUSHAD/POPAD in 64-bit mode. 563//There is no PUSHAD/POPAD in 64-bit mode.
536void XEmitter::INT3() {Write8(0xCC);} 564void XEmitter::INT3() {Write8(0xCC);}
diff --git a/src/common/x64/emitter.h b/src/common/x64/emitter.h
index 7c6548fb5..60a77dfe1 100644
--- a/src/common/x64/emitter.h
+++ b/src/common/x64/emitter.h
@@ -17,6 +17,8 @@
17 17
18#pragma once 18#pragma once
19 19
20#include <cstddef>
21
20#include "common/assert.h" 22#include "common/assert.h"
21#include "common/bit_set.h" 23#include "common/bit_set.h"
22#include "common/common_types.h" 24#include "common/common_types.h"
@@ -425,12 +427,14 @@ public:
425#undef CALL 427#undef CALL
426#endif 428#endif
427 void CALL(const void* fnptr); 429 void CALL(const void* fnptr);
430 FixupBranch CALL();
428 void CALLptr(OpArg arg); 431 void CALLptr(OpArg arg);
429 432
430 FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false); 433 FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false);
431 void J_CC(CCFlags conditionCode, const u8* addr, bool force5Bytes = false); 434 void J_CC(CCFlags conditionCode, const u8* addr, bool force5Bytes = false);
432 435
433 void SetJumpTarget(const FixupBranch& branch); 436 void SetJumpTarget(const FixupBranch& branch);
437 void SetJumpTarget(const FixupBranch& branch, const u8* target);
434 438
435 void SETcc(CCFlags flag, OpArg dest); 439 void SETcc(CCFlags flag, OpArg dest);
436 // Note: CMOV brings small if any benefit on current cpus. 440 // Note: CMOV brings small if any benefit on current cpus.
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index a8d891689..12080a802 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -42,6 +42,7 @@ set(SRCS
42 hle/kernel/timer.cpp 42 hle/kernel/timer.cpp
43 hle/kernel/vm_manager.cpp 43 hle/kernel/vm_manager.cpp
44 hle/service/ac_u.cpp 44 hle/service/ac_u.cpp
45 hle/service/act_a.cpp
45 hle/service/act_u.cpp 46 hle/service/act_u.cpp
46 hle/service/am/am.cpp 47 hle/service/am/am.cpp
47 hle/service/am/am_app.cpp 48 hle/service/am/am_app.cpp
@@ -52,6 +53,7 @@ set(SRCS
52 hle/service/apt/apt_a.cpp 53 hle/service/apt/apt_a.cpp
53 hle/service/apt/apt_s.cpp 54 hle/service/apt/apt_s.cpp
54 hle/service/apt/apt_u.cpp 55 hle/service/apt/apt_u.cpp
56 hle/service/apt/bcfnt/bcfnt.cpp
55 hle/service/boss/boss.cpp 57 hle/service/boss/boss.cpp
56 hle/service/boss/boss_p.cpp 58 hle/service/boss/boss_p.cpp
57 hle/service/boss/boss_u.cpp 59 hle/service/boss/boss_u.cpp
@@ -175,6 +177,7 @@ set(HEADERS
175 hle/kernel/vm_manager.h 177 hle/kernel/vm_manager.h
176 hle/result.h 178 hle/result.h
177 hle/service/ac_u.h 179 hle/service/ac_u.h
180 hle/service/act_a.h
178 hle/service/act_u.h 181 hle/service/act_u.h
179 hle/service/am/am.h 182 hle/service/am/am.h
180 hle/service/am/am_app.h 183 hle/service/am/am_app.h
@@ -185,6 +188,7 @@ set(HEADERS
185 hle/service/apt/apt_a.h 188 hle/service/apt/apt_a.h
186 hle/service/apt/apt_s.h 189 hle/service/apt/apt_s.h
187 hle/service/apt/apt_u.h 190 hle/service/apt/apt_u.h
191 hle/service/apt/bcfnt/bcfnt.h
188 hle/service/boss/boss.h 192 hle/service/boss/boss.h
189 hle/service/boss/boss_p.h 193 hle/service/boss/boss_p.h
190 hle/service/boss/boss_u.h 194 hle/service/boss/boss_u.h
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index 533067d4f..d8abe5aeb 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -6,6 +6,7 @@
6 6
7#include "common/common_types.h" 7#include "common/common_types.h"
8#include "core/arm/skyeye_common/arm_regformat.h" 8#include "core/arm/skyeye_common/arm_regformat.h"
9#include "core/arm/skyeye_common/vfp/asm_vfp.h"
9 10
10namespace Core { 11namespace Core {
11 struct ThreadContext; 12 struct ThreadContext;
diff --git a/src/core/arm/dyncom/arm_dyncom.cpp b/src/core/arm/dyncom/arm_dyncom.cpp
index a3581132c..13492a08b 100644
--- a/src/core/arm/dyncom/arm_dyncom.cpp
+++ b/src/core/arm/dyncom/arm_dyncom.cpp
@@ -93,7 +93,7 @@ void ARM_DynCom::ResetContext(Core::ThreadContext& context, u32 stack_top, u32 e
93 context.cpu_registers[0] = arg; 93 context.cpu_registers[0] = arg;
94 context.pc = entry_point; 94 context.pc = entry_point;
95 context.sp = stack_top; 95 context.sp = stack_top;
96 context.cpsr = 0x1F | ((entry_point & 1) << 5); // Usermode and THUMB mode 96 context.cpsr = USER32MODE | ((entry_point & 1) << 5); // Usermode and THUMB mode
97} 97}
98 98
99void ARM_DynCom::SaveContext(Core::ThreadContext& ctx) { 99void ARM_DynCom::SaveContext(Core::ThreadContext& ctx) {
diff --git a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
index a6faf42b9..cfc67287f 100644
--- a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
+++ b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
@@ -10,7 +10,6 @@
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "common/logging/log.h" 11#include "common/logging/log.h"
12#include "common/microprofile.h" 12#include "common/microprofile.h"
13#include "common/profiler.h"
14 13
15#include "core/memory.h" 14#include "core/memory.h"
16#include "core/hle/svc.h" 15#include "core/hle/svc.h"
@@ -25,9 +24,6 @@
25 24
26#include "core/gdbstub/gdbstub.h" 25#include "core/gdbstub/gdbstub.h"
27 26
28Common::Profiling::TimingCategory profile_execute("DynCom::Execute");
29Common::Profiling::TimingCategory profile_decode("DynCom::Decode");
30
31enum { 27enum {
32 COND = (1 << 0), 28 COND = (1 << 0),
33 NON_BRANCH = (1 << 1), 29 NON_BRANCH = (1 << 1),
@@ -3496,7 +3492,6 @@ static unsigned int InterpreterTranslateInstruction(const ARMul_State* cpu, cons
3496} 3492}
3497 3493
3498static int InterpreterTranslateBlock(ARMul_State* cpu, int& bb_start, u32 addr) { 3494static int InterpreterTranslateBlock(ARMul_State* cpu, int& bb_start, u32 addr) {
3499 Common::Profiling::ScopeTimer timer_decode(profile_decode);
3500 MICROPROFILE_SCOPE(DynCom_Decode); 3495 MICROPROFILE_SCOPE(DynCom_Decode);
3501 3496
3502 // Decode instruction, get index 3497 // Decode instruction, get index
@@ -3530,7 +3525,6 @@ static int InterpreterTranslateBlock(ARMul_State* cpu, int& bb_start, u32 addr)
3530} 3525}
3531 3526
3532static int InterpreterTranslateSingle(ARMul_State* cpu, int& bb_start, u32 addr) { 3527static int InterpreterTranslateSingle(ARMul_State* cpu, int& bb_start, u32 addr) {
3533 Common::Profiling::ScopeTimer timer_decode(profile_decode);
3534 MICROPROFILE_SCOPE(DynCom_Decode); 3528 MICROPROFILE_SCOPE(DynCom_Decode);
3535 3529
3536 ARM_INST_PTR inst_base = nullptr; 3530 ARM_INST_PTR inst_base = nullptr;
@@ -3565,7 +3559,6 @@ static int clz(unsigned int x) {
3565MICROPROFILE_DEFINE(DynCom_Execute, "DynCom", "Execute", MP_RGB(255, 0, 0)); 3559MICROPROFILE_DEFINE(DynCom_Execute, "DynCom", "Execute", MP_RGB(255, 0, 0));
3566 3560
3567unsigned InterpreterMainLoop(ARMul_State* cpu) { 3561unsigned InterpreterMainLoop(ARMul_State* cpu) {
3568 Common::Profiling::ScopeTimer timer_execute(profile_execute);
3569 MICROPROFILE_SCOPE(DynCom_Execute); 3562 MICROPROFILE_SCOPE(DynCom_Execute);
3570 3563
3571 GDBStub::BreakpointAddress breakpoint_data; 3564 GDBStub::BreakpointAddress breakpoint_data;
@@ -4080,11 +4073,12 @@ unsigned InterpreterMainLoop(ARMul_State* cpu) {
4080 if ((inst_base->cond == ConditionCode::AL) || CondPassed(cpu, inst_base->cond)) { 4073 if ((inst_base->cond == ConditionCode::AL) || CondPassed(cpu, inst_base->cond)) {
4081 unsigned int inst = inst_cream->inst; 4074 unsigned int inst = inst_cream->inst;
4082 if (BITS(inst, 20, 27) == 0x12 && BITS(inst, 4, 7) == 0x3) { 4075 if (BITS(inst, 20, 27) == 0x12 && BITS(inst, 4, 7) == 0x3) {
4076 const u32 jump_address = cpu->Reg[inst_cream->val.Rm];
4083 cpu->Reg[14] = (cpu->Reg[15] + cpu->GetInstructionSize()); 4077 cpu->Reg[14] = (cpu->Reg[15] + cpu->GetInstructionSize());
4084 if(cpu->TFlag) 4078 if(cpu->TFlag)
4085 cpu->Reg[14] |= 0x1; 4079 cpu->Reg[14] |= 0x1;
4086 cpu->Reg[15] = cpu->Reg[inst_cream->val.Rm] & 0xfffffffe; 4080 cpu->Reg[15] = jump_address & 0xfffffffe;
4087 cpu->TFlag = cpu->Reg[inst_cream->val.Rm] & 0x1; 4081 cpu->TFlag = jump_address & 0x1;
4088 } else { 4082 } else {
4089 cpu->Reg[14] = (cpu->Reg[15] + cpu->GetInstructionSize()); 4083 cpu->Reg[14] = (cpu->Reg[15] + cpu->GetInstructionSize());
4090 cpu->TFlag = 0x1; 4084 cpu->TFlag = 0x1;
@@ -5533,28 +5527,32 @@ unsigned InterpreterMainLoop(ARMul_State* cpu) {
5533 5527
5534 // SMUAD and SMLAD 5528 // SMUAD and SMLAD
5535 if (BIT(op2, 1) == 0) { 5529 if (BIT(op2, 1) == 0) {
5536 RD = (product1 + product2); 5530 u32 rd_val = (product1 + product2);
5537 5531
5538 if (inst_cream->Ra != 15) { 5532 if (inst_cream->Ra != 15) {
5539 RD += cpu->Reg[inst_cream->Ra]; 5533 rd_val += cpu->Reg[inst_cream->Ra];
5540 5534
5541 if (ARMul_AddOverflowQ(product1 + product2, cpu->Reg[inst_cream->Ra])) 5535 if (ARMul_AddOverflowQ(product1 + product2, cpu->Reg[inst_cream->Ra]))
5542 cpu->Cpsr |= (1 << 27); 5536 cpu->Cpsr |= (1 << 27);
5543 } 5537 }
5544 5538
5539 RD = rd_val;
5540
5545 if (ARMul_AddOverflowQ(product1, product2)) 5541 if (ARMul_AddOverflowQ(product1, product2))
5546 cpu->Cpsr |= (1 << 27); 5542 cpu->Cpsr |= (1 << 27);
5547 } 5543 }
5548 // SMUSD and SMLSD 5544 // SMUSD and SMLSD
5549 else { 5545 else {
5550 RD = (product1 - product2); 5546 u32 rd_val = (product1 - product2);
5551 5547
5552 if (inst_cream->Ra != 15) { 5548 if (inst_cream->Ra != 15) {
5553 RD += cpu->Reg[inst_cream->Ra]; 5549 rd_val += cpu->Reg[inst_cream->Ra];
5554 5550
5555 if (ARMul_AddOverflowQ(product1 - product2, cpu->Reg[inst_cream->Ra])) 5551 if (ARMul_AddOverflowQ(product1 - product2, cpu->Reg[inst_cream->Ra]))
5556 cpu->Cpsr |= (1 << 27); 5552 cpu->Cpsr |= (1 << 27);
5557 } 5553 }
5554
5555 RD = rd_val;
5558 } 5556 }
5559 } 5557 }
5560 5558
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 3bb843aab..cabab744a 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -51,7 +51,7 @@ void RunLoop(int tight_loop) {
51 } 51 }
52 52
53 HW::Update(); 53 HW::Update();
54 if (HLE::g_reschedule) { 54 if (HLE::IsReschedulePending()) {
55 Kernel::Reschedule(); 55 Kernel::Reschedule();
56 } 56 }
57} 57}
diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp
index c1a7ec5bf..820b19e1a 100644
--- a/src/core/gdbstub/gdbstub.cpp
+++ b/src/core/gdbstub/gdbstub.cpp
@@ -374,7 +374,7 @@ static void SendReply(const char* reply) {
374 374
375 memset(command_buffer, 0, sizeof(command_buffer)); 375 memset(command_buffer, 0, sizeof(command_buffer));
376 376
377 command_length = strlen(reply); 377 command_length = static_cast<u32>(strlen(reply));
378 if (command_length + 4 > sizeof(command_buffer)) { 378 if (command_length + 4 > sizeof(command_buffer)) {
379 LOG_ERROR(Debug_GDBStub, "command_buffer overflow in SendReply"); 379 LOG_ERROR(Debug_GDBStub, "command_buffer overflow in SendReply");
380 return; 380 return;
@@ -437,7 +437,7 @@ static void HandleSetThread() {
437 * 437 *
438 * @param signal Signal to be sent to client. 438 * @param signal Signal to be sent to client.
439 */ 439 */
440void SendSignal(u32 signal) { 440static void SendSignal(u32 signal) {
441 if (gdbserver_socket == -1) { 441 if (gdbserver_socket == -1) {
442 return; 442 return;
443 } 443 }
@@ -515,7 +515,7 @@ static bool IsDataAvailable() {
515 return false; 515 return false;
516 } 516 }
517 517
518 return FD_ISSET(gdbserver_socket, &fd_socket); 518 return FD_ISSET(gdbserver_socket, &fd_socket) != 0;
519} 519}
520 520
521/// Send requested register to gdb client. 521/// Send requested register to gdb client.
@@ -529,7 +529,7 @@ static void ReadRegister() {
529 id |= HexCharToValue(command_buffer[2]); 529 id |= HexCharToValue(command_buffer[2]);
530 } 530 }
531 531
532 if (id >= R0_REGISTER && id <= R15_REGISTER) { 532 if (id <= R15_REGISTER) {
533 IntToGdbHex(reply, Core::g_app_core->GetReg(id)); 533 IntToGdbHex(reply, Core::g_app_core->GetReg(id));
534 } else if (id == CPSR_REGISTER) { 534 } else if (id == CPSR_REGISTER) {
535 IntToGdbHex(reply, Core::g_app_core->GetCPSR()); 535 IntToGdbHex(reply, Core::g_app_core->GetCPSR());
@@ -584,7 +584,7 @@ static void WriteRegister() {
584 id |= HexCharToValue(command_buffer[2]); 584 id |= HexCharToValue(command_buffer[2]);
585 } 585 }
586 586
587 if (id >= R0_REGISTER && id <= R15_REGISTER) { 587 if (id <= R15_REGISTER) {
588 Core::g_app_core->SetReg(id, GdbHexToInt(buffer_ptr)); 588 Core::g_app_core->SetReg(id, GdbHexToInt(buffer_ptr));
589 } else if (id == CPSR_REGISTER) { 589 } else if (id == CPSR_REGISTER) {
590 Core::g_app_core->SetCPSR(GdbHexToInt(buffer_ptr)); 590 Core::g_app_core->SetCPSR(GdbHexToInt(buffer_ptr));
@@ -633,10 +633,10 @@ static void ReadMemory() {
633 633
634 auto start_offset = command_buffer+1; 634 auto start_offset = command_buffer+1;
635 auto addr_pos = std::find(start_offset, command_buffer+command_length, ','); 635 auto addr_pos = std::find(start_offset, command_buffer+command_length, ',');
636 PAddr addr = HexToInt(start_offset, addr_pos - start_offset); 636 PAddr addr = HexToInt(start_offset, static_cast<u32>(addr_pos - start_offset));
637 637
638 start_offset = addr_pos+1; 638 start_offset = addr_pos+1;
639 u32 len = HexToInt(start_offset, (command_buffer + command_length) - start_offset); 639 u32 len = HexToInt(start_offset, static_cast<u32>((command_buffer + command_length) - start_offset));
640 640
641 LOG_DEBUG(Debug_GDBStub, "gdb: addr: %08x len: %08x\n", addr, len); 641 LOG_DEBUG(Debug_GDBStub, "gdb: addr: %08x len: %08x\n", addr, len);
642 642
@@ -658,11 +658,11 @@ static void ReadMemory() {
658static void WriteMemory() { 658static void WriteMemory() {
659 auto start_offset = command_buffer+1; 659 auto start_offset = command_buffer+1;
660 auto addr_pos = std::find(start_offset, command_buffer+command_length, ','); 660 auto addr_pos = std::find(start_offset, command_buffer+command_length, ',');
661 PAddr addr = HexToInt(start_offset, addr_pos - start_offset); 661 PAddr addr = HexToInt(start_offset, static_cast<u32>(addr_pos - start_offset));
662 662
663 start_offset = addr_pos+1; 663 start_offset = addr_pos+1;
664 auto len_pos = std::find(start_offset, command_buffer+command_length, ':'); 664 auto len_pos = std::find(start_offset, command_buffer+command_length, ':');
665 u32 len = HexToInt(start_offset, len_pos - start_offset); 665 u32 len = HexToInt(start_offset, static_cast<u32>(len_pos - start_offset));
666 666
667 u8* dst = Memory::GetPointer(addr); 667 u8* dst = Memory::GetPointer(addr);
668 if (!dst) { 668 if (!dst) {
@@ -713,7 +713,7 @@ static void Continue() {
713 * @param addr Address of breakpoint. 713 * @param addr Address of breakpoint.
714 * @param len Length of breakpoint. 714 * @param len Length of breakpoint.
715 */ 715 */
716bool CommitBreakpoint(BreakpointType type, PAddr addr, u32 len) { 716static bool CommitBreakpoint(BreakpointType type, PAddr addr, u32 len) {
717 std::map<u32, Breakpoint>& p = GetBreakpointList(type); 717 std::map<u32, Breakpoint>& p = GetBreakpointList(type);
718 718
719 Breakpoint breakpoint; 719 Breakpoint breakpoint;
@@ -752,10 +752,10 @@ static void AddBreakpoint() {
752 752
753 auto start_offset = command_buffer+3; 753 auto start_offset = command_buffer+3;
754 auto addr_pos = std::find(start_offset, command_buffer+command_length, ','); 754 auto addr_pos = std::find(start_offset, command_buffer+command_length, ',');
755 PAddr addr = HexToInt(start_offset, addr_pos - start_offset); 755 PAddr addr = HexToInt(start_offset, static_cast<u32>(addr_pos - start_offset));
756 756
757 start_offset = addr_pos+1; 757 start_offset = addr_pos+1;
758 u32 len = HexToInt(start_offset, (command_buffer + command_length) - start_offset); 758 u32 len = HexToInt(start_offset, static_cast<u32>((command_buffer + command_length) - start_offset));
759 759
760 if (type == BreakpointType::Access) { 760 if (type == BreakpointType::Access) {
761 // Access is made up of Read and Write types, so add both breakpoints 761 // Access is made up of Read and Write types, so add both breakpoints
@@ -800,10 +800,10 @@ static void RemoveBreakpoint() {
800 800
801 auto start_offset = command_buffer+3; 801 auto start_offset = command_buffer+3;
802 auto addr_pos = std::find(start_offset, command_buffer+command_length, ','); 802 auto addr_pos = std::find(start_offset, command_buffer+command_length, ',');
803 PAddr addr = HexToInt(start_offset, addr_pos - start_offset); 803 PAddr addr = HexToInt(start_offset, static_cast<u32>(addr_pos - start_offset));
804 804
805 start_offset = addr_pos+1; 805 start_offset = addr_pos+1;
806 u32 len = HexToInt(start_offset, (command_buffer + command_length) - start_offset); 806 u32 len = HexToInt(start_offset, static_cast<u32>((command_buffer + command_length) - start_offset));
807 807
808 if (type == BreakpointType::Access) { 808 if (type == BreakpointType::Access) {
809 // Access is made up of Read and Write types, so add both breakpoints 809 // Access is made up of Read and Write types, so add both breakpoints
@@ -907,7 +907,7 @@ void ToggleServer(bool status) {
907 } 907 }
908} 908}
909 909
910void Init(u16 port) { 910static void Init(u16 port) {
911 if (!g_server_enabled) { 911 if (!g_server_enabled) {
912 // Set the halt loop to false in case the user enabled the gdbstub mid-execution. 912 // Set the halt loop to false in case the user enabled the gdbstub mid-execution.
913 // This way the CPU can still execute normally. 913 // This way the CPU can still execute normally.
diff --git a/src/core/hle/applets/applet.h b/src/core/hle/applets/applet.h
index af442f81d..754c6f7db 100644
--- a/src/core/hle/applets/applet.h
+++ b/src/core/hle/applets/applet.h
@@ -65,6 +65,7 @@ protected:
65 virtual ResultCode StartImpl(const Service::APT::AppletStartupParameter& parameter) = 0; 65 virtual ResultCode StartImpl(const Service::APT::AppletStartupParameter& parameter) = 0;
66 66
67 Service::APT::AppletId id; ///< Id of this Applet 67 Service::APT::AppletId id; ///< Id of this Applet
68 std::shared_ptr<std::vector<u8>> heap_memory; ///< Heap memory for this Applet
68}; 69};
69 70
70/// Returns whether a library applet is currently running 71/// Returns whether a library applet is currently running
diff --git a/src/core/hle/applets/mii_selector.cpp b/src/core/hle/applets/mii_selector.cpp
index 708d2f630..bf39eca22 100644
--- a/src/core/hle/applets/mii_selector.cpp
+++ b/src/core/hle/applets/mii_selector.cpp
@@ -21,13 +21,6 @@
21namespace HLE { 21namespace HLE {
22namespace Applets { 22namespace Applets {
23 23
24MiiSelector::MiiSelector(Service::APT::AppletId id) : Applet(id), started(false) {
25 // Create the SharedMemory that will hold the framebuffer data
26 // TODO(Subv): What size should we use here?
27 using Kernel::MemoryPermission;
28 framebuffer_memory = Kernel::SharedMemory::Create(0x1000, MemoryPermission::ReadWrite, MemoryPermission::ReadWrite, "MiiSelector Memory");
29}
30
31ResultCode MiiSelector::ReceiveParameter(const Service::APT::MessageParameter& parameter) { 24ResultCode MiiSelector::ReceiveParameter(const Service::APT::MessageParameter& parameter) {
32 if (parameter.signal != static_cast<u32>(Service::APT::SignalType::LibAppJustStarted)) { 25 if (parameter.signal != static_cast<u32>(Service::APT::SignalType::LibAppJustStarted)) {
33 LOG_ERROR(Service_APT, "unsupported signal %u", parameter.signal); 26 LOG_ERROR(Service_APT, "unsupported signal %u", parameter.signal);
@@ -36,8 +29,23 @@ ResultCode MiiSelector::ReceiveParameter(const Service::APT::MessageParameter& p
36 return ResultCode(-1); 29 return ResultCode(-1);
37 } 30 }
38 31
32 // The LibAppJustStarted message contains a buffer with the size of the framebuffer shared memory.
33 // Create the SharedMemory that will hold the framebuffer data
34 Service::APT::CaptureBufferInfo capture_info;
35 ASSERT(sizeof(capture_info) == parameter.buffer_size);
36
37 memcpy(&capture_info, parameter.data, sizeof(capture_info));
38
39 using Kernel::MemoryPermission;
40 // Allocate a heap block of the required size for this applet.
41 heap_memory = std::make_shared<std::vector<u8>>(capture_info.size);
42 // Create a SharedMemory that directly points to this heap block.
43 framebuffer_memory = Kernel::SharedMemory::CreateForApplet(heap_memory, 0, heap_memory->size(),
44 MemoryPermission::ReadWrite, MemoryPermission::ReadWrite,
45 "MiiSelector Memory");
46
47 // Send the response message with the newly created SharedMemory
39 Service::APT::MessageParameter result; 48 Service::APT::MessageParameter result;
40 // The buffer passed in parameter contains the data returned by GSPGPU::ImportDisplayCaptureInfo
41 result.signal = static_cast<u32>(Service::APT::SignalType::LibAppFinished); 49 result.signal = static_cast<u32>(Service::APT::SignalType::LibAppFinished);
42 result.data = nullptr; 50 result.data = nullptr;
43 result.buffer_size = 0; 51 result.buffer_size = 0;
@@ -55,6 +63,11 @@ ResultCode MiiSelector::StartImpl(const Service::APT::AppletStartupParameter& pa
55 // TODO(Subv): Set the expected fields in the response buffer before resending it to the application. 63 // TODO(Subv): Set the expected fields in the response buffer before resending it to the application.
56 // TODO(Subv): Reverse the parameter format for the Mii Selector 64 // TODO(Subv): Reverse the parameter format for the Mii Selector
57 65
66 if(parameter.buffer_size >= sizeof(u32)) {
67 // TODO: defaults return no error, but garbage in other unknown fields
68 memset(parameter.data, 0, sizeof(u32));
69 }
70
58 // Let the application know that we're closing 71 // Let the application know that we're closing
59 Service::APT::MessageParameter message; 72 Service::APT::MessageParameter message;
60 message.buffer_size = parameter.buffer_size; 73 message.buffer_size = parameter.buffer_size;
diff --git a/src/core/hle/applets/mii_selector.h b/src/core/hle/applets/mii_selector.h
index 6a3e7c8eb..be6b04642 100644
--- a/src/core/hle/applets/mii_selector.h
+++ b/src/core/hle/applets/mii_selector.h
@@ -16,17 +16,61 @@
16namespace HLE { 16namespace HLE {
17namespace Applets { 17namespace Applets {
18 18
19struct MiiConfig {
20 u8 unk_000;
21 u8 unk_001;
22 u8 unk_002;
23 u8 unk_003;
24 u8 unk_004;
25 INSERT_PADDING_BYTES(3);
26 u16 unk_008;
27 INSERT_PADDING_BYTES(0x8C - 0xA);
28 u8 unk_08C;
29 INSERT_PADDING_BYTES(3);
30 u16 unk_090;
31 INSERT_PADDING_BYTES(2);
32 u32 unk_094;
33 u16 unk_098;
34 u8 unk_09A[0x64];
35 u8 unk_0FE;
36 u8 unk_0FF;
37 u32 unk_100;
38};
39
40static_assert(sizeof(MiiConfig) == 0x104, "MiiConfig structure has incorrect size");
41#define ASSERT_REG_POSITION(field_name, position) static_assert(offsetof(MiiConfig, field_name) == position, "Field "#field_name" has invalid position")
42ASSERT_REG_POSITION(unk_008, 0x08);
43ASSERT_REG_POSITION(unk_08C, 0x8C);
44ASSERT_REG_POSITION(unk_090, 0x90);
45ASSERT_REG_POSITION(unk_094, 0x94);
46ASSERT_REG_POSITION(unk_0FE, 0xFE);
47#undef ASSERT_REG_POSITION
48
49struct MiiResult {
50 u32 result_code;
51 u8 unk_04;
52 INSERT_PADDING_BYTES(7);
53 u8 unk_0C[0x60];
54 u8 unk_6C[0x16];
55 INSERT_PADDING_BYTES(2);
56};
57static_assert(sizeof(MiiResult) == 0x84, "MiiResult structure has incorrect size");
58#define ASSERT_REG_POSITION(field_name, position) static_assert(offsetof(MiiResult, field_name) == position, "Field "#field_name" has invalid position")
59ASSERT_REG_POSITION(unk_0C, 0x0C);
60ASSERT_REG_POSITION(unk_6C, 0x6C);
61#undef ASSERT_REG_POSITION
62
19class MiiSelector final : public Applet { 63class MiiSelector final : public Applet {
20public: 64public:
21 MiiSelector(Service::APT::AppletId id); 65 MiiSelector(Service::APT::AppletId id) : Applet(id), started(false) { }
22 66
23 ResultCode ReceiveParameter(const Service::APT::MessageParameter& parameter) override; 67 ResultCode ReceiveParameter(const Service::APT::MessageParameter& parameter) override;
24 ResultCode StartImpl(const Service::APT::AppletStartupParameter& parameter) override; 68 ResultCode StartImpl(const Service::APT::AppletStartupParameter& parameter) override;
25 void Update() override; 69 void Update() override;
26 bool IsRunning() const override { return started; } 70 bool IsRunning() const override { return started; }
27 71
28 /// TODO(Subv): Find out what this is actually used for. 72 /// This SharedMemory will be created when we receive the LibAppJustStarted message.
29 /// It is believed that the application stores the current screen image here. 73 /// It holds the framebuffer info retrieved by the application with GSPGPU::ImportDisplayCaptureInfo
30 Kernel::SharedPtr<Kernel::SharedMemory> framebuffer_memory; 74 Kernel::SharedPtr<Kernel::SharedMemory> framebuffer_memory;
31 75
32 /// Whether this applet is currently running instead of the host application or not. 76 /// Whether this applet is currently running instead of the host application or not.
diff --git a/src/core/hle/applets/swkbd.cpp b/src/core/hle/applets/swkbd.cpp
index 1db6b5a17..90c6adc65 100644
--- a/src/core/hle/applets/swkbd.cpp
+++ b/src/core/hle/applets/swkbd.cpp
@@ -24,13 +24,6 @@
24namespace HLE { 24namespace HLE {
25namespace Applets { 25namespace Applets {
26 26
27SoftwareKeyboard::SoftwareKeyboard(Service::APT::AppletId id) : Applet(id), started(false) {
28 // Create the SharedMemory that will hold the framebuffer data
29 // TODO(Subv): What size should we use here?
30 using Kernel::MemoryPermission;
31 framebuffer_memory = Kernel::SharedMemory::Create(0x1000, MemoryPermission::ReadWrite, MemoryPermission::ReadWrite, "SoftwareKeyboard Memory");
32}
33
34ResultCode SoftwareKeyboard::ReceiveParameter(Service::APT::MessageParameter const& parameter) { 27ResultCode SoftwareKeyboard::ReceiveParameter(Service::APT::MessageParameter const& parameter) {
35 if (parameter.signal != static_cast<u32>(Service::APT::SignalType::LibAppJustStarted)) { 28 if (parameter.signal != static_cast<u32>(Service::APT::SignalType::LibAppJustStarted)) {
36 LOG_ERROR(Service_APT, "unsupported signal %u", parameter.signal); 29 LOG_ERROR(Service_APT, "unsupported signal %u", parameter.signal);
@@ -39,8 +32,23 @@ ResultCode SoftwareKeyboard::ReceiveParameter(Service::APT::MessageParameter con
39 return ResultCode(-1); 32 return ResultCode(-1);
40 } 33 }
41 34
35 // The LibAppJustStarted message contains a buffer with the size of the framebuffer shared memory.
36 // Create the SharedMemory that will hold the framebuffer data
37 Service::APT::CaptureBufferInfo capture_info;
38 ASSERT(sizeof(capture_info) == parameter.buffer_size);
39
40 memcpy(&capture_info, parameter.data, sizeof(capture_info));
41
42 using Kernel::MemoryPermission;
43 // Allocate a heap block of the required size for this applet.
44 heap_memory = std::make_shared<std::vector<u8>>(capture_info.size);
45 // Create a SharedMemory that directly points to this heap block.
46 framebuffer_memory = Kernel::SharedMemory::CreateForApplet(heap_memory, 0, heap_memory->size(),
47 MemoryPermission::ReadWrite, MemoryPermission::ReadWrite,
48 "SoftwareKeyboard Memory");
49
50 // Send the response message with the newly created SharedMemory
42 Service::APT::MessageParameter result; 51 Service::APT::MessageParameter result;
43 // The buffer passed in parameter contains the data returned by GSPGPU::ImportDisplayCaptureInfo
44 result.signal = static_cast<u32>(Service::APT::SignalType::LibAppFinished); 52 result.signal = static_cast<u32>(Service::APT::SignalType::LibAppFinished);
45 result.data = nullptr; 53 result.data = nullptr;
46 result.buffer_size = 0; 54 result.buffer_size = 0;
diff --git a/src/core/hle/applets/swkbd.h b/src/core/hle/applets/swkbd.h
index cb95b8d90..cf26a8fb7 100644
--- a/src/core/hle/applets/swkbd.h
+++ b/src/core/hle/applets/swkbd.h
@@ -53,8 +53,7 @@ static_assert(sizeof(SoftwareKeyboardConfig) == 0x400, "Software Keyboard Config
53 53
54class SoftwareKeyboard final : public Applet { 54class SoftwareKeyboard final : public Applet {
55public: 55public:
56 SoftwareKeyboard(Service::APT::AppletId id); 56 SoftwareKeyboard(Service::APT::AppletId id) : Applet(id), started(false) { }
57 ~SoftwareKeyboard() {}
58 57
59 ResultCode ReceiveParameter(const Service::APT::MessageParameter& parameter) override; 58 ResultCode ReceiveParameter(const Service::APT::MessageParameter& parameter) override;
60 ResultCode StartImpl(const Service::APT::AppletStartupParameter& parameter) override; 59 ResultCode StartImpl(const Service::APT::AppletStartupParameter& parameter) override;
@@ -72,8 +71,8 @@ public:
72 */ 71 */
73 void Finalize(); 72 void Finalize();
74 73
75 /// TODO(Subv): Find out what this is actually used for. 74 /// This SharedMemory will be created when we receive the LibAppJustStarted message.
76 /// It is believed that the application stores the current screen image here. 75 /// It holds the framebuffer info retrieved by the application with GSPGPU::ImportDisplayCaptureInfo
77 Kernel::SharedPtr<Kernel::SharedMemory> framebuffer_memory; 76 Kernel::SharedPtr<Kernel::SharedMemory> framebuffer_memory;
78 77
79 /// SharedMemory where the output text will be stored 78 /// SharedMemory where the output text will be stored
diff --git a/src/core/hle/config_mem.cpp b/src/core/hle/config_mem.cpp
index b1a72dc0c..ccd73cfcb 100644
--- a/src/core/hle/config_mem.cpp
+++ b/src/core/hle/config_mem.cpp
@@ -3,13 +3,6 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <cstring> 5#include <cstring>
6
7#include "common/assert.h"
8#include "common/common_types.h"
9#include "common/common_funcs.h"
10
11#include "core/core.h"
12#include "core/memory.h"
13#include "core/hle/config_mem.h" 6#include "core/hle/config_mem.h"
14 7
15//////////////////////////////////////////////////////////////////////////////////////////////////// 8////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/core/hle/function_wrappers.h b/src/core/hle/function_wrappers.h
index 4d718b681..bf7f875b6 100644
--- a/src/core/hle/function_wrappers.h
+++ b/src/core/hle/function_wrappers.h
@@ -170,7 +170,8 @@ template<ResultCode func(s64*, u32, s32)> void Wrap() {
170 170
171template<ResultCode func(u32*, u32, u32, u32, u32)> void Wrap() { 171template<ResultCode func(u32*, u32, u32, u32, u32)> void Wrap() {
172 u32 param_1 = 0; 172 u32 param_1 = 0;
173 u32 retval = func(&param_1, PARAM(1), PARAM(2), PARAM(3), PARAM(4)).raw; 173 // The last parameter is passed in R0 instead of R4
174 u32 retval = func(&param_1, PARAM(1), PARAM(2), PARAM(3), PARAM(0)).raw;
174 Core::g_app_core->SetReg(1, param_1); 175 Core::g_app_core->SetReg(1, param_1);
175 FuncReturn(retval); 176 FuncReturn(retval);
176} 177}
diff --git a/src/core/hle/hle.cpp b/src/core/hle/hle.cpp
index 331b1b22a..5c5373517 100644
--- a/src/core/hle/hle.cpp
+++ b/src/core/hle/hle.cpp
@@ -8,15 +8,17 @@
8#include "core/arm/arm_interface.h" 8#include "core/arm/arm_interface.h"
9#include "core/core.h" 9#include "core/core.h"
10#include "core/hle/hle.h" 10#include "core/hle/hle.h"
11#include "core/hle/config_mem.h"
12#include "core/hle/shared_page.h"
13#include "core/hle/service/service.h" 11#include "core/hle/service/service.h"
14 12
15//////////////////////////////////////////////////////////////////////////////////////////////////// 13////////////////////////////////////////////////////////////////////////////////////////////////////
16 14
17namespace HLE { 15namespace {
16
17bool reschedule; ///< If true, immediately reschedules the CPU to a new thread
18 18
19bool g_reschedule; ///< If true, immediately reschedules the CPU to a new thread 19}
20
21namespace HLE {
20 22
21void Reschedule(const char *reason) { 23void Reschedule(const char *reason) {
22 DEBUG_ASSERT_MSG(reason != nullptr && strlen(reason) < 256, "Reschedule: Invalid or too long reason."); 24 DEBUG_ASSERT_MSG(reason != nullptr && strlen(reason) < 256, "Reschedule: Invalid or too long reason.");
@@ -29,13 +31,21 @@ void Reschedule(const char *reason) {
29 31
30 Core::g_app_core->PrepareReschedule(); 32 Core::g_app_core->PrepareReschedule();
31 33
32 g_reschedule = true; 34 reschedule = true;
35}
36
37bool IsReschedulePending() {
38 return reschedule;
39}
40
41void DoneRescheduling() {
42 reschedule = false;
33} 43}
34 44
35void Init() { 45void Init() {
36 Service::Init(); 46 Service::Init();
37 47
38 g_reschedule = false; 48 reschedule = false;
39 49
40 LOG_DEBUG(Kernel, "initialized OK"); 50 LOG_DEBUG(Kernel, "initialized OK");
41} 51}
diff --git a/src/core/hle/hle.h b/src/core/hle/hle.h
index e0b97797c..69ac0ade6 100644
--- a/src/core/hle/hle.h
+++ b/src/core/hle/hle.h
@@ -13,9 +13,9 @@ const Handle INVALID_HANDLE = 0;
13 13
14namespace HLE { 14namespace HLE {
15 15
16extern bool g_reschedule; ///< If true, immediately reschedules the CPU to a new thread
17
18void Reschedule(const char *reason); 16void Reschedule(const char *reason);
17bool IsReschedulePending();
18void DoneRescheduling();
19 19
20void Init(); 20void Init();
21void Shutdown(); 21void Shutdown();
diff --git a/src/core/hle/kernel/memory.cpp b/src/core/hle/kernel/memory.cpp
index 862643448..17ae87aef 100644
--- a/src/core/hle/kernel/memory.cpp
+++ b/src/core/hle/kernel/memory.cpp
@@ -55,6 +55,9 @@ void MemoryInit(u32 mem_type) {
55 memory_regions[i].size = memory_region_sizes[mem_type][i]; 55 memory_regions[i].size = memory_region_sizes[mem_type][i];
56 memory_regions[i].used = 0; 56 memory_regions[i].used = 0;
57 memory_regions[i].linear_heap_memory = std::make_shared<std::vector<u8>>(); 57 memory_regions[i].linear_heap_memory = std::make_shared<std::vector<u8>>();
58 // Reserve enough space for this region of FCRAM.
59 // We do not want this block of memory to be relocated when allocating from it.
60 memory_regions[i].linear_heap_memory->reserve(memory_regions[i].size);
58 61
59 base += memory_regions[i].size; 62 base += memory_regions[i].size;
60 } 63 }
@@ -107,9 +110,7 @@ struct MemoryArea {
107 110
108// We don't declare the IO regions in here since its handled by other means. 111// We don't declare the IO regions in here since its handled by other means.
109static MemoryArea memory_areas[] = { 112static MemoryArea memory_areas[] = {
110 {SHARED_MEMORY_VADDR, SHARED_MEMORY_SIZE, "Shared Memory"}, // Shared memory
111 {VRAM_VADDR, VRAM_SIZE, "VRAM"}, // Video memory (VRAM) 113 {VRAM_VADDR, VRAM_SIZE, "VRAM"}, // Video memory (VRAM)
112 {TLS_AREA_VADDR, TLS_AREA_SIZE, "TLS Area"}, // TLS memory
113}; 114};
114 115
115} 116}
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index 0546f6e16..69302cc82 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -209,7 +209,7 @@ ResultVal<VAddr> Process::LinearAllocate(VAddr target, u32 size, VMAPermission p
209 return ERR_INVALID_ADDRESS; 209 return ERR_INVALID_ADDRESS;
210 } 210 }
211 211
212 // Expansion of the linear heap is only allowed if you do an allocation immediatelly at its 212 // Expansion of the linear heap is only allowed if you do an allocation immediately at its
213 // end. It's possible to free gaps in the middle of the heap and then reallocate them later, 213 // end. It's possible to free gaps in the middle of the heap and then reallocate them later,
214 // but expansions are only allowed at the end. 214 // but expansions are only allowed at the end.
215 if (target == heap_end) { 215 if (target == heap_end) {
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index 6d2ca96a2..d781ef32c 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -107,6 +107,8 @@ public:
107 ProcessFlags flags; 107 ProcessFlags flags;
108 /// Kernel compatibility version for this process 108 /// Kernel compatibility version for this process
109 u16 kernel_version = 0; 109 u16 kernel_version = 0;
110 /// The default CPU for this process, threads are scheduled on this cpu by default.
111 u8 ideal_processor = 0;
110 112
111 /// The id of this process 113 /// The id of this process
112 u32 process_id = next_process_id++; 114 u32 process_id = next_process_id++;
@@ -140,8 +142,11 @@ public:
140 142
141 MemoryRegionInfo* memory_region = nullptr; 143 MemoryRegionInfo* memory_region = nullptr;
142 144
143 /// Bitmask of the used TLS slots 145 /// The Thread Local Storage area is allocated as processes create threads,
144 std::bitset<300> used_tls_slots; 146 /// each TLS area is 0x200 bytes, so one page (0x1000) is split up in 8 parts, and each part
147 /// holds the TLS for a specific thread. This vector contains which parts are in use for each page as a bitmask.
148 /// This vector will grow as more pages are allocated for new threads.
149 std::vector<std::bitset<8>> tls_slots;
145 150
146 VAddr GetLinearHeapAreaAddress() const; 151 VAddr GetLinearHeapAreaAddress() const;
147 VAddr GetLinearHeapBase() const; 152 VAddr GetLinearHeapBase() const;
diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp
index d90f0f00f..6a22c8986 100644
--- a/src/core/hle/kernel/shared_memory.cpp
+++ b/src/core/hle/kernel/shared_memory.cpp
@@ -7,6 +7,7 @@
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8 8
9#include "core/memory.h" 9#include "core/memory.h"
10#include "core/hle/kernel/memory.h"
10#include "core/hle/kernel/shared_memory.h" 11#include "core/hle/kernel/shared_memory.h"
11 12
12namespace Kernel { 13namespace Kernel {
@@ -14,93 +15,157 @@ namespace Kernel {
14SharedMemory::SharedMemory() {} 15SharedMemory::SharedMemory() {}
15SharedMemory::~SharedMemory() {} 16SharedMemory::~SharedMemory() {}
16 17
17SharedPtr<SharedMemory> SharedMemory::Create(u32 size, MemoryPermission permissions, 18SharedPtr<SharedMemory> SharedMemory::Create(SharedPtr<Process> owner_process, u32 size, MemoryPermission permissions,
18 MemoryPermission other_permissions, std::string name) { 19 MemoryPermission other_permissions, VAddr address, MemoryRegion region, std::string name) {
19 SharedPtr<SharedMemory> shared_memory(new SharedMemory); 20 SharedPtr<SharedMemory> shared_memory(new SharedMemory);
20 21
22 shared_memory->owner_process = owner_process;
21 shared_memory->name = std::move(name); 23 shared_memory->name = std::move(name);
22 shared_memory->base_address = 0x0;
23 shared_memory->fixed_address = 0x0;
24 shared_memory->size = size; 24 shared_memory->size = size;
25 shared_memory->permissions = permissions; 25 shared_memory->permissions = permissions;
26 shared_memory->other_permissions = other_permissions; 26 shared_memory->other_permissions = other_permissions;
27 27
28 if (address == 0) {
29 // We need to allocate a block from the Linear Heap ourselves.
30 // We'll manually allocate some memory from the linear heap in the specified region.
31 MemoryRegionInfo* memory_region = GetMemoryRegion(region);
32 auto& linheap_memory = memory_region->linear_heap_memory;
33
34 ASSERT_MSG(linheap_memory->size() + size <= memory_region->size, "Not enough space in region to allocate shared memory!");
35
36 shared_memory->backing_block = linheap_memory;
37 shared_memory->backing_block_offset = linheap_memory->size();
38 // Allocate some memory from the end of the linear heap for this region.
39 linheap_memory->insert(linheap_memory->end(), size, 0);
40 memory_region->used += size;
41
42 shared_memory->linear_heap_phys_address = Memory::FCRAM_PADDR + memory_region->base + shared_memory->backing_block_offset;
43
44 // Increase the amount of used linear heap memory for the owner process.
45 if (shared_memory->owner_process != nullptr) {
46 shared_memory->owner_process->linear_heap_used += size;
47 }
48
49 // Refresh the address mappings for the current process.
50 if (Kernel::g_current_process != nullptr) {
51 Kernel::g_current_process->vm_manager.RefreshMemoryBlockMappings(linheap_memory.get());
52 }
53 } else {
54 // TODO(Subv): What happens if an application tries to create multiple memory blocks pointing to the same address?
55 auto& vm_manager = shared_memory->owner_process->vm_manager;
56 // The memory is already available and mapped in the owner process.
57 auto vma = vm_manager.FindVMA(address)->second;
58 // Copy it over to our own storage
59 shared_memory->backing_block = std::make_shared<std::vector<u8>>(vma.backing_block->data() + vma.offset,
60 vma.backing_block->data() + vma.offset + size);
61 shared_memory->backing_block_offset = 0;
62 // Unmap the existing pages
63 vm_manager.UnmapRange(address, size);
64 // Map our own block into the address space
65 vm_manager.MapMemoryBlock(address, shared_memory->backing_block, 0, size, MemoryState::Shared);
66 // Reprotect the block with the new permissions
67 vm_manager.ReprotectRange(address, size, ConvertPermissions(permissions));
68 }
69
70 shared_memory->base_address = address;
28 return shared_memory; 71 return shared_memory;
29} 72}
30 73
31ResultCode SharedMemory::Map(VAddr address, MemoryPermission permissions, 74SharedPtr<SharedMemory> SharedMemory::CreateForApplet(std::shared_ptr<std::vector<u8>> heap_block, u32 offset, u32 size,
32 MemoryPermission other_permissions) { 75 MemoryPermission permissions, MemoryPermission other_permissions, std::string name) {
76 SharedPtr<SharedMemory> shared_memory(new SharedMemory);
33 77
34 if (base_address != 0) { 78 shared_memory->owner_process = nullptr;
35 LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X name=%s: already mapped at 0x%08X!", 79 shared_memory->name = std::move(name);
36 GetObjectId(), address, name.c_str(), base_address); 80 shared_memory->size = size;
37 // TODO: Verify error code with hardware 81 shared_memory->permissions = permissions;
38 return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::Kernel, 82 shared_memory->other_permissions = other_permissions;
39 ErrorSummary::InvalidArgument, ErrorLevel::Permanent); 83 shared_memory->backing_block = heap_block;
40 } 84 shared_memory->backing_block_offset = offset;
85 shared_memory->base_address = Memory::HEAP_VADDR + offset;
41 86
42 // TODO(Subv): Return E0E01BEE when permissions and other_permissions don't 87 return shared_memory;
43 // match what was specified when the memory block was created. 88}
44 89
45 // TODO(Subv): Return E0E01BEE when address should be 0. 90ResultCode SharedMemory::Map(Process* target_process, VAddr address, MemoryPermission permissions,
46 // Note: Find out when that's the case. 91 MemoryPermission other_permissions) {
47 92
48 if (fixed_address != 0) { 93 MemoryPermission own_other_permissions = target_process == owner_process ? this->permissions : this->other_permissions;
49 if (address != 0 && address != fixed_address) {
50 LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X name=%s: fixed_addres is 0x%08X!",
51 GetObjectId(), address, name.c_str(), fixed_address);
52 // TODO: Verify error code with hardware
53 return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::Kernel,
54 ErrorSummary::InvalidArgument, ErrorLevel::Permanent);
55 }
56 94
57 // HACK(yuriks): This is only here to support the APT shared font mapping right now. 95 // Automatically allocated memory blocks can only be mapped with other_permissions = DontCare
58 // Later, this should actually map the memory block onto the address space. 96 if (base_address == 0 && other_permissions != MemoryPermission::DontCare) {
59 return RESULT_SUCCESS; 97 return ResultCode(ErrorDescription::InvalidCombination, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage);
60 } 98 }
61 99
62 if (address < Memory::SHARED_MEMORY_VADDR || address + size >= Memory::SHARED_MEMORY_VADDR_END) { 100 // Error out if the requested permissions don't match what the creator process allows.
63 LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X name=%s outside of shared mem bounds!", 101 if (static_cast<u32>(permissions) & ~static_cast<u32>(own_other_permissions)) {
64 GetObjectId(), address, name.c_str()); 102 LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X name=%s, permissions don't match",
65 // TODO: Verify error code with hardware 103 GetObjectId(), address, name.c_str());
66 return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::Kernel, 104 return ResultCode(ErrorDescription::InvalidCombination, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage);
67 ErrorSummary::InvalidArgument, ErrorLevel::Permanent);
68 } 105 }
69 106
70 // TODO: Test permissions 107 // Heap-backed memory blocks can not be mapped with other_permissions = DontCare
108 if (base_address != 0 && other_permissions == MemoryPermission::DontCare) {
109 LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X name=%s, permissions don't match",
110 GetObjectId(), address, name.c_str());
111 return ResultCode(ErrorDescription::InvalidCombination, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage);
112 }
71 113
72 // HACK: Since there's no way to write to the memory block without mapping it onto the game 114 // Error out if the provided permissions are not compatible with what the creator process needs.
73 // process yet, at least initialize memory the first time it's mapped. 115 if (other_permissions != MemoryPermission::DontCare &&
74 if (address != this->base_address) { 116 static_cast<u32>(this->permissions) & ~static_cast<u32>(other_permissions)) {
75 std::memset(Memory::GetPointer(address), 0, size); 117 LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X name=%s, permissions don't match",
118 GetObjectId(), address, name.c_str());
119 return ResultCode(ErrorDescription::WrongPermission, ErrorModule::OS, ErrorSummary::WrongArgument, ErrorLevel::Permanent);
76 } 120 }
77 121
78 this->base_address = address; 122 // TODO(Subv): Check for the Shared Device Mem flag in the creator process.
123 /*if (was_created_with_shared_device_mem && address != 0) {
124 return ResultCode(ErrorDescription::InvalidCombination, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage);
125 }*/
79 126
80 return RESULT_SUCCESS; 127 // TODO(Subv): The same process that created a SharedMemory object
81} 128 // can not map it in its own address space unless it was created with addr=0, result 0xD900182C.
82 129
83ResultCode SharedMemory::Unmap(VAddr address) { 130 if (address != 0) {
84 if (base_address == 0) { 131 if (address < Memory::HEAP_VADDR || address + size >= Memory::SHARED_MEMORY_VADDR_END) {
85 // TODO(Subv): Verify what actually happens when you want to unmap a memory block that 132 LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X name=%s, invalid address",
86 // was originally mapped with address = 0 133 GetObjectId(), address, name.c_str());
87 return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage); 134 return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::OS,
135 ErrorSummary::InvalidArgument, ErrorLevel::Usage);
136 }
88 } 137 }
89 138
90 if (base_address != address) 139 VAddr target_address = address;
91 return ResultCode(ErrorDescription::WrongAddress, ErrorModule::OS, ErrorSummary::InvalidState, ErrorLevel::Usage);
92 140
93 base_address = 0; 141 if (base_address == 0 && target_address == 0) {
142 // Calculate the address at which to map the memory block.
143 target_address = Memory::PhysicalToVirtualAddress(linear_heap_phys_address);
144 }
145
146 // Map the memory block into the target process
147 auto result = target_process->vm_manager.MapMemoryBlock(target_address, backing_block, backing_block_offset, size, MemoryState::Shared);
148 if (result.Failed()) {
149 LOG_ERROR(Kernel, "cannot map id=%u, target_address=0x%08X name=%s, error mapping to virtual memory",
150 GetObjectId(), target_address, name.c_str());
151 return result.Code();
152 }
94 153
95 return RESULT_SUCCESS; 154 return target_process->vm_manager.ReprotectRange(target_address, size, ConvertPermissions(permissions));
96} 155}
97 156
98u8* SharedMemory::GetPointer(u32 offset) { 157ResultCode SharedMemory::Unmap(Process* target_process, VAddr address) {
99 if (base_address != 0) 158 // TODO(Subv): Verify what happens if the application tries to unmap an address that is not mapped to a SharedMemory.
100 return Memory::GetPointer(base_address + offset); 159 return target_process->vm_manager.UnmapRange(address, size);
160}
161
162VMAPermission SharedMemory::ConvertPermissions(MemoryPermission permission) {
163 u32 masked_permissions = static_cast<u32>(permission) & static_cast<u32>(MemoryPermission::ReadWriteExecute);
164 return static_cast<VMAPermission>(masked_permissions);
165};
101 166
102 LOG_ERROR(Kernel_SVC, "memory block id=%u not mapped!", GetObjectId()); 167u8* SharedMemory::GetPointer(u32 offset) {
103 return nullptr; 168 return backing_block->data() + backing_block_offset + offset;
104} 169}
105 170
106} // namespace 171} // namespace
diff --git a/src/core/hle/kernel/shared_memory.h b/src/core/hle/kernel/shared_memory.h
index b51049ad0..0c404a9f8 100644
--- a/src/core/hle/kernel/shared_memory.h
+++ b/src/core/hle/kernel/shared_memory.h
@@ -9,6 +9,7 @@
9#include "common/common_types.h" 9#include "common/common_types.h"
10 10
11#include "core/hle/kernel/kernel.h" 11#include "core/hle/kernel/kernel.h"
12#include "core/hle/kernel/process.h"
12#include "core/hle/result.h" 13#include "core/hle/result.h"
13 14
14namespace Kernel { 15namespace Kernel {
@@ -29,14 +30,29 @@ enum class MemoryPermission : u32 {
29class SharedMemory final : public Object { 30class SharedMemory final : public Object {
30public: 31public:
31 /** 32 /**
32 * Creates a shared memory object 33 * Creates a shared memory object.
34 * @param owner_process Process that created this shared memory object.
33 * @param size Size of the memory block. Must be page-aligned. 35 * @param size Size of the memory block. Must be page-aligned.
34 * @param permissions Permission restrictions applied to the process which created the block. 36 * @param permissions Permission restrictions applied to the process which created the block.
35 * @param other_permissions Permission restrictions applied to other processes mapping the block. 37 * @param other_permissions Permission restrictions applied to other processes mapping the block.
38 * @param address The address from which to map the Shared Memory.
39 * @param region If the address is 0, the shared memory will be allocated in this region of the linear heap.
36 * @param name Optional object name, used for debugging purposes. 40 * @param name Optional object name, used for debugging purposes.
37 */ 41 */
38 static SharedPtr<SharedMemory> Create(u32 size, MemoryPermission permissions, 42 static SharedPtr<SharedMemory> Create(SharedPtr<Process> owner_process, u32 size, MemoryPermission permissions,
39 MemoryPermission other_permissions, std::string name = "Unknown"); 43 MemoryPermission other_permissions, VAddr address = 0, MemoryRegion region = MemoryRegion::BASE, std::string name = "Unknown");
44
45 /**
46 * Creates a shared memory object from a block of memory managed by an HLE applet.
47 * @param heap_block Heap block of the HLE applet.
48 * @param offset The offset into the heap block that the SharedMemory will map.
49 * @param size Size of the memory block. Must be page-aligned.
50 * @param permissions Permission restrictions applied to the process which created the block.
51 * @param other_permissions Permission restrictions applied to other processes mapping the block.
52 * @param name Optional object name, used for debugging purposes.
53 */
54 static SharedPtr<SharedMemory> CreateForApplet(std::shared_ptr<std::vector<u8>> heap_block, u32 offset, u32 size,
55 MemoryPermission permissions, MemoryPermission other_permissions, std::string name = "Unknown Applet");
40 56
41 std::string GetTypeName() const override { return "SharedMemory"; } 57 std::string GetTypeName() const override { return "SharedMemory"; }
42 std::string GetName() const override { return name; } 58 std::string GetName() const override { return name; }
@@ -45,19 +61,27 @@ public:
45 HandleType GetHandleType() const override { return HANDLE_TYPE; } 61 HandleType GetHandleType() const override { return HANDLE_TYPE; }
46 62
47 /** 63 /**
48 * Maps a shared memory block to an address in system memory 64 * Converts the specified MemoryPermission into the equivalent VMAPermission.
65 * @param permission The MemoryPermission to convert.
66 */
67 static VMAPermission ConvertPermissions(MemoryPermission permission);
68
69 /**
70 * Maps a shared memory block to an address in the target process' address space
71 * @param target_process Process on which to map the memory block.
49 * @param address Address in system memory to map shared memory block to 72 * @param address Address in system memory to map shared memory block to
50 * @param permissions Memory block map permissions (specified by SVC field) 73 * @param permissions Memory block map permissions (specified by SVC field)
51 * @param other_permissions Memory block map other permissions (specified by SVC field) 74 * @param other_permissions Memory block map other permissions (specified by SVC field)
52 */ 75 */
53 ResultCode Map(VAddr address, MemoryPermission permissions, MemoryPermission other_permissions); 76 ResultCode Map(Process* target_process, VAddr address, MemoryPermission permissions, MemoryPermission other_permissions);
54 77
55 /** 78 /**
56 * Unmaps a shared memory block from the specified address in system memory 79 * Unmaps a shared memory block from the specified address in system memory
80 * @param target_process Process from which to unmap the memory block.
57 * @param address Address in system memory where the shared memory block is mapped 81 * @param address Address in system memory where the shared memory block is mapped
58 * @return Result code of the unmap operation 82 * @return Result code of the unmap operation
59 */ 83 */
60 ResultCode Unmap(VAddr address); 84 ResultCode Unmap(Process* target_process, VAddr address);
61 85
62 /** 86 /**
63 * Gets a pointer to the shared memory block 87 * Gets a pointer to the shared memory block
@@ -66,10 +90,16 @@ public:
66 */ 90 */
67 u8* GetPointer(u32 offset = 0); 91 u8* GetPointer(u32 offset = 0);
68 92
69 /// Address of shared memory block in the process. 93 /// Process that created this shared memory block.
94 SharedPtr<Process> owner_process;
95 /// Address of shared memory block in the owner process if specified.
70 VAddr base_address; 96 VAddr base_address;
71 /// Fixed address to allow mapping to. Used for blocks created from the linear heap. 97 /// Physical address of the shared memory block in the linear heap if no address was specified during creation.
72 VAddr fixed_address; 98 PAddr linear_heap_phys_address;
99 /// Backing memory for this shared memory block.
100 std::shared_ptr<std::vector<u8>> backing_block;
101 /// Offset into the backing block for this shared memory.
102 u32 backing_block_offset;
73 /// Size of the memory block. Page-aligned. 103 /// Size of the memory block. Page-aligned.
74 u32 size; 104 u32 size;
75 /// Permission restrictions applied to the process which created the block. 105 /// Permission restrictions applied to the process which created the block.
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index bf32f653d..43def6146 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -117,9 +117,10 @@ void Thread::Stop() {
117 } 117 }
118 wait_objects.clear(); 118 wait_objects.clear();
119 119
120 Kernel::g_current_process->used_tls_slots[tls_index] = false; 120 // Mark the TLS slot in the thread's page as free.
121 g_current_process->misc_memory_used -= Memory::TLS_ENTRY_SIZE; 121 u32 tls_page = (tls_address - Memory::TLS_AREA_VADDR) / Memory::PAGE_SIZE;
122 g_current_process->memory_region->used -= Memory::TLS_ENTRY_SIZE; 122 u32 tls_slot = ((tls_address - Memory::TLS_AREA_VADDR) % Memory::PAGE_SIZE) / Memory::TLS_ENTRY_SIZE;
123 Kernel::g_current_process->tls_slots[tls_page].reset(tls_slot);
123 124
124 HLE::Reschedule(__func__); 125 HLE::Reschedule(__func__);
125} 126}
@@ -366,6 +367,31 @@ static void DebugThreadQueue() {
366 } 367 }
367} 368}
368 369
370/**
371 * Finds a free location for the TLS section of a thread.
372 * @param tls_slots The TLS page array of the thread's owner process.
373 * Returns a tuple of (page, slot, alloc_needed) where:
374 * page: The index of the first allocated TLS page that has free slots.
375 * slot: The index of the first free slot in the indicated page.
376 * alloc_needed: Whether there's a need to allocate a new TLS page (All pages are full).
377 */
378std::tuple<u32, u32, bool> GetFreeThreadLocalSlot(std::vector<std::bitset<8>>& tls_slots) {
379 // Iterate over all the allocated pages, and try to find one where not all slots are used.
380 for (unsigned page = 0; page < tls_slots.size(); ++page) {
381 const auto& page_tls_slots = tls_slots[page];
382 if (!page_tls_slots.all()) {
383 // We found a page with at least one free slot, find which slot it is
384 for (unsigned slot = 0; slot < page_tls_slots.size(); ++slot) {
385 if (!page_tls_slots.test(slot)) {
386 return std::make_tuple(page, slot, false);
387 }
388 }
389 }
390 }
391
392 return std::make_tuple(0, 0, true);
393}
394
369ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point, s32 priority, 395ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point, s32 priority,
370 u32 arg, s32 processor_id, VAddr stack_top) { 396 u32 arg, s32 processor_id, VAddr stack_top) {
371 if (priority < THREADPRIO_HIGHEST || priority > THREADPRIO_LOWEST) { 397 if (priority < THREADPRIO_HIGHEST || priority > THREADPRIO_LOWEST) {
@@ -403,22 +429,50 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point,
403 thread->name = std::move(name); 429 thread->name = std::move(name);
404 thread->callback_handle = wakeup_callback_handle_table.Create(thread).MoveFrom(); 430 thread->callback_handle = wakeup_callback_handle_table.Create(thread).MoveFrom();
405 thread->owner_process = g_current_process; 431 thread->owner_process = g_current_process;
406 thread->tls_index = -1;
407 thread->waitsynch_waited = false; 432 thread->waitsynch_waited = false;
408 433
409 // Find the next available TLS index, and mark it as used 434 // Find the next available TLS index, and mark it as used
410 auto& used_tls_slots = Kernel::g_current_process->used_tls_slots; 435 auto& tls_slots = Kernel::g_current_process->tls_slots;
411 for (unsigned int i = 0; i < used_tls_slots.size(); ++i) { 436 bool needs_allocation = true;
412 if (used_tls_slots[i] == false) { 437 u32 available_page; // Which allocated page has free space
413 thread->tls_index = i; 438 u32 available_slot; // Which slot within the page is free
414 used_tls_slots[i] = true; 439
415 break; 440 std::tie(available_page, available_slot, needs_allocation) = GetFreeThreadLocalSlot(tls_slots);
441
442 if (needs_allocation) {
443 // There are no already-allocated pages with free slots, let's allocate a new one.
444 // TLS pages are allocated from the BASE region in the linear heap.
445 MemoryRegionInfo* memory_region = GetMemoryRegion(MemoryRegion::BASE);
446 auto& linheap_memory = memory_region->linear_heap_memory;
447
448 if (linheap_memory->size() + Memory::PAGE_SIZE > memory_region->size) {
449 LOG_ERROR(Kernel_SVC, "Not enough space in region to allocate a new TLS page for thread");
450 return ResultCode(ErrorDescription::OutOfMemory, ErrorModule::Kernel, ErrorSummary::OutOfResource, ErrorLevel::Permanent);
416 } 451 }
452
453 u32 offset = linheap_memory->size();
454
455 // Allocate some memory from the end of the linear heap for this region.
456 linheap_memory->insert(linheap_memory->end(), Memory::PAGE_SIZE, 0);
457 memory_region->used += Memory::PAGE_SIZE;
458 Kernel::g_current_process->linear_heap_used += Memory::PAGE_SIZE;
459
460 tls_slots.emplace_back(0); // The page is completely available at the start
461 available_page = tls_slots.size() - 1;
462 available_slot = 0; // Use the first slot in the new page
463
464 auto& vm_manager = Kernel::g_current_process->vm_manager;
465 vm_manager.RefreshMemoryBlockMappings(linheap_memory.get());
466
467 // Map the page to the current process' address space.
468 // TODO(Subv): Find the correct MemoryState for this region.
469 vm_manager.MapMemoryBlock(Memory::TLS_AREA_VADDR + available_page * Memory::PAGE_SIZE,
470 linheap_memory, offset, Memory::PAGE_SIZE, MemoryState::Private);
417 } 471 }
418 472
419 ASSERT_MSG(thread->tls_index != -1, "Out of TLS space"); 473 // Mark the slot as used
420 g_current_process->misc_memory_used += Memory::TLS_ENTRY_SIZE; 474 tls_slots[available_page].set(available_slot);
421 g_current_process->memory_region->used += Memory::TLS_ENTRY_SIZE; 475 thread->tls_address = Memory::TLS_AREA_VADDR + available_page * Memory::PAGE_SIZE + available_slot * Memory::TLS_ENTRY_SIZE;
422 476
423 // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used 477 // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used
424 // to initialize the context 478 // to initialize the context
@@ -472,6 +526,8 @@ SharedPtr<Thread> SetupMainThread(u32 entry_point, s32 priority) {
472 526
473 SharedPtr<Thread> thread = thread_res.MoveFrom(); 527 SharedPtr<Thread> thread = thread_res.MoveFrom();
474 528
529 thread->context.fpscr = FPSCR_DEFAULT_NAN | FPSCR_FLUSH_TO_ZERO | FPSCR_ROUND_TOZERO | FPSCR_IXC; // 0x03C00010
530
475 // Run new "main" thread 531 // Run new "main" thread
476 SwitchContext(thread.get()); 532 SwitchContext(thread.get());
477 533
@@ -483,7 +539,8 @@ void Reschedule() {
483 539
484 Thread* cur = GetCurrentThread(); 540 Thread* cur = GetCurrentThread();
485 Thread* next = PopNextReadyThread(); 541 Thread* next = PopNextReadyThread();
486 HLE::g_reschedule = false; 542
543 HLE::DoneRescheduling();
487 544
488 // Don't bother switching to the same thread 545 // Don't bother switching to the same thread
489 if (next == cur) 546 if (next == cur)
@@ -508,10 +565,6 @@ void Thread::SetWaitSynchronizationOutput(s32 output) {
508 context.cpu_registers[1] = output; 565 context.cpu_registers[1] = output;
509} 566}
510 567
511VAddr Thread::GetTLSAddress() const {
512 return Memory::TLS_AREA_VADDR + tls_index * Memory::TLS_ENTRY_SIZE;
513}
514
515//////////////////////////////////////////////////////////////////////////////////////////////////// 568////////////////////////////////////////////////////////////////////////////////////////////////////
516 569
517void ThreadingInit() { 570void ThreadingInit() {
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 97ba57fc5..deab5d5a6 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -127,7 +127,7 @@ public:
127 * Returns the Thread Local Storage address of the current thread 127 * Returns the Thread Local Storage address of the current thread
128 * @returns VAddr of the thread's TLS 128 * @returns VAddr of the thread's TLS
129 */ 129 */
130 VAddr GetTLSAddress() const; 130 VAddr GetTLSAddress() const { return tls_address; }
131 131
132 Core::ThreadContext context; 132 Core::ThreadContext context;
133 133
@@ -144,7 +144,7 @@ public:
144 144
145 s32 processor_id; 145 s32 processor_id;
146 146
147 s32 tls_index; ///< Index of the Thread Local Storage of the thread 147 VAddr tls_address; ///< Virtual address of the Thread Local Storage of the thread
148 148
149 bool waitsynch_waited; ///< Set to true if the last svcWaitSynch call caused the thread to wait 149 bool waitsynch_waited; ///< Set to true if the last svcWaitSynch call caused the thread to wait
150 150
diff --git a/src/core/hle/result.h b/src/core/hle/result.h
index 2d22652d9..bfb3327ce 100644
--- a/src/core/hle/result.h
+++ b/src/core/hle/result.h
@@ -5,7 +5,6 @@
5#pragma once 5#pragma once
6 6
7#include <new> 7#include <new>
8#include <type_traits>
9#include <utility> 8#include <utility>
10 9
11#include "common/assert.h" 10#include "common/assert.h"
@@ -18,6 +17,8 @@
18/// Detailed description of the error. This listing is likely incomplete. 17/// Detailed description of the error. This listing is likely incomplete.
19enum class ErrorDescription : u32 { 18enum class ErrorDescription : u32 {
20 Success = 0, 19 Success = 0,
20 WrongPermission = 46,
21 OS_InvalidBufferDescriptor = 48,
21 WrongAddress = 53, 22 WrongAddress = 53,
22 FS_NotFound = 120, 23 FS_NotFound = 120,
23 FS_AlreadyExists = 190, 24 FS_AlreadyExists = 190,
diff --git a/src/core/hle/service/ac_u.cpp b/src/core/hle/service/ac_u.cpp
index d67325506..5241dd3e7 100644
--- a/src/core/hle/service/ac_u.cpp
+++ b/src/core/hle/service/ac_u.cpp
@@ -3,6 +3,8 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/logging/log.h" 5#include "common/logging/log.h"
6
7#include "core/hle/kernel/event.h"
6#include "core/hle/service/ac_u.h" 8#include "core/hle/service/ac_u.h"
7 9
8//////////////////////////////////////////////////////////////////////////////////////////////////// 10////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -11,6 +13,28 @@
11namespace AC_U { 13namespace AC_U {
12 14
13/** 15/**
16 * AC_U::CloseAsync service function
17 * Inputs:
18 * 1 : Always 0x20
19 * 3 : Always 0
20 * 4 : Event handle, should be signaled when AC connection is closed
21 * Outputs:
22 * 1 : Result of function, 0 on success, otherwise error code
23 */
24static void CloseAsync(Service::Interface* self) {
25 u32* cmd_buff = Kernel::GetCommandBuffer();
26
27 auto evt = Kernel::g_handle_table.Get<Kernel::Event>(cmd_buff[4]);
28
29 if (evt) {
30 evt->name = "AC_U:close_event";
31 evt->Signal();
32 }
33 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
34
35 LOG_WARNING(Service_AC, "(STUBBED) called");
36}
37/**
14 * AC_U::GetWifiStatus service function 38 * AC_U::GetWifiStatus service function
15 * Outputs: 39 * Outputs:
16 * 1 : Result of function, 0 on success, otherwise error code 40 * 1 : Result of function, 0 on success, otherwise error code
@@ -47,7 +71,7 @@ const Interface::FunctionInfo FunctionTable[] = {
47 {0x00010000, nullptr, "CreateDefaultConfig"}, 71 {0x00010000, nullptr, "CreateDefaultConfig"},
48 {0x00040006, nullptr, "ConnectAsync"}, 72 {0x00040006, nullptr, "ConnectAsync"},
49 {0x00050002, nullptr, "GetConnectResult"}, 73 {0x00050002, nullptr, "GetConnectResult"},
50 {0x00080004, nullptr, "CloseAsync"}, 74 {0x00080004, CloseAsync, "CloseAsync"},
51 {0x00090002, nullptr, "GetCloseResult"}, 75 {0x00090002, nullptr, "GetCloseResult"},
52 {0x000A0000, nullptr, "GetLastErrorCode"}, 76 {0x000A0000, nullptr, "GetLastErrorCode"},
53 {0x000D0000, GetWifiStatus, "GetWifiStatus"}, 77 {0x000D0000, GetWifiStatus, "GetWifiStatus"},
diff --git a/src/core/hle/service/act_a.cpp b/src/core/hle/service/act_a.cpp
new file mode 100644
index 000000000..3a775fa90
--- /dev/null
+++ b/src/core/hle/service/act_a.cpp
@@ -0,0 +1,26 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/hle/service/act_a.h"
6
7////////////////////////////////////////////////////////////////////////////////////////////////////
8// Namespace ACT_A
9
10namespace ACT_A {
11
12const Interface::FunctionInfo FunctionTable[] = {
13 {0x041300C2, nullptr, "UpdateMiiImage"},
14 {0x041B0142, nullptr, "AgreeEula"},
15 {0x04210042, nullptr, "UploadMii"},
16 {0x04230082, nullptr, "ValidateMailAddress"},
17};
18
19////////////////////////////////////////////////////////////////////////////////////////////////////
20// Interface class
21
22Interface::Interface() {
23 Register(FunctionTable);
24}
25
26} // namespace
diff --git a/src/core/hle/service/act_a.h b/src/core/hle/service/act_a.h
new file mode 100644
index 000000000..765cae644
--- /dev/null
+++ b/src/core/hle/service/act_a.h
@@ -0,0 +1,23 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "core/hle/service/service.h"
8
9////////////////////////////////////////////////////////////////////////////////////////////////////
10// Namespace ACT_A
11
12namespace ACT_A {
13
14class Interface : public Service::Interface {
15public:
16 Interface();
17
18 std::string GetPortName() const override {
19 return "act:a";
20 }
21};
22
23} // namespace
diff --git a/src/core/hle/service/act_u.cpp b/src/core/hle/service/act_u.cpp
index b23d17fba..05de4d002 100644
--- a/src/core/hle/service/act_u.cpp
+++ b/src/core/hle/service/act_u.cpp
@@ -10,7 +10,10 @@
10namespace ACT_U { 10namespace ACT_U {
11 11
12const Interface::FunctionInfo FunctionTable[] = { 12const Interface::FunctionInfo FunctionTable[] = {
13 {0x00010084, nullptr, "Initialize"},
14 {0x00020040, nullptr, "GetErrorCode"},
13 {0x000600C2, nullptr, "GetAccountDataBlock"}, 15 {0x000600C2, nullptr, "GetAccountDataBlock"},
16 {0x000D0040, nullptr, "GenerateUuid"},
14}; 17};
15 18
16//////////////////////////////////////////////////////////////////////////////////////////////////// 19////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index 9591522e5..3f71e7f2b 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -43,7 +43,7 @@ void FindContentInfos(Service::Interface* self) {
43 am_content_count[media_type] = cmd_buff[4]; 43 am_content_count[media_type] = cmd_buff[4];
44 44
45 cmd_buff[1] = RESULT_SUCCESS.raw; 45 cmd_buff[1] = RESULT_SUCCESS.raw;
46 LOG_WARNING(Service_AM, "(STUBBED) media_type=%u, title_id=0x%016lx, content_cound=%u, content_ids_pointer=0x%08x, content_info_pointer=0x%08x", 46 LOG_WARNING(Service_AM, "(STUBBED) media_type=%u, title_id=0x%016llx, content_cound=%u, content_ids_pointer=0x%08x, content_info_pointer=0x%08x",
47 media_type, title_id, am_content_count[media_type], content_ids_pointer, content_info_pointer); 47 media_type, title_id, am_content_count[media_type], content_ids_pointer, content_info_pointer);
48} 48}
49 49
diff --git a/src/core/hle/service/apt/apt.cpp b/src/core/hle/service/apt/apt.cpp
index e6fcbc714..bbf170b71 100644
--- a/src/core/hle/service/apt/apt.cpp
+++ b/src/core/hle/service/apt/apt.cpp
@@ -12,6 +12,7 @@
12#include "core/hle/service/apt/apt_a.h" 12#include "core/hle/service/apt/apt_a.h"
13#include "core/hle/service/apt/apt_s.h" 13#include "core/hle/service/apt/apt_s.h"
14#include "core/hle/service/apt/apt_u.h" 14#include "core/hle/service/apt/apt_u.h"
15#include "core/hle/service/apt/bcfnt/bcfnt.h"
15#include "core/hle/service/fs/archive.h" 16#include "core/hle/service/fs/archive.h"
16#include "core/hle/service/ptm/ptm.h" 17#include "core/hle/service/ptm/ptm.h"
17 18
@@ -23,23 +24,14 @@
23namespace Service { 24namespace Service {
24namespace APT { 25namespace APT {
25 26
26// Address used for shared font (as observed on HW)
27// TODO(bunnei): This is the hard-coded address where we currently dump the shared font from via
28// https://github.com/citra-emu/3dsutils. This is technically a hack, and will not work at any
29// address other than 0x18000000 due to internal pointers in the shared font dump that would need to
30// be relocated. This might be fixed by dumping the shared font @ address 0x00000000 and then
31// correctly mapping it in Citra, however we still do not understand how the mapping is determined.
32static const VAddr SHARED_FONT_VADDR = 0x18000000;
33
34/// Handle to shared memory region designated for shared system font 27/// Handle to shared memory region designated for shared system font
35static Kernel::SharedPtr<Kernel::SharedMemory> shared_font_mem; 28static Kernel::SharedPtr<Kernel::SharedMemory> shared_font_mem;
29static bool shared_font_relocated = false;
36 30
37static Kernel::SharedPtr<Kernel::Mutex> lock; 31static Kernel::SharedPtr<Kernel::Mutex> lock;
38static Kernel::SharedPtr<Kernel::Event> notification_event; ///< APT notification event 32static Kernel::SharedPtr<Kernel::Event> notification_event; ///< APT notification event
39static Kernel::SharedPtr<Kernel::Event> parameter_event; ///< APT parameter event 33static Kernel::SharedPtr<Kernel::Event> parameter_event; ///< APT parameter event
40 34
41static std::shared_ptr<std::vector<u8>> shared_font;
42
43static u32 cpu_percent; ///< CPU time available to the running application 35static u32 cpu_percent; ///< CPU time available to the running application
44 36
45// APT::CheckNew3DSApp will check this unknown_ns_state_field to determine processing mode 37// APT::CheckNew3DSApp will check this unknown_ns_state_field to determine processing mode
@@ -78,23 +70,25 @@ void Initialize(Service::Interface* self) {
78void GetSharedFont(Service::Interface* self) { 70void GetSharedFont(Service::Interface* self) {
79 u32* cmd_buff = Kernel::GetCommandBuffer(); 71 u32* cmd_buff = Kernel::GetCommandBuffer();
80 72
81 if (shared_font != nullptr) { 73 // The shared font has to be relocated to the new address before being passed to the application.
82 // TODO(yuriks): This is a hack to keep this working right now even with our completely 74 VAddr target_address = Memory::PhysicalToVirtualAddress(shared_font_mem->linear_heap_phys_address);
83 // broken shared memory system. 75 // The shared font dumped by 3dsutils (https://github.com/citra-emu/3dsutils) uses this address as base,
84 shared_font_mem->fixed_address = SHARED_FONT_VADDR; 76 // so we relocate it from there to our real address.
85 Kernel::g_current_process->vm_manager.MapMemoryBlock(shared_font_mem->fixed_address, 77 // TODO(Subv): This address is wrong if the shared font is dumped from a n3DS,
86 shared_font, 0, shared_font_mem->size, Kernel::MemoryState::Shared); 78 // we need a way to automatically calculate the original address of the font from the file.
87 79 static const VAddr SHARED_FONT_VADDR = 0x18000000;
88 cmd_buff[0] = IPC::MakeHeader(0x44, 2, 2); 80 if (!shared_font_relocated) {
89 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 81 BCFNT::RelocateSharedFont(shared_font_mem, SHARED_FONT_VADDR, target_address);
90 cmd_buff[2] = SHARED_FONT_VADDR; 82 shared_font_relocated = true;
91 cmd_buff[3] = IPC::MoveHandleDesc();
92 cmd_buff[4] = Kernel::g_handle_table.Create(shared_font_mem).MoveFrom();
93 } else {
94 cmd_buff[0] = IPC::MakeHeader(0x44, 1, 0);
95 cmd_buff[1] = -1; // Generic error (not really possible to verify this on hardware)
96 LOG_ERROR(Kernel_SVC, "called, but %s has not been loaded!", SHARED_FONT);
97 } 83 }
84 cmd_buff[0] = IPC::MakeHeader(0x44, 2, 2);
85 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
86 // Since the SharedMemory interface doesn't provide the address at which the memory was allocated,
87 // the real APT service calculates this address by scanning the entire address space (using svcQueryMemory)
88 // and searches for an allocation of the same size as the Shared Font.
89 cmd_buff[2] = target_address;
90 cmd_buff[3] = IPC::MoveHandleDesc();
91 cmd_buff[4] = Kernel::g_handle_table.Create(shared_font_mem).MoveFrom();
98} 92}
99 93
100void NotifyToWait(Service::Interface* self) { 94void NotifyToWait(Service::Interface* self) {
@@ -483,14 +477,12 @@ void Init() {
483 FileUtil::IOFile file(filepath, "rb"); 477 FileUtil::IOFile file(filepath, "rb");
484 478
485 if (file.IsOpen()) { 479 if (file.IsOpen()) {
486 // Read shared font data
487 shared_font = std::make_shared<std::vector<u8>>((size_t)file.GetSize());
488 file.ReadBytes(shared_font->data(), shared_font->size());
489
490 // Create shared font memory object 480 // Create shared font memory object
491 using Kernel::MemoryPermission; 481 using Kernel::MemoryPermission;
492 shared_font_mem = Kernel::SharedMemory::Create(3 * 1024 * 1024, // 3MB 482 shared_font_mem = Kernel::SharedMemory::Create(nullptr, 0x332000, // 3272 KB
493 MemoryPermission::ReadWrite, MemoryPermission::Read, "APT_U:shared_font_mem"); 483 MemoryPermission::ReadWrite, MemoryPermission::Read, 0, Kernel::MemoryRegion::SYSTEM, "APT:SharedFont");
484 // Read shared font data
485 file.ReadBytes(shared_font_mem->GetPointer(), file.GetSize());
494 } else { 486 } else {
495 LOG_WARNING(Service_APT, "Unable to load shared font: %s", filepath.c_str()); 487 LOG_WARNING(Service_APT, "Unable to load shared font: %s", filepath.c_str());
496 shared_font_mem = nullptr; 488 shared_font_mem = nullptr;
@@ -510,8 +502,8 @@ void Init() {
510} 502}
511 503
512void Shutdown() { 504void Shutdown() {
513 shared_font = nullptr;
514 shared_font_mem = nullptr; 505 shared_font_mem = nullptr;
506 shared_font_relocated = false;
515 lock = nullptr; 507 lock = nullptr;
516 notification_event = nullptr; 508 notification_event = nullptr;
517 parameter_event = nullptr; 509 parameter_event = nullptr;
diff --git a/src/core/hle/service/apt/apt.h b/src/core/hle/service/apt/apt.h
index fd3c2bd37..ed7c47cca 100644
--- a/src/core/hle/service/apt/apt.h
+++ b/src/core/hle/service/apt/apt.h
@@ -5,6 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include "common/common_types.h" 7#include "common/common_types.h"
8#include "common/swap.h"
8 9
9#include "core/hle/kernel/kernel.h" 10#include "core/hle/kernel/kernel.h"
10 11
@@ -31,6 +32,20 @@ struct AppletStartupParameter {
31 u8* data = nullptr; 32 u8* data = nullptr;
32}; 33};
33 34
35/// Used by the application to pass information about the current framebuffer to applets.
36struct CaptureBufferInfo {
37 u32_le size;
38 u8 is_3d;
39 INSERT_PADDING_BYTES(0x3); // Padding for alignment
40 u32_le top_screen_left_offset;
41 u32_le top_screen_right_offset;
42 u32_le top_screen_format;
43 u32_le bottom_screen_left_offset;
44 u32_le bottom_screen_right_offset;
45 u32_le bottom_screen_format;
46};
47static_assert(sizeof(CaptureBufferInfo) == 0x20, "CaptureBufferInfo struct has incorrect size");
48
34/// Signals used by APT functions 49/// Signals used by APT functions
35enum class SignalType : u32 { 50enum class SignalType : u32 {
36 None = 0x0, 51 None = 0x0,
diff --git a/src/core/hle/service/apt/bcfnt/bcfnt.cpp b/src/core/hle/service/apt/bcfnt/bcfnt.cpp
new file mode 100644
index 000000000..b0d39d4a5
--- /dev/null
+++ b/src/core/hle/service/apt/bcfnt/bcfnt.cpp
@@ -0,0 +1,71 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/hle/service/apt/bcfnt/bcfnt.h"
6#include "core/hle/service/service.h"
7
8namespace Service {
9namespace APT {
10namespace BCFNT {
11
12void RelocateSharedFont(Kernel::SharedPtr<Kernel::SharedMemory> shared_font, VAddr previous_address, VAddr new_address) {
13 static const u32 SharedFontStartOffset = 0x80;
14 u8* data = shared_font->GetPointer(SharedFontStartOffset);
15
16 CFNT cfnt;
17 memcpy(&cfnt, data, sizeof(cfnt));
18
19 // Advance past the header
20 data = shared_font->GetPointer(SharedFontStartOffset + cfnt.header_size);
21
22 for (unsigned block = 0; block < cfnt.num_blocks; ++block) {
23
24 u32 section_size = 0;
25 if (memcmp(data, "FINF", 4) == 0) {
26 BCFNT::FINF finf;
27 memcpy(&finf, data, sizeof(finf));
28 section_size = finf.section_size;
29
30 // Relocate the offsets in the FINF section
31 finf.cmap_offset += new_address - previous_address;
32 finf.cwdh_offset += new_address - previous_address;
33 finf.tglp_offset += new_address - previous_address;
34
35 memcpy(data, &finf, sizeof(finf));
36 } else if (memcmp(data, "CMAP", 4) == 0) {
37 BCFNT::CMAP cmap;
38 memcpy(&cmap, data, sizeof(cmap));
39 section_size = cmap.section_size;
40
41 // Relocate the offsets in the CMAP section
42 cmap.next_cmap_offset += new_address - previous_address;
43
44 memcpy(data, &cmap, sizeof(cmap));
45 } else if (memcmp(data, "CWDH", 4) == 0) {
46 BCFNT::CWDH cwdh;
47 memcpy(&cwdh, data, sizeof(cwdh));
48 section_size = cwdh.section_size;
49
50 // Relocate the offsets in the CWDH section
51 cwdh.next_cwdh_offset += new_address - previous_address;
52
53 memcpy(data, &cwdh, sizeof(cwdh));
54 } else if (memcmp(data, "TGLP", 4) == 0) {
55 BCFNT::TGLP tglp;
56 memcpy(&tglp, data, sizeof(tglp));
57 section_size = tglp.section_size;
58
59 // Relocate the offsets in the TGLP section
60 tglp.sheet_data_offset += new_address - previous_address;
61
62 memcpy(data, &tglp, sizeof(tglp));
63 }
64
65 data += section_size;
66 }
67}
68
69} // namespace BCFNT
70} // namespace APT
71} // namespace Service \ No newline at end of file
diff --git a/src/core/hle/service/apt/bcfnt/bcfnt.h b/src/core/hle/service/apt/bcfnt/bcfnt.h
new file mode 100644
index 000000000..388c6bea0
--- /dev/null
+++ b/src/core/hle/service/apt/bcfnt/bcfnt.h
@@ -0,0 +1,87 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/swap.h"
8
9#include "core/hle/kernel/shared_memory.h"
10#include "core/hle/service/service.h"
11
12namespace Service {
13namespace APT {
14namespace BCFNT { ///< BCFNT Shared Font file structures
15
16struct CFNT {
17 u8 magic[4];
18 u16_le endianness;
19 u16_le header_size;
20 u32_le version;
21 u32_le file_size;
22 u32_le num_blocks;
23};
24
25struct FINF {
26 u8 magic[4];
27 u32_le section_size;
28 u8 font_type;
29 u8 line_feed;
30 u16_le alter_char_index;
31 u8 default_width[3];
32 u8 encoding;
33 u32_le tglp_offset;
34 u32_le cwdh_offset;
35 u32_le cmap_offset;
36 u8 height;
37 u8 width;
38 u8 ascent;
39 u8 reserved;
40};
41
42struct TGLP {
43 u8 magic[4];
44 u32_le section_size;
45 u8 cell_width;
46 u8 cell_height;
47 u8 baseline_position;
48 u8 max_character_width;
49 u32_le sheet_size;
50 u16_le num_sheets;
51 u16_le sheet_image_format;
52 u16_le num_columns;
53 u16_le num_rows;
54 u16_le sheet_width;
55 u16_le sheet_height;
56 u32_le sheet_data_offset;
57};
58
59struct CMAP {
60 u8 magic[4];
61 u32_le section_size;
62 u16_le code_begin;
63 u16_le code_end;
64 u16_le mapping_method;
65 u16_le reserved;
66 u32_le next_cmap_offset;
67};
68
69struct CWDH {
70 u8 magic[4];
71 u32_le section_size;
72 u16_le start_index;
73 u16_le end_index;
74 u32_le next_cwdh_offset;
75};
76
77/**
78 * Relocates the internal addresses of the BCFNT Shared Font to the new base.
79 * @param shared_font SharedMemory object that contains the Shared Font
80 * @param previous_address Previous address at which the offsets in the structure were based.
81 * @param new_address New base for the offsets in the structure.
82 */
83void RelocateSharedFont(Kernel::SharedPtr<Kernel::SharedMemory> shared_font, VAddr previous_address, VAddr new_address);
84
85} // namespace BCFNT
86} // namespace APT
87} // namespace Service
diff --git a/src/core/hle/service/cfg/cfg.cpp b/src/core/hle/service/cfg/cfg.cpp
index 525432957..b9322c55d 100644
--- a/src/core/hle/service/cfg/cfg.cpp
+++ b/src/core/hle/service/cfg/cfg.cpp
@@ -389,6 +389,10 @@ ResultCode FormatConfig() {
389 res = CreateConfigInfoBlk(0x000F0004, sizeof(CONSOLE_MODEL), 0xC, &CONSOLE_MODEL); 389 res = CreateConfigInfoBlk(0x000F0004, sizeof(CONSOLE_MODEL), 0xC, &CONSOLE_MODEL);
390 if (!res.IsSuccess()) return res; 390 if (!res.IsSuccess()) return res;
391 391
392 // 0x00170000 - Unknown
393 res = CreateConfigInfoBlk(0x00170000, 0x4, 0xE, zero_buffer);
394 if (!res.IsSuccess()) return res;
395
392 // Save the buffer to the file 396 // Save the buffer to the file
393 res = UpdateConfigNANDSavegame(); 397 res = UpdateConfigNANDSavegame();
394 if (!res.IsSuccess()) 398 if (!res.IsSuccess())
diff --git a/src/core/hle/service/cfg/cfg.h b/src/core/hle/service/cfg/cfg.h
index 606ab99cf..c01806836 100644
--- a/src/core/hle/service/cfg/cfg.h
+++ b/src/core/hle/service/cfg/cfg.h
@@ -98,19 +98,6 @@ void GetCountryCodeString(Service::Interface* self);
98void GetCountryCodeID(Service::Interface* self); 98void GetCountryCodeID(Service::Interface* self);
99 99
100/** 100/**
101 * CFG::GetConfigInfoBlk2 service function
102 * Inputs:
103 * 0 : 0x00010082
104 * 1 : Size
105 * 2 : Block ID
106 * 3 : Descriptor for the output buffer
107 * 4 : Output buffer pointer
108 * Outputs:
109 * 1 : Result of function, 0 on success, otherwise error code
110 */
111void GetConfigInfoBlk2(Service::Interface* self);
112
113/**
114 * CFG::SecureInfoGetRegion service function 101 * CFG::SecureInfoGetRegion service function
115 * Inputs: 102 * Inputs:
116 * 1 : None 103 * 1 : None
diff --git a/src/core/hle/service/csnd_snd.cpp b/src/core/hle/service/csnd_snd.cpp
index 6318bf2a7..d2bb8941c 100644
--- a/src/core/hle/service/csnd_snd.cpp
+++ b/src/core/hle/service/csnd_snd.cpp
@@ -3,6 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <cstring> 5#include <cstring>
6#include "common/alignment.h"
6#include "core/hle/hle.h" 7#include "core/hle/hle.h"
7#include "core/hle/kernel/mutex.h" 8#include "core/hle/kernel/mutex.h"
8#include "core/hle/kernel/shared_memory.h" 9#include "core/hle/kernel/shared_memory.h"
@@ -41,14 +42,16 @@ static Kernel::SharedPtr<Kernel::Mutex> mutex = nullptr;
41void Initialize(Service::Interface* self) { 42void Initialize(Service::Interface* self) {
42 u32* cmd_buff = Kernel::GetCommandBuffer(); 43 u32* cmd_buff = Kernel::GetCommandBuffer();
43 44
44 shared_memory = Kernel::SharedMemory::Create(cmd_buff[1], 45 u32 size = Common::AlignUp(cmd_buff[1], Memory::PAGE_SIZE);
45 Kernel::MemoryPermission::ReadWrite, 46 using Kernel::MemoryPermission;
46 Kernel::MemoryPermission::ReadWrite, "CSNDSharedMem"); 47 shared_memory = Kernel::SharedMemory::Create(nullptr, size,
48 MemoryPermission::ReadWrite, MemoryPermission::ReadWrite,
49 0, Kernel::MemoryRegion::BASE, "CSND:SharedMemory");
47 50
48 mutex = Kernel::Mutex::Create(false); 51 mutex = Kernel::Mutex::Create(false);
49 52
50 cmd_buff[1] = 0; 53 cmd_buff[1] = RESULT_SUCCESS.raw;
51 cmd_buff[2] = 0x4000000; 54 cmd_buff[2] = IPC::MoveHandleDesc(2);
52 cmd_buff[3] = Kernel::g_handle_table.Create(mutex).MoveFrom(); 55 cmd_buff[3] = Kernel::g_handle_table.Create(mutex).MoveFrom();
53 cmd_buff[4] = Kernel::g_handle_table.Create(shared_memory).MoveFrom(); 56 cmd_buff[4] = Kernel::g_handle_table.Create(shared_memory).MoveFrom();
54} 57}
diff --git a/src/core/hle/service/dsp_dsp.cpp b/src/core/hle/service/dsp_dsp.cpp
index 08e437125..10730d7ac 100644
--- a/src/core/hle/service/dsp_dsp.cpp
+++ b/src/core/hle/service/dsp_dsp.cpp
@@ -2,6 +2,7 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
5#include <cinttypes> 6#include <cinttypes>
6 7
7#include "audio_core/hle/pipe.h" 8#include "audio_core/hle/pipe.h"
@@ -12,37 +13,80 @@
12#include "core/hle/kernel/event.h" 13#include "core/hle/kernel/event.h"
13#include "core/hle/service/dsp_dsp.h" 14#include "core/hle/service/dsp_dsp.h"
14 15
16using DspPipe = DSP::HLE::DspPipe;
17
15//////////////////////////////////////////////////////////////////////////////////////////////////// 18////////////////////////////////////////////////////////////////////////////////////////////////////
16// Namespace DSP_DSP 19// Namespace DSP_DSP
17 20
18namespace DSP_DSP { 21namespace DSP_DSP {
19 22
20static u32 read_pipe_count;
21static Kernel::SharedPtr<Kernel::Event> semaphore_event; 23static Kernel::SharedPtr<Kernel::Event> semaphore_event;
22 24
23struct PairHash { 25/// There are three types of interrupts
24 template <typename T, typename U> 26enum class InterruptType {
25 std::size_t operator()(const std::pair<T, U> &x) const { 27 Zero, One, Pipe
26 // TODO(yuriks): Replace with better hash combining function. 28};
27 return std::hash<T>()(x.first) ^ std::hash<U>()(x.second); 29constexpr size_t NUM_INTERRUPT_TYPE = 3;
30
31class InterruptEvents final {
32public:
33 void Signal(InterruptType type, DspPipe pipe) {
34 Kernel::SharedPtr<Kernel::Event>& event = Get(type, pipe);
35 if (event) {
36 event->Signal();
37 }
28 } 38 }
39
40 Kernel::SharedPtr<Kernel::Event>& Get(InterruptType type, DspPipe dsp_pipe) {
41 switch (type) {
42 case InterruptType::Zero:
43 return zero;
44 case InterruptType::One:
45 return one;
46 case InterruptType::Pipe: {
47 const size_t pipe_index = static_cast<size_t>(dsp_pipe);
48 ASSERT(pipe_index < DSP::HLE::NUM_DSP_PIPE);
49 return pipe[pipe_index];
50 }
51 }
52
53 UNREACHABLE_MSG("Invalid interrupt type = %zu", static_cast<size_t>(type));
54 }
55
56 bool HasTooManyEventsRegistered() const {
57 // Actual service implementation only has 6 'slots' for interrupts.
58 constexpr size_t max_number_of_interrupt_events = 6;
59
60 size_t number = std::count_if(pipe.begin(), pipe.end(), [](const auto& evt) {
61 return evt != nullptr;
62 });
63
64 if (zero != nullptr)
65 number++;
66 if (one != nullptr)
67 number++;
68
69 return number >= max_number_of_interrupt_events;
70 }
71
72private:
73 /// Currently unknown purpose
74 Kernel::SharedPtr<Kernel::Event> zero = nullptr;
75 /// Currently unknown purpose
76 Kernel::SharedPtr<Kernel::Event> one = nullptr;
77 /// Each DSP pipe has an associated interrupt
78 std::array<Kernel::SharedPtr<Kernel::Event>, DSP::HLE::NUM_DSP_PIPE> pipe = {{}};
29}; 79};
30 80
31/// Map of (audio interrupt number, channel number) to Kernel::Events. See: RegisterInterruptEvents 81static InterruptEvents interrupt_events;
32static std::unordered_map<std::pair<u32, u32>, Kernel::SharedPtr<Kernel::Event>, PairHash> interrupt_events;
33 82
34// DSP Interrupts: 83// DSP Interrupts:
35// Interrupt #2 occurs every frame tick. Userland programs normally have a thread that's waiting 84// The audio-pipe interrupt occurs every frame tick. Userland programs normally have a thread
36// for an interrupt event. Immediately after this interrupt event, userland normally updates the 85// that's waiting for an interrupt event. Immediately after this interrupt event, userland
37// state in the next region and increments the relevant frame counter by two. 86// normally updates the state in the next region and increments the relevant frame counter by
38void SignalAllInterrupts() { 87// two.
39 // HACK: The other interrupts have currently unknown purpose, we trigger them each tick in any case. 88void SignalPipeInterrupt(DspPipe pipe) {
40 for (auto& interrupt_event : interrupt_events) 89 interrupt_events.Signal(InterruptType::Pipe, pipe);
41 interrupt_event.second->Signal();
42}
43
44void SignalInterrupt(u32 interrupt, u32 channel) {
45 interrupt_events[std::make_pair(interrupt, channel)]->Signal();
46} 90}
47 91
48/** 92/**
@@ -58,7 +102,10 @@ static void ConvertProcessAddressFromDspDram(Service::Interface* self) {
58 102
59 u32 addr = cmd_buff[1]; 103 u32 addr = cmd_buff[1];
60 104
105 cmd_buff[0] = IPC::MakeHeader(0xC, 2, 0);
61 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 106 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
107
108 // TODO(merry): There is a per-region offset missing in this calculation (that seems to be always zero).
62 cmd_buff[2] = (addr << 1) + (Memory::DSP_RAM_VADDR + 0x40000); 109 cmd_buff[2] = (addr << 1) + (Memory::DSP_RAM_VADDR + 0x40000);
63 110
64 LOG_DEBUG(Service_DSP, "addr=0x%08X", addr); 111 LOG_DEBUG(Service_DSP, "addr=0x%08X", addr);
@@ -113,7 +160,9 @@ static void LoadComponent(Service::Interface* self) {
113static void GetSemaphoreEventHandle(Service::Interface* self) { 160static void GetSemaphoreEventHandle(Service::Interface* self) {
114 u32* cmd_buff = Kernel::GetCommandBuffer(); 161 u32* cmd_buff = Kernel::GetCommandBuffer();
115 162
163 cmd_buff[0] = IPC::MakeHeader(0x16, 1, 2);
116 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 164 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
165 // cmd_buff[2] not set
117 cmd_buff[3] = Kernel::g_handle_table.Create(semaphore_event).MoveFrom(); // Event handle 166 cmd_buff[3] = Kernel::g_handle_table.Create(semaphore_event).MoveFrom(); // Event handle
118 167
119 LOG_WARNING(Service_DSP, "(STUBBED) called"); 168 LOG_WARNING(Service_DSP, "(STUBBED) called");
@@ -138,8 +187,7 @@ static void FlushDataCache(Service::Interface* self) {
138 u32 size = cmd_buff[2]; 187 u32 size = cmd_buff[2];
139 u32 process = cmd_buff[4]; 188 u32 process = cmd_buff[4];
140 189
141 // TODO(purpasmart96): Verify return header on HW 190 cmd_buff[0] = IPC::MakeHeader(0x13, 1, 0);
142
143 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 191 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
144 192
145 LOG_TRACE(Service_DSP, "called address=0x%08X, size=0x%X, process=0x%08X", address, size, process); 193 LOG_TRACE(Service_DSP, "called address=0x%08X, size=0x%X, process=0x%08X", address, size, process);
@@ -148,8 +196,8 @@ static void FlushDataCache(Service::Interface* self) {
148/** 196/**
149 * DSP_DSP::RegisterInterruptEvents service function 197 * DSP_DSP::RegisterInterruptEvents service function
150 * Inputs: 198 * Inputs:
151 * 1 : Interrupt Number 199 * 1 : Interrupt Type
152 * 2 : Channel Number 200 * 2 : Pipe Number
153 * 4 : Interrupt event handle 201 * 4 : Interrupt event handle
154 * Outputs: 202 * Outputs:
155 * 1 : Result of function, 0 on success, otherwise error code 203 * 1 : Result of function, 0 on success, otherwise error code
@@ -157,23 +205,40 @@ static void FlushDataCache(Service::Interface* self) {
157static void RegisterInterruptEvents(Service::Interface* self) { 205static void RegisterInterruptEvents(Service::Interface* self) {
158 u32* cmd_buff = Kernel::GetCommandBuffer(); 206 u32* cmd_buff = Kernel::GetCommandBuffer();
159 207
160 u32 interrupt = cmd_buff[1]; 208 u32 type_index = cmd_buff[1];
161 u32 channel = cmd_buff[2]; 209 u32 pipe_index = cmd_buff[2];
162 u32 event_handle = cmd_buff[4]; 210 u32 event_handle = cmd_buff[4];
163 211
212 ASSERT_MSG(type_index < NUM_INTERRUPT_TYPE && pipe_index < DSP::HLE::NUM_DSP_PIPE,
213 "Invalid type or pipe: type = %u, pipe = %u", type_index, pipe_index);
214
215 InterruptType type = static_cast<InterruptType>(cmd_buff[1]);
216 DspPipe pipe = static_cast<DspPipe>(cmd_buff[2]);
217
218 cmd_buff[0] = IPC::MakeHeader(0x15, 1, 0);
219
164 if (event_handle) { 220 if (event_handle) {
165 auto evt = Kernel::g_handle_table.Get<Kernel::Event>(cmd_buff[4]); 221 auto evt = Kernel::g_handle_table.Get<Kernel::Event>(cmd_buff[4]);
166 if (evt) { 222
167 interrupt_events[std::make_pair(interrupt, channel)] = evt; 223 if (!evt) {
168 cmd_buff[1] = RESULT_SUCCESS.raw; 224 LOG_INFO(Service_DSP, "Invalid event handle! type=%u, pipe=%u, event_handle=0x%08X", type_index, pipe_index, event_handle);
169 LOG_INFO(Service_DSP, "Registered interrupt=%u, channel=%u, event_handle=0x%08X", interrupt, channel, event_handle); 225 ASSERT(false); // TODO: This should really be handled at an IPC translation layer.
170 } else {
171 LOG_CRITICAL(Service_DSP, "Invalid event handle! interrupt=%u, channel=%u, event_handle=0x%08X", interrupt, channel, event_handle);
172 ASSERT(false); // This should really be handled at a IPC translation layer.
173 } 226 }
227
228 if (interrupt_events.HasTooManyEventsRegistered()) {
229 LOG_INFO(Service_DSP, "Ran out of space to register interrupts (Attempted to register type=%u, pipe=%u, event_handle=0x%08X)",
230 type_index, pipe_index, event_handle);
231 cmd_buff[1] = ResultCode(ErrorDescription::InvalidResultValue, ErrorModule::DSP, ErrorSummary::OutOfResource, ErrorLevel::Status).raw;
232 return;
233 }
234
235 interrupt_events.Get(type, pipe) = evt;
236 LOG_INFO(Service_DSP, "Registered type=%u, pipe=%u, event_handle=0x%08X", type_index, pipe_index, event_handle);
237 cmd_buff[1] = RESULT_SUCCESS.raw;
174 } else { 238 } else {
175 interrupt_events.erase(std::make_pair(interrupt, channel)); 239 interrupt_events.Get(type, pipe) = nullptr;
176 LOG_INFO(Service_DSP, "Unregistered interrupt=%u, channel=%u, event_handle=0x%08X", interrupt, channel, event_handle); 240 LOG_INFO(Service_DSP, "Unregistered interrupt=%u, channel=%u, event_handle=0x%08X", type_index, pipe_index, event_handle);
241 cmd_buff[1] = RESULT_SUCCESS.raw;
177 } 242 }
178} 243}
179 244
@@ -187,6 +252,7 @@ static void RegisterInterruptEvents(Service::Interface* self) {
187static void SetSemaphore(Service::Interface* self) { 252static void SetSemaphore(Service::Interface* self) {
188 u32* cmd_buff = Kernel::GetCommandBuffer(); 253 u32* cmd_buff = Kernel::GetCommandBuffer();
189 254
255 cmd_buff[0] = IPC::MakeHeader(0x7, 1, 0);
190 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 256 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
191 257
192 LOG_WARNING(Service_DSP, "(STUBBED) called"); 258 LOG_WARNING(Service_DSP, "(STUBBED) called");
@@ -195,7 +261,7 @@ static void SetSemaphore(Service::Interface* self) {
195/** 261/**
196 * DSP_DSP::WriteProcessPipe service function 262 * DSP_DSP::WriteProcessPipe service function
197 * Inputs: 263 * Inputs:
198 * 1 : Channel 264 * 1 : Pipe Number
199 * 2 : Size 265 * 2 : Size
200 * 3 : (size << 14) | 0x402 266 * 3 : (size << 14) | 0x402
201 * 4 : Buffer 267 * 4 : Buffer
@@ -206,24 +272,32 @@ static void SetSemaphore(Service::Interface* self) {
206static void WriteProcessPipe(Service::Interface* self) { 272static void WriteProcessPipe(Service::Interface* self) {
207 u32* cmd_buff = Kernel::GetCommandBuffer(); 273 u32* cmd_buff = Kernel::GetCommandBuffer();
208 274
209 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(cmd_buff[1]); 275 u32 pipe_index = cmd_buff[1];
210 u32 size = cmd_buff[2]; 276 u32 size = cmd_buff[2];
211 u32 buffer = cmd_buff[4]; 277 u32 buffer = cmd_buff[4];
212 278
213 ASSERT_MSG(IPC::StaticBufferDesc(size, 1) == cmd_buff[3], "IPC static buffer descriptor failed validation (0x%X). pipe=%u, size=0x%X, buffer=0x%08X", cmd_buff[3], pipe, size, buffer); 279 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(pipe_index);
214 ASSERT_MSG(Memory::GetPointer(buffer) != nullptr, "Invalid Buffer: pipe=%u, size=0x%X, buffer=0x%08X", pipe, size, buffer);
215 280
216 std::vector<u8> message(size); 281 if (IPC::StaticBufferDesc(size, 1) != cmd_buff[3]) {
282 LOG_ERROR(Service_DSP, "IPC static buffer descriptor failed validation (0x%X). pipe=%u, size=0x%X, buffer=0x%08X", cmd_buff[3], pipe_index, size, buffer);
283 cmd_buff[0] = IPC::MakeHeader(0, 1, 0);
284 cmd_buff[1] = ResultCode(ErrorDescription::OS_InvalidBufferDescriptor, ErrorModule::OS, ErrorSummary::WrongArgument, ErrorLevel::Permanent).raw;
285 return;
286 }
217 287
218 for (size_t i = 0; i < size; i++) { 288 ASSERT_MSG(Memory::GetPointer(buffer) != nullptr, "Invalid Buffer: pipe=%u, size=0x%X, buffer=0x%08X", pipe_index, size, buffer);
289
290 std::vector<u8> message(size);
291 for (u32 i = 0; i < size; i++) {
219 message[i] = Memory::Read8(buffer + i); 292 message[i] = Memory::Read8(buffer + i);
220 } 293 }
221 294
222 DSP::HLE::PipeWrite(pipe, message); 295 DSP::HLE::PipeWrite(pipe, message);
223 296
297 cmd_buff[0] = IPC::MakeHeader(0xD, 1, 0);
224 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 298 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
225 299
226 LOG_DEBUG(Service_DSP, "pipe=%u, size=0x%X, buffer=0x%08X", pipe, size, buffer); 300 LOG_DEBUG(Service_DSP, "pipe=%u, size=0x%X, buffer=0x%08X", pipe_index, size, buffer);
227} 301}
228 302
229/** 303/**
@@ -243,13 +317,16 @@ static void WriteProcessPipe(Service::Interface* self) {
243static void ReadPipeIfPossible(Service::Interface* self) { 317static void ReadPipeIfPossible(Service::Interface* self) {
244 u32* cmd_buff = Kernel::GetCommandBuffer(); 318 u32* cmd_buff = Kernel::GetCommandBuffer();
245 319
246 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(cmd_buff[1]); 320 u32 pipe_index = cmd_buff[1];
247 u32 unknown = cmd_buff[2]; 321 u32 unknown = cmd_buff[2];
248 u32 size = cmd_buff[3] & 0xFFFF; // Lower 16 bits are size 322 u32 size = cmd_buff[3] & 0xFFFF; // Lower 16 bits are size
249 VAddr addr = cmd_buff[0x41]; 323 VAddr addr = cmd_buff[0x41];
250 324
251 ASSERT_MSG(Memory::GetPointer(addr) != nullptr, "Invalid addr: pipe=0x%08X, unknown=0x%08X, size=0x%X, buffer=0x%08X", pipe, unknown, size, addr); 325 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(pipe_index);
252 326
327 ASSERT_MSG(Memory::GetPointer(addr) != nullptr, "Invalid addr: pipe=%u, unknown=0x%08X, size=0x%X, buffer=0x%08X", pipe_index, unknown, size, addr);
328
329 cmd_buff[0] = IPC::MakeHeader(0x10, 1, 2);
253 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 330 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
254 if (DSP::HLE::GetPipeReadableSize(pipe) >= size) { 331 if (DSP::HLE::GetPipeReadableSize(pipe) >= size) {
255 std::vector<u8> response = DSP::HLE::PipeRead(pipe, size); 332 std::vector<u8> response = DSP::HLE::PipeRead(pipe, size);
@@ -260,8 +337,10 @@ static void ReadPipeIfPossible(Service::Interface* self) {
260 } else { 337 } else {
261 cmd_buff[2] = 0; // Return no data 338 cmd_buff[2] = 0; // Return no data
262 } 339 }
340 cmd_buff[3] = IPC::StaticBufferDesc(size, 0);
341 cmd_buff[4] = addr;
263 342
264 LOG_DEBUG(Service_DSP, "pipe=0x%08X, unknown=0x%08X, size=0x%X, buffer=0x%08X, return cmd_buff[2]=0x%08X", pipe, unknown, size, addr, cmd_buff[2]); 343 LOG_DEBUG(Service_DSP, "pipe=%u, unknown=0x%08X, size=0x%X, buffer=0x%08X, return cmd_buff[2]=0x%08X", pipe_index, unknown, size, addr, cmd_buff[2]);
265} 344}
266 345
267/** 346/**
@@ -278,26 +357,31 @@ static void ReadPipeIfPossible(Service::Interface* self) {
278static void ReadPipe(Service::Interface* self) { 357static void ReadPipe(Service::Interface* self) {
279 u32* cmd_buff = Kernel::GetCommandBuffer(); 358 u32* cmd_buff = Kernel::GetCommandBuffer();
280 359
281 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(cmd_buff[1]); 360 u32 pipe_index = cmd_buff[1];
282 u32 unknown = cmd_buff[2]; 361 u32 unknown = cmd_buff[2];
283 u32 size = cmd_buff[3] & 0xFFFF; // Lower 16 bits are size 362 u32 size = cmd_buff[3] & 0xFFFF; // Lower 16 bits are size
284 VAddr addr = cmd_buff[0x41]; 363 VAddr addr = cmd_buff[0x41];
285 364
286 ASSERT_MSG(Memory::GetPointer(addr) != nullptr, "Invalid addr: pipe=0x%08X, unknown=0x%08X, size=0x%X, buffer=0x%08X", pipe, unknown, size, addr); 365 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(pipe_index);
366
367 ASSERT_MSG(Memory::GetPointer(addr) != nullptr, "Invalid addr: pipe=%u, unknown=0x%08X, size=0x%X, buffer=0x%08X", pipe_index, unknown, size, addr);
287 368
288 if (DSP::HLE::GetPipeReadableSize(pipe) >= size) { 369 if (DSP::HLE::GetPipeReadableSize(pipe) >= size) {
289 std::vector<u8> response = DSP::HLE::PipeRead(pipe, size); 370 std::vector<u8> response = DSP::HLE::PipeRead(pipe, size);
290 371
291 Memory::WriteBlock(addr, response.data(), response.size()); 372 Memory::WriteBlock(addr, response.data(), response.size());
292 373
374 cmd_buff[0] = IPC::MakeHeader(0xE, 2, 2);
293 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 375 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
294 cmd_buff[2] = static_cast<u32>(response.size()); 376 cmd_buff[2] = static_cast<u32>(response.size());
377 cmd_buff[3] = IPC::StaticBufferDesc(size, 0);
378 cmd_buff[4] = addr;
295 } else { 379 } else {
296 // No more data is in pipe. Hardware hangs in this case; this should never happen. 380 // No more data is in pipe. Hardware hangs in this case; this should never happen.
297 UNREACHABLE(); 381 UNREACHABLE();
298 } 382 }
299 383
300 LOG_DEBUG(Service_DSP, "pipe=0x%08X, unknown=0x%08X, size=0x%X, buffer=0x%08X, return cmd_buff[2]=0x%08X", pipe, unknown, size, addr, cmd_buff[2]); 384 LOG_DEBUG(Service_DSP, "pipe=%u, unknown=0x%08X, size=0x%X, buffer=0x%08X, return cmd_buff[2]=0x%08X", pipe_index, unknown, size, addr, cmd_buff[2]);
301} 385}
302 386
303/** 387/**
@@ -312,13 +396,16 @@ static void ReadPipe(Service::Interface* self) {
312static void GetPipeReadableSize(Service::Interface* self) { 396static void GetPipeReadableSize(Service::Interface* self) {
313 u32* cmd_buff = Kernel::GetCommandBuffer(); 397 u32* cmd_buff = Kernel::GetCommandBuffer();
314 398
315 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(cmd_buff[1]); 399 u32 pipe_index = cmd_buff[1];
316 u32 unknown = cmd_buff[2]; 400 u32 unknown = cmd_buff[2];
317 401
402 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(pipe_index);
403
404 cmd_buff[0] = IPC::MakeHeader(0xF, 2, 0);
318 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 405 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
319 cmd_buff[2] = DSP::HLE::GetPipeReadableSize(pipe); 406 cmd_buff[2] = static_cast<u32>(DSP::HLE::GetPipeReadableSize(pipe));
320 407
321 LOG_DEBUG(Service_DSP, "pipe=0x%08X, unknown=0x%08X, return cmd_buff[2]=0x%08X", pipe, unknown, cmd_buff[2]); 408 LOG_DEBUG(Service_DSP, "pipe=%u, unknown=0x%08X, return cmd_buff[2]=0x%08X", pipe_index, unknown, cmd_buff[2]);
322} 409}
323 410
324/** 411/**
@@ -333,6 +420,7 @@ static void SetSemaphoreMask(Service::Interface* self) {
333 420
334 u32 mask = cmd_buff[1]; 421 u32 mask = cmd_buff[1];
335 422
423 cmd_buff[0] = IPC::MakeHeader(0x17, 1, 0);
336 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 424 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
337 425
338 LOG_WARNING(Service_DSP, "(STUBBED) called mask=0x%08X", mask); 426 LOG_WARNING(Service_DSP, "(STUBBED) called mask=0x%08X", mask);
@@ -350,10 +438,11 @@ static void SetSemaphoreMask(Service::Interface* self) {
350static void GetHeadphoneStatus(Service::Interface* self) { 438static void GetHeadphoneStatus(Service::Interface* self) {
351 u32* cmd_buff = Kernel::GetCommandBuffer(); 439 u32* cmd_buff = Kernel::GetCommandBuffer();
352 440
441 cmd_buff[0] = IPC::MakeHeader(0x1F, 2, 0);
353 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 442 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
354 cmd_buff[2] = 0; // Not using headphones? 443 cmd_buff[2] = 0; // Not using headphones
355 444
356 LOG_WARNING(Service_DSP, "(STUBBED) called"); 445 LOG_DEBUG(Service_DSP, "called");
357} 446}
358 447
359/** 448/**
@@ -376,6 +465,7 @@ static void RecvData(Service::Interface* self) {
376 465
377 // Application reads this after requesting DSP shutdown, to verify the DSP has indeed shutdown or slept. 466 // Application reads this after requesting DSP shutdown, to verify the DSP has indeed shutdown or slept.
378 467
468 cmd_buff[0] = IPC::MakeHeader(0x1, 2, 0);
379 cmd_buff[1] = RESULT_SUCCESS.raw; 469 cmd_buff[1] = RESULT_SUCCESS.raw;
380 switch (DSP::HLE::GetDspState()) { 470 switch (DSP::HLE::GetDspState()) {
381 case DSP::HLE::DspState::On: 471 case DSP::HLE::DspState::On:
@@ -411,6 +501,7 @@ static void RecvDataIsReady(Service::Interface* self) {
411 501
412 ASSERT_MSG(register_number == 0, "Unknown register_number %u", register_number); 502 ASSERT_MSG(register_number == 0, "Unknown register_number %u", register_number);
413 503
504 cmd_buff[0] = IPC::MakeHeader(0x2, 2, 0);
414 cmd_buff[1] = RESULT_SUCCESS.raw; 505 cmd_buff[1] = RESULT_SUCCESS.raw;
415 cmd_buff[2] = 1; // Ready to read 506 cmd_buff[2] = 1; // Ready to read
416 507
@@ -458,14 +549,14 @@ const Interface::FunctionInfo FunctionTable[] = {
458 549
459Interface::Interface() { 550Interface::Interface() {
460 semaphore_event = Kernel::Event::Create(Kernel::ResetType::OneShot, "DSP_DSP::semaphore_event"); 551 semaphore_event = Kernel::Event::Create(Kernel::ResetType::OneShot, "DSP_DSP::semaphore_event");
461 read_pipe_count = 0; 552 interrupt_events = {};
462 553
463 Register(FunctionTable); 554 Register(FunctionTable);
464} 555}
465 556
466Interface::~Interface() { 557Interface::~Interface() {
467 semaphore_event = nullptr; 558 semaphore_event = nullptr;
468 interrupt_events.clear(); 559 interrupt_events = {};
469} 560}
470 561
471} // namespace 562} // namespace
diff --git a/src/core/hle/service/dsp_dsp.h b/src/core/hle/service/dsp_dsp.h
index 32b89e9bb..22f6687cc 100644
--- a/src/core/hle/service/dsp_dsp.h
+++ b/src/core/hle/service/dsp_dsp.h
@@ -8,6 +8,12 @@
8 8
9#include "core/hle/service/service.h" 9#include "core/hle/service/service.h"
10 10
11namespace DSP {
12namespace HLE {
13enum class DspPipe;
14}
15}
16
11//////////////////////////////////////////////////////////////////////////////////////////////////// 17////////////////////////////////////////////////////////////////////////////////////////////////////
12// Namespace DSP_DSP 18// Namespace DSP_DSP
13 19
@@ -23,15 +29,10 @@ public:
23 } 29 }
24}; 30};
25 31
26/// Signal all audio related interrupts.
27void SignalAllInterrupts();
28
29/** 32/**
30 * Signal a specific audio related interrupt based on interrupt id and channel id. 33 * Signal a specific DSP related interrupt of type == InterruptType::Pipe, pipe == pipe.
31 * @param interrupt_id The interrupt id 34 * @param pipe The DSP pipe for which to signal an interrupt for.
32 * @param channel_id The channel id
33 * The significance of various values of interrupt_id and channel_id is not yet known.
34 */ 35 */
35void SignalInterrupt(u32 interrupt_id, u32 channel_id); 36void SignalPipeInterrupt(DSP::HLE::DspPipe pipe);
36 37
37} // namespace 38} // namespace DSP_DSP
diff --git a/src/core/hle/service/fs/archive.cpp b/src/core/hle/service/fs/archive.cpp
index e9588cb72..cc51ede0c 100644
--- a/src/core/hle/service/fs/archive.cpp
+++ b/src/core/hle/service/fs/archive.cpp
@@ -114,6 +114,7 @@ ResultVal<bool> File::SyncRequest() {
114 return read.Code(); 114 return read.Code();
115 } 115 }
116 cmd_buff[2] = static_cast<u32>(*read); 116 cmd_buff[2] = static_cast<u32>(*read);
117 Memory::RasterizerFlushAndInvalidateRegion(Memory::VirtualToPhysicalAddress(address), length);
117 break; 118 break;
118 } 119 }
119 120
diff --git a/src/core/hle/service/fs/fs_user.cpp b/src/core/hle/service/fs/fs_user.cpp
index 3ec7ceb30..7df7da5a4 100644
--- a/src/core/hle/service/fs/fs_user.cpp
+++ b/src/core/hle/service/fs/fs_user.cpp
@@ -250,7 +250,7 @@ static void CreateFile(Service::Interface* self) {
250 250
251 FileSys::Path file_path(filename_type, filename_size, filename_ptr); 251 FileSys::Path file_path(filename_type, filename_size, filename_ptr);
252 252
253 LOG_DEBUG(Service_FS, "type=%d size=%llu data=%s", filename_type, filename_size, file_path.DebugStr().c_str()); 253 LOG_DEBUG(Service_FS, "type=%d size=%llu data=%s", filename_type, file_size, file_path.DebugStr().c_str());
254 254
255 cmd_buff[1] = CreateFileInArchive(archive_handle, file_path, file_size).raw; 255 cmd_buff[1] = CreateFileInArchive(archive_handle, file_path, file_size).raw;
256} 256}
diff --git a/src/core/hle/service/gsp_gpu.cpp b/src/core/hle/service/gsp_gpu.cpp
index 0c655395e..8ded9b09b 100644
--- a/src/core/hle/service/gsp_gpu.cpp
+++ b/src/core/hle/service/gsp_gpu.cpp
@@ -15,8 +15,6 @@
15 15
16#include "video_core/gpu_debugger.h" 16#include "video_core/gpu_debugger.h"
17#include "video_core/debug_utils/debug_utils.h" 17#include "video_core/debug_utils/debug_utils.h"
18#include "video_core/renderer_base.h"
19#include "video_core/video_core.h"
20 18
21#include "gsp_gpu.h" 19#include "gsp_gpu.h"
22 20
@@ -45,6 +43,8 @@ Kernel::SharedPtr<Kernel::SharedMemory> g_shared_memory;
45/// Thread index into interrupt relay queue 43/// Thread index into interrupt relay queue
46u32 g_thread_id = 0; 44u32 g_thread_id = 0;
47 45
46static bool gpu_right_acquired = false;
47
48/// Gets a pointer to a thread command buffer in GSP shared memory 48/// Gets a pointer to a thread command buffer in GSP shared memory
49static inline u8* GetCommandBuffer(u32 thread_id) { 49static inline u8* GetCommandBuffer(u32 thread_id) {
50 return g_shared_memory->GetPointer(0x800 + (thread_id * sizeof(CommandBuffer))); 50 return g_shared_memory->GetPointer(0x800 + (thread_id * sizeof(CommandBuffer)));
@@ -291,8 +291,6 @@ static void FlushDataCache(Service::Interface* self) {
291 u32 size = cmd_buff[2]; 291 u32 size = cmd_buff[2];
292 u32 process = cmd_buff[4]; 292 u32 process = cmd_buff[4];
293 293
294 VideoCore::g_renderer->Rasterizer()->InvalidateRegion(Memory::VirtualToPhysicalAddress(address), size);
295
296 // TODO(purpasmart96): Verify return header on HW 294 // TODO(purpasmart96): Verify return header on HW
297 295
298 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 296 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
@@ -337,8 +335,9 @@ static void RegisterInterruptRelayQueue(Service::Interface* self) {
337 g_interrupt_event->name = "GSP_GPU::interrupt_event"; 335 g_interrupt_event->name = "GSP_GPU::interrupt_event";
338 336
339 using Kernel::MemoryPermission; 337 using Kernel::MemoryPermission;
340 g_shared_memory = Kernel::SharedMemory::Create(0x1000, MemoryPermission::ReadWrite, 338 g_shared_memory = Kernel::SharedMemory::Create(nullptr, 0x1000,
341 MemoryPermission::ReadWrite, "GSPSharedMem"); 339 MemoryPermission::ReadWrite, MemoryPermission::ReadWrite,
340 0, Kernel::MemoryRegion::BASE, "GSP:SharedMemory");
342 341
343 Handle shmem_handle = Kernel::g_handle_table.Create(g_shared_memory).MoveFrom(); 342 Handle shmem_handle = Kernel::g_handle_table.Create(g_shared_memory).MoveFrom();
344 343
@@ -374,6 +373,9 @@ static void UnregisterInterruptRelayQueue(Service::Interface* self) {
374 * @todo This probably does not belong in the GSP module, instead move to video_core 373 * @todo This probably does not belong in the GSP module, instead move to video_core
375 */ 374 */
376void SignalInterrupt(InterruptId interrupt_id) { 375void SignalInterrupt(InterruptId interrupt_id) {
376 if (!gpu_right_acquired) {
377 return;
378 }
377 if (nullptr == g_interrupt_event) { 379 if (nullptr == g_interrupt_event) {
378 LOG_WARNING(Service_GSP, "cannot synchronize until GSP event has been created!"); 380 LOG_WARNING(Service_GSP, "cannot synchronize until GSP event has been created!");
379 return; 381 return;
@@ -408,6 +410,8 @@ void SignalInterrupt(InterruptId interrupt_id) {
408 g_interrupt_event->Signal(); 410 g_interrupt_event->Signal();
409} 411}
410 412
413MICROPROFILE_DEFINE(GPU_GSP_DMA, "GPU", "GSP DMA", MP_RGB(100, 0, 255));
414
411/// Executes the next GSP command 415/// Executes the next GSP command
412static void ExecuteCommand(const Command& command, u32 thread_id) { 416static void ExecuteCommand(const Command& command, u32 thread_id) {
413 // Utility function to convert register ID to address 417 // Utility function to convert register ID to address
@@ -419,18 +423,21 @@ static void ExecuteCommand(const Command& command, u32 thread_id) {
419 423
420 // GX request DMA - typically used for copying memory from GSP heap to VRAM 424 // GX request DMA - typically used for copying memory from GSP heap to VRAM
421 case CommandId::REQUEST_DMA: 425 case CommandId::REQUEST_DMA:
422 VideoCore::g_renderer->Rasterizer()->FlushRegion(Memory::VirtualToPhysicalAddress(command.dma_request.source_address), 426 {
423 command.dma_request.size); 427 MICROPROFILE_SCOPE(GPU_GSP_DMA);
428
429 // TODO: Consider attempting rasterizer-accelerated surface blit if that usage is ever possible/likely
430 Memory::RasterizerFlushRegion(Memory::VirtualToPhysicalAddress(command.dma_request.source_address),
431 command.dma_request.size);
432 Memory::RasterizerFlushAndInvalidateRegion(Memory::VirtualToPhysicalAddress(command.dma_request.dest_address),
433 command.dma_request.size);
424 434
425 memcpy(Memory::GetPointer(command.dma_request.dest_address), 435 memcpy(Memory::GetPointer(command.dma_request.dest_address),
426 Memory::GetPointer(command.dma_request.source_address), 436 Memory::GetPointer(command.dma_request.source_address),
427 command.dma_request.size); 437 command.dma_request.size);
428 SignalInterrupt(InterruptId::DMA); 438 SignalInterrupt(InterruptId::DMA);
429
430 VideoCore::g_renderer->Rasterizer()->InvalidateRegion(Memory::VirtualToPhysicalAddress(command.dma_request.dest_address),
431 command.dma_request.size);
432 break; 439 break;
433 440 }
434 // TODO: This will need some rework in the future. (why?) 441 // TODO: This will need some rework in the future. (why?)
435 case CommandId::SUBMIT_GPU_CMDLIST: 442 case CommandId::SUBMIT_GPU_CMDLIST:
436 { 443 {
@@ -517,13 +524,8 @@ static void ExecuteCommand(const Command& command, u32 thread_id) {
517 524
518 case CommandId::CACHE_FLUSH: 525 case CommandId::CACHE_FLUSH:
519 { 526 {
520 for (auto& region : command.cache_flush.regions) { 527 // NOTE: Rasterizer flushing handled elsewhere in CPU read/write and other GPU handlers
521 if (region.size == 0) 528 // Use command.cache_flush.regions to implement this handler
522 break;
523
524 VideoCore::g_renderer->Rasterizer()->InvalidateRegion(
525 Memory::VirtualToPhysicalAddress(region.address), region.size);
526 }
527 break; 529 break;
528 } 530 }
529 531
@@ -628,6 +630,35 @@ static void ImportDisplayCaptureInfo(Service::Interface* self) {
628 LOG_WARNING(Service_GSP, "called"); 630 LOG_WARNING(Service_GSP, "called");
629} 631}
630 632
633/**
634 * GSP_GPU::AcquireRight service function
635 * Outputs:
636 * 1: Result code
637 */
638static void AcquireRight(Service::Interface* self) {
639 u32* cmd_buff = Kernel::GetCommandBuffer();
640
641 gpu_right_acquired = true;
642
643 cmd_buff[1] = RESULT_SUCCESS.raw;
644
645 LOG_WARNING(Service_GSP, "called");
646}
647
648/**
649 * GSP_GPU::ReleaseRight service function
650 * Outputs:
651 * 1: Result code
652 */
653static void ReleaseRight(Service::Interface* self) {
654 u32* cmd_buff = Kernel::GetCommandBuffer();
655
656 gpu_right_acquired = false;
657
658 cmd_buff[1] = RESULT_SUCCESS.raw;
659
660 LOG_WARNING(Service_GSP, "called");
661}
631 662
632const Interface::FunctionInfo FunctionTable[] = { 663const Interface::FunctionInfo FunctionTable[] = {
633 {0x00010082, WriteHWRegs, "WriteHWRegs"}, 664 {0x00010082, WriteHWRegs, "WriteHWRegs"},
@@ -651,8 +682,8 @@ const Interface::FunctionInfo FunctionTable[] = {
651 {0x00130042, RegisterInterruptRelayQueue, "RegisterInterruptRelayQueue"}, 682 {0x00130042, RegisterInterruptRelayQueue, "RegisterInterruptRelayQueue"},
652 {0x00140000, UnregisterInterruptRelayQueue, "UnregisterInterruptRelayQueue"}, 683 {0x00140000, UnregisterInterruptRelayQueue, "UnregisterInterruptRelayQueue"},
653 {0x00150002, nullptr, "TryAcquireRight"}, 684 {0x00150002, nullptr, "TryAcquireRight"},
654 {0x00160042, nullptr, "AcquireRight"}, 685 {0x00160042, AcquireRight, "AcquireRight"},
655 {0x00170000, nullptr, "ReleaseRight"}, 686 {0x00170000, ReleaseRight, "ReleaseRight"},
656 {0x00180000, ImportDisplayCaptureInfo, "ImportDisplayCaptureInfo"}, 687 {0x00180000, ImportDisplayCaptureInfo, "ImportDisplayCaptureInfo"},
657 {0x00190000, nullptr, "SaveVramSysArea"}, 688 {0x00190000, nullptr, "SaveVramSysArea"},
658 {0x001A0000, nullptr, "RestoreVramSysArea"}, 689 {0x001A0000, nullptr, "RestoreVramSysArea"},
@@ -673,11 +704,13 @@ Interface::Interface() {
673 g_shared_memory = nullptr; 704 g_shared_memory = nullptr;
674 705
675 g_thread_id = 0; 706 g_thread_id = 0;
707 gpu_right_acquired = false;
676} 708}
677 709
678Interface::~Interface() { 710Interface::~Interface() {
679 g_interrupt_event = nullptr; 711 g_interrupt_event = nullptr;
680 g_shared_memory = nullptr; 712 g_shared_memory = nullptr;
713 gpu_right_acquired = false;
681} 714}
682 715
683} // namespace 716} // namespace
diff --git a/src/core/hle/service/gsp_gpu.h b/src/core/hle/service/gsp_gpu.h
index 55a993bb8..3b4b678a3 100644
--- a/src/core/hle/service/gsp_gpu.h
+++ b/src/core/hle/service/gsp_gpu.h
@@ -10,6 +10,7 @@
10#include "common/bit_field.h" 10#include "common/bit_field.h"
11#include "common/common_types.h" 11#include "common/common_types.h"
12 12
13#include "core/hle/result.h"
13#include "core/hle/service/service.h" 14#include "core/hle/service/service.h"
14 15
15//////////////////////////////////////////////////////////////////////////////////////////////////// 16////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index 1053d0f40..d216cecb4 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -280,8 +280,9 @@ void Init() {
280 AddService(new HID_SPVR_Interface); 280 AddService(new HID_SPVR_Interface);
281 281
282 using Kernel::MemoryPermission; 282 using Kernel::MemoryPermission;
283 shared_mem = SharedMemory::Create(0x1000, MemoryPermission::ReadWrite, 283 shared_mem = SharedMemory::Create(nullptr, 0x1000,
284 MemoryPermission::Read, "HID:SharedMem"); 284 MemoryPermission::ReadWrite, MemoryPermission::Read,
285 0, Kernel::MemoryRegion::BASE, "HID:SharedMemory");
285 286
286 next_pad_index = 0; 287 next_pad_index = 0;
287 next_touch_index = 0; 288 next_touch_index = 0;
diff --git a/src/core/hle/service/ir/ir.cpp b/src/core/hle/service/ir/ir.cpp
index 505c441c6..079a87e48 100644
--- a/src/core/hle/service/ir/ir.cpp
+++ b/src/core/hle/service/ir/ir.cpp
@@ -94,8 +94,9 @@ void Init() {
94 AddService(new IR_User_Interface); 94 AddService(new IR_User_Interface);
95 95
96 using Kernel::MemoryPermission; 96 using Kernel::MemoryPermission;
97 shared_memory = SharedMemory::Create(0x1000, Kernel::MemoryPermission::ReadWrite, 97 shared_memory = SharedMemory::Create(nullptr, 0x1000,
98 Kernel::MemoryPermission::ReadWrite, "IR:SharedMemory"); 98 Kernel::MemoryPermission::ReadWrite, Kernel::MemoryPermission::ReadWrite,
99 0, Kernel::MemoryRegion::BASE, "IR:SharedMemory");
99 transfer_shared_memory = nullptr; 100 transfer_shared_memory = nullptr;
100 101
101 // Create event handle(s) 102 // Create event handle(s)
diff --git a/src/core/hle/service/ndm/ndm.cpp b/src/core/hle/service/ndm/ndm.cpp
index 47076a7b8..bc9c3413d 100644
--- a/src/core/hle/service/ndm/ndm.cpp
+++ b/src/core/hle/service/ndm/ndm.cpp
@@ -11,28 +11,217 @@
11namespace Service { 11namespace Service {
12namespace NDM { 12namespace NDM {
13 13
14void SuspendDaemons(Service::Interface* self) { 14enum : u32 {
15 DEFAULT_RETRY_INTERVAL = 10,
16 DEFAULT_SCAN_INTERVAL = 30
17};
18
19static DaemonMask daemon_bit_mask = DaemonMask::Default;
20static DaemonMask default_daemon_bit_mask = DaemonMask::Default;
21static std::array<DaemonStatus, 4> daemon_status = { DaemonStatus::Idle, DaemonStatus::Idle, DaemonStatus::Idle, DaemonStatus::Idle };
22static ExclusiveState exclusive_state = ExclusiveState::None;
23static u32 scan_interval = DEFAULT_SCAN_INTERVAL;
24static u32 retry_interval = DEFAULT_RETRY_INTERVAL;
25static bool daemon_lock_enabled = false;
26
27void EnterExclusiveState(Service::Interface* self) {
28 u32* cmd_buff = Kernel::GetCommandBuffer();
29 exclusive_state = static_cast<ExclusiveState>(cmd_buff[1]);
30
31 cmd_buff[0] = IPC::MakeHeader(0x1, 1, 0);
32 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
33 LOG_WARNING(Service_NDM, "(STUBBED) exclusive_state=0x%08X ", exclusive_state);
34}
35
36void LeaveExclusiveState(Service::Interface* self) {
37 u32* cmd_buff = Kernel::GetCommandBuffer();
38 exclusive_state = ExclusiveState::None;
39
40 cmd_buff[0] = IPC::MakeHeader(0x2, 1, 0);
41 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
42 LOG_WARNING(Service_NDM, "(STUBBED) exclusive_state=0x%08X ", exclusive_state);
43}
44
45void QueryExclusiveMode(Service::Interface* self) {
15 u32* cmd_buff = Kernel::GetCommandBuffer(); 46 u32* cmd_buff = Kernel::GetCommandBuffer();
16 47
17 LOG_WARNING(Service_NDM, "(STUBBED) bit_mask=0x%08X ", cmd_buff[1]); 48 cmd_buff[0] = IPC::MakeHeader(0x3, 2, 0);
49 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
50 cmd_buff[2] = static_cast<u32>(exclusive_state);
51 LOG_WARNING(Service_NDM, "(STUBBED) exclusive_state=0x%08X ", exclusive_state);
52}
53
54void LockState(Service::Interface* self) {
55 u32* cmd_buff = Kernel::GetCommandBuffer();
56 daemon_lock_enabled = true;
57
58 cmd_buff[0] = IPC::MakeHeader(0x4, 1, 0);
59 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
60 LOG_WARNING(Service_NDM, "(STUBBED) daemon_lock_enabled=0x%08X ", daemon_lock_enabled);
61}
62
63void UnlockState(Service::Interface* self) {
64 u32* cmd_buff = Kernel::GetCommandBuffer();
65 daemon_lock_enabled = false;
18 66
67 cmd_buff[0] = IPC::MakeHeader(0x5, 1, 0);
19 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 68 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
69 LOG_WARNING(Service_NDM, "(STUBBED) daemon_lock_enabled=0x%08X ", daemon_lock_enabled);
70}
71
72void SuspendDaemons(Service::Interface* self) {
73 u32* cmd_buff = Kernel::GetCommandBuffer();
74 u32 bit_mask = cmd_buff[1] & 0xF;
75 daemon_bit_mask = static_cast<DaemonMask>(static_cast<u32>(default_daemon_bit_mask) & ~bit_mask);
76 for (size_t index = 0; index < daemon_status.size(); ++index) {
77 if (bit_mask & (1 << index)) {
78 daemon_status[index] = DaemonStatus::Suspended;
79 }
80 }
81
82 cmd_buff[0] = IPC::MakeHeader(0x6, 1, 0);
83 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
84 LOG_WARNING(Service_NDM, "(STUBBED) daemon_bit_mask=0x%08X ", daemon_bit_mask);
20} 85}
21 86
22void ResumeDaemons(Service::Interface* self) { 87void ResumeDaemons(Service::Interface* self) {
23 u32* cmd_buff = Kernel::GetCommandBuffer(); 88 u32* cmd_buff = Kernel::GetCommandBuffer();
89 u32 bit_mask = cmd_buff[1] & 0xF;
90 daemon_bit_mask = static_cast<DaemonMask>(static_cast<u32>(daemon_bit_mask) | bit_mask);
91 for (size_t index = 0; index < daemon_status.size(); ++index) {
92 if (bit_mask & (1 << index)) {
93 daemon_status[index] = DaemonStatus::Idle;
94 }
95 }
96
97 cmd_buff[0] = IPC::MakeHeader(0x7, 1, 0);
98 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
99 LOG_WARNING(Service_NDM, "(STUBBED) daemon_bit_mask=0x%08X ", daemon_bit_mask);
100}
101
102void SuspendScheduler(Service::Interface* self) {
103 u32* cmd_buff = Kernel::GetCommandBuffer();
104
105 cmd_buff[0] = IPC::MakeHeader(0x8, 1, 0);
106 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
107 LOG_WARNING(Service_NDM, "(STUBBED) called");
108}
109
110void ResumeScheduler(Service::Interface* self) {
111 u32* cmd_buff = Kernel::GetCommandBuffer();
112
113 cmd_buff[0] = IPC::MakeHeader(0x9, 1, 0);
114 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
115 LOG_WARNING(Service_NDM, "(STUBBED) called");
116}
117
118void QueryStatus(Service::Interface* self) {
119 u32* cmd_buff = Kernel::GetCommandBuffer();
120 u32 daemon = cmd_buff[1] & 0xF;
24 121
25 LOG_WARNING(Service_NDM, "(STUBBED) bit_mask=0x%08X ", cmd_buff[1]); 122 cmd_buff[0] = IPC::MakeHeader(0xD, 2, 0);
123 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
124 cmd_buff[2] = static_cast<u32>(daemon_status.at(daemon));
125 LOG_WARNING(Service_NDM, "(STUBBED) daemon=0x%08X, daemon_status=0x%08X", daemon, cmd_buff[2]);
126}
127
128void GetDaemonDisableCount(Service::Interface* self) {
129 u32* cmd_buff = Kernel::GetCommandBuffer();
130 u32 daemon = cmd_buff[1] & 0xF;
131
132 cmd_buff[0] = IPC::MakeHeader(0xE, 3, 0);
133 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
134 cmd_buff[2] = 0;
135 cmd_buff[3] = 0;
136 LOG_WARNING(Service_NDM, "(STUBBED) daemon=0x%08X", daemon);
137}
138
139void GetSchedulerDisableCount(Service::Interface* self) {
140 u32* cmd_buff = Kernel::GetCommandBuffer();
141
142 cmd_buff[0] = IPC::MakeHeader(0xF, 3, 0);
143 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
144 cmd_buff[2] = 0;
145 cmd_buff[3] = 0;
146 LOG_WARNING(Service_NDM, "(STUBBED) called");
147}
148
149void SetScanInterval(Service::Interface* self) {
150 u32* cmd_buff = Kernel::GetCommandBuffer();
151 scan_interval = cmd_buff[1];
26 152
153 cmd_buff[0] = IPC::MakeHeader(0x10, 1, 0);
27 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 154 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
155 LOG_WARNING(Service_NDM, "(STUBBED) scan_interval=0x%08X ", scan_interval);
156}
157
158void GetScanInterval(Service::Interface* self) {
159 u32* cmd_buff = Kernel::GetCommandBuffer();
160
161 cmd_buff[0] = IPC::MakeHeader(0x11, 2, 0);
162 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
163 cmd_buff[2] = scan_interval;
164 LOG_WARNING(Service_NDM, "(STUBBED) scan_interval=0x%08X ", scan_interval);
165}
166
167void SetRetryInterval(Service::Interface* self) {
168 u32* cmd_buff = Kernel::GetCommandBuffer();
169 retry_interval = cmd_buff[1];
170
171 cmd_buff[0] = IPC::MakeHeader(0x12, 1, 0);
172 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
173 LOG_WARNING(Service_NDM, "(STUBBED) retry_interval=0x%08X ", retry_interval);
174}
175
176void GetRetryInterval(Service::Interface* self) {
177 u32* cmd_buff = Kernel::GetCommandBuffer();
178
179 cmd_buff[0] = IPC::MakeHeader(0x13, 2, 0);
180 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
181 cmd_buff[2] = retry_interval;
182 LOG_WARNING(Service_NDM, "(STUBBED) retry_interval=0x%08X ", retry_interval);
28} 183}
29 184
30void OverrideDefaultDaemons(Service::Interface* self) { 185void OverrideDefaultDaemons(Service::Interface* self) {
31 u32* cmd_buff = Kernel::GetCommandBuffer(); 186 u32* cmd_buff = Kernel::GetCommandBuffer();
187 u32 bit_mask = cmd_buff[1] & 0xF;
188 default_daemon_bit_mask = static_cast<DaemonMask>(bit_mask);
189 daemon_bit_mask = default_daemon_bit_mask;
190 for (size_t index = 0; index < daemon_status.size(); ++index) {
191 if (bit_mask & (1 << index)) {
192 daemon_status[index] = DaemonStatus::Idle;
193 }
194 }
32 195
33 LOG_WARNING(Service_NDM, "(STUBBED) bit_mask=0x%08X ", cmd_buff[1]); 196 cmd_buff[0] = IPC::MakeHeader(0x14, 1, 0);
197 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
198 LOG_WARNING(Service_NDM, "(STUBBED) default_daemon_bit_mask=0x%08X ", default_daemon_bit_mask);
199}
200
201void ResetDefaultDaemons(Service::Interface* self) {
202 u32* cmd_buff = Kernel::GetCommandBuffer();
203 default_daemon_bit_mask = DaemonMask::Default;
204
205 cmd_buff[0] = IPC::MakeHeader(0x15, 1, 0);
206 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
207 LOG_WARNING(Service_NDM, "(STUBBED) default_daemon_bit_mask=0x%08X ", default_daemon_bit_mask);
208}
209
210void GetDefaultDaemons(Service::Interface* self) {
211 u32* cmd_buff = Kernel::GetCommandBuffer();
212
213 cmd_buff[0] = IPC::MakeHeader(0x16, 2, 0);
214 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
215 cmd_buff[2] = static_cast<u32>(default_daemon_bit_mask);
216 LOG_WARNING(Service_NDM, "(STUBBED) default_daemon_bit_mask=0x%08X ", default_daemon_bit_mask);
217}
218
219void ClearHalfAwakeMacFilter(Service::Interface* self) {
220 u32* cmd_buff = Kernel::GetCommandBuffer();
34 221
222 cmd_buff[0] = IPC::MakeHeader(0x17, 1, 0);
35 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 223 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
224 LOG_WARNING(Service_NDM, "(STUBBED) called");
36} 225}
37 226
38void Init() { 227void Init() {
diff --git a/src/core/hle/service/ndm/ndm.h b/src/core/hle/service/ndm/ndm.h
index 734730f8c..5c2b968dc 100644
--- a/src/core/hle/service/ndm/ndm.h
+++ b/src/core/hle/service/ndm/ndm.h
@@ -12,10 +12,91 @@ class Interface;
12 12
13namespace NDM { 13namespace NDM {
14 14
15enum class Daemon : u32 {
16 Cec = 0,
17 Boss = 1,
18 Nim = 2,
19 Friend = 3
20};
21
22enum class DaemonMask : u32 {
23 None = 0,
24 Cec = (1 << static_cast<u32>(Daemon::Cec)),
25 Boss = (1 << static_cast<u32>(Daemon::Boss)),
26 Nim = (1 << static_cast<u32>(Daemon::Nim)),
27 Friend = (1 << static_cast<u32>(Daemon::Friend)),
28 Default = Cec | Friend,
29 All = Cec | Boss | Nim | Friend
30};
31
32enum class DaemonStatus : u32 {
33 Busy = 0,
34 Idle = 1,
35 Suspending = 2,
36 Suspended = 3
37};
38
39enum class ExclusiveState : u32 {
40 None = 0,
41 Infrastructure = 1,
42 LocalCommunications = 2,
43 Streetpass = 3,
44 StreetpassData = 4,
45};
46
47/**
48 * NDM::EnterExclusiveState service function
49 * Inputs:
50 * 0 : Header code [0x00010042]
51 * 1 : Exclusive State
52 * 2 : 0x20
53 * Outputs:
54 * 1 : Result, 0 on success, otherwise error code
55 */
56void EnterExclusiveState(Service::Interface* self);
57
58/**
59 * NDM::LeaveExclusiveState service function
60 * Inputs:
61 * 0 : Header code [0x00020002]
62 * 1 : 0x20
63 * Outputs:
64 * 1 : Result, 0 on success, otherwise error code
65 */
66void LeaveExclusiveState(Service::Interface* self);
67
68/**
69 * NDM::QueryExclusiveMode service function
70 * Inputs:
71 * 0 : Header code [0x00030000]
72 * Outputs:
73 * 1 : Result, 0 on success, otherwise error code
74 * 2 : Current Exclusive State
75 */
76void QueryExclusiveMode(Service::Interface* self);
77
78/**
79 * NDM::LockState service function
80 * Inputs:
81 * 0 : Header code [0x00040002]
82 * Outputs:
83 * 1 : Result, 0 on success, otherwise error code
84 */
85void LockState(Service::Interface* self);
86
87/**
88 * NDM::UnlockState service function
89 * Inputs:
90 * 0 : Header code [0x00050002]
91 * Outputs:
92 * 1 : Result, 0 on success, otherwise error code
93 */
94void UnlockState(Service::Interface* self);
95
15/** 96/**
16 * SuspendDaemons 97 * NDM::SuspendDaemons service function
17 * Inputs: 98 * Inputs:
18 * 0 : Command header (0x00020082) 99 * 0 : Header code [0x00060040]
19 * 1 : Daemon bit mask 100 * 1 : Daemon bit mask
20 * Outputs: 101 * Outputs:
21 * 1 : Result, 0 on success, otherwise error code 102 * 1 : Result, 0 on success, otherwise error code
@@ -23,9 +104,9 @@ namespace NDM {
23void SuspendDaemons(Service::Interface* self); 104void SuspendDaemons(Service::Interface* self);
24 105
25/** 106/**
26 * ResumeDaemons 107 * NDM::ResumeDaemons service function
27 * Inputs: 108 * Inputs:
28 * 0 : Command header (0x00020082) 109 * 0 : Header code [0x00070040]
29 * 1 : Daemon bit mask 110 * 1 : Daemon bit mask
30 * Outputs: 111 * Outputs:
31 * 1 : Result, 0 on success, otherwise error code 112 * 1 : Result, 0 on success, otherwise error code
@@ -33,15 +114,138 @@ void SuspendDaemons(Service::Interface* self);
33void ResumeDaemons(Service::Interface* self); 114void ResumeDaemons(Service::Interface* self);
34 115
35/** 116/**
36 * OverrideDefaultDaemons 117 * NDM::SuspendScheduler service function
37 * Inputs: 118 * Inputs:
38 * 0 : Command header (0x00020082) 119 * 0 : Header code [0x00080040]
120 * Outputs:
121 * 1 : Result, 0 on success, otherwise error code
122 */
123void SuspendScheduler(Service::Interface* self);
124
125/**
126 * NDM::ResumeScheduler service function
127 * Inputs:
128 * 0 : Header code [0x00090000]
129 * Outputs:
130 * 1 : Result, 0 on success, otherwise error code
131 */
132void ResumeScheduler(Service::Interface* self);
133
134/**
135 * NDM::QueryStatus service function
136 * Inputs:
137 * 0 : Header code [0x000D0040]
138 * 1 : Daemon
139 * Outputs:
140 * 1 : Result, 0 on success, otherwise error code
141 * 2 : Daemon status
142 */
143void QueryStatus(Service::Interface* self);
144
145/**
146 * NDM::GetDaemonDisableCount service function
147 * Inputs:
148 * 0 : Header code [0x000E0040]
149 * 1 : Daemon
150 * Outputs:
151 * 1 : Result, 0 on success, otherwise error code
152 * 2 : Current process disable count
153 * 3 : Total disable count
154 */
155void GetDaemonDisableCount(Service::Interface* self);
156
157/**
158 * NDM::GetSchedulerDisableCount service function
159 * Inputs:
160 * 0 : Header code [0x000F0000]
161 * Outputs:
162 * 1 : Result, 0 on success, otherwise error code
163 * 2 : Current process disable count
164 * 3 : Total disable count
165 */
166void GetSchedulerDisableCount(Service::Interface* self);
167
168/**
169 * NDM::SetScanInterval service function
170 * Inputs:
171 * 0 : Header code [0x00100040]
172 * 1 : Interval (default = 30)
173 * Outputs:
174 * 1 : Result, 0 on success, otherwise error code
175 */
176void SetScanInterval(Service::Interface* self);
177
178/**
179 * NDM::GetScanInterval service function
180 * Inputs:
181 * 0 : Header code [0x00110000]
182 * Outputs:
183 * 1 : Result, 0 on success, otherwise error code
184 * 2 : Interval (default = 30)
185 */
186void GetScanInterval(Service::Interface* self);
187
188/**
189 * NDM::SetRetryInterval service function
190 * Inputs:
191 * 0 : Header code [0x00120040]
192 * 1 : Interval (default = 10)
193 * Outputs:
194 * 1 : Result, 0 on success, otherwise error code
195 */
196void SetRetryInterval(Service::Interface* self);
197
198/**
199 * NDM::GetRetryInterval service function
200 * Inputs:
201 * 0 : Header code [0x00130000]
202 * Outputs:
203 * 1 : Result, 0 on success, otherwise error code
204 * 2 : Interval (default = 10)
205 */
206void GetRetryInterval(Service::Interface* self);
207
208
209/**
210 * NDM::OverrideDefaultDaemons service function
211 * Inputs:
212 * 0 : Header code [0x00140040]
39 * 1 : Daemon bit mask 213 * 1 : Daemon bit mask
40 * Outputs: 214 * Outputs:
41 * 1 : Result, 0 on success, otherwise error code 215 * 1 : Result, 0 on success, otherwise error code
42 */ 216 */
43void OverrideDefaultDaemons(Service::Interface* self); 217void OverrideDefaultDaemons(Service::Interface* self);
44 218
219/**
220 * NDM::ResetDefaultDaemons service function
221 * Inputs:
222 * 0 : Header code [0x00150000]
223 * Outputs:
224 * 1 : Result, 0 on success, otherwise error code
225 */
226void ResetDefaultDaemons(Service::Interface* self);
227
228/**
229 * NDM::GetDefaultDaemons service function
230 * Inputs:
231 * 0 : Header code [0x00160000]
232 * Outputs:
233 * 1 : Result, 0 on success, otherwise error code
234 * 2 : Daemon bit mask
235 * Note:
236 * Gets the current default daemon bit mask. The default value is (DAEMONMASK_CEC | DAEMONMASK_FRIENDS)
237 */
238void GetDefaultDaemons(Service::Interface* self);
239
240/**
241 * NDM::ClearHalfAwakeMacFilter service function
242 * Inputs:
243 * 0 : Header code [0x00170000]
244 * Outputs:
245 * 1 : Result, 0 on success, otherwise error code
246 */
247void ClearHalfAwakeMacFilter(Service::Interface* self);
248
45/// Initialize NDM service 249/// Initialize NDM service
46void Init(); 250void Init();
47 251
diff --git a/src/core/hle/service/ndm/ndm_u.cpp b/src/core/hle/service/ndm/ndm_u.cpp
index bf95cc7aa..3ff0744ee 100644
--- a/src/core/hle/service/ndm/ndm_u.cpp
+++ b/src/core/hle/service/ndm/ndm_u.cpp
@@ -9,29 +9,29 @@ namespace Service {
9namespace NDM { 9namespace NDM {
10 10
11const Interface::FunctionInfo FunctionTable[] = { 11const Interface::FunctionInfo FunctionTable[] = {
12 {0x00010042, nullptr, "EnterExclusiveState"}, 12 {0x00010042, EnterExclusiveState, "EnterExclusiveState"},
13 {0x00020002, nullptr, "LeaveExclusiveState"}, 13 {0x00020002, LeaveExclusiveState, "LeaveExclusiveState"},
14 {0x00030000, nullptr, "QueryExclusiveMode"}, 14 {0x00030000, QueryExclusiveMode, "QueryExclusiveMode"},
15 {0x00040002, nullptr, "LockState"}, 15 {0x00040002, LockState, "LockState"},
16 {0x00050002, nullptr, "UnlockState"}, 16 {0x00050002, UnlockState, "UnlockState"},
17 {0x00060040, SuspendDaemons, "SuspendDaemons"}, 17 {0x00060040, SuspendDaemons, "SuspendDaemons"},
18 {0x00070040, ResumeDaemons, "ResumeDaemons"}, 18 {0x00070040, ResumeDaemons, "ResumeDaemons"},
19 {0x00080040, nullptr, "DisableWifiUsage"}, 19 {0x00080040, SuspendScheduler, "SuspendScheduler"},
20 {0x00090000, nullptr, "EnableWifiUsage"}, 20 {0x00090000, ResumeScheduler, "ResumeScheduler"},
21 {0x000A0000, nullptr, "GetCurrentState"}, 21 {0x000A0000, nullptr, "GetCurrentState"},
22 {0x000B0000, nullptr, "GetTargetState"}, 22 {0x000B0000, nullptr, "GetTargetState"},
23 {0x000C0000, nullptr, "<Stubbed>"}, 23 {0x000C0000, nullptr, "<Stubbed>"},
24 {0x000D0040, nullptr, "QueryStatus"}, 24 {0x000D0040, QueryStatus, "QueryStatus"},
25 {0x000E0040, nullptr, "GetDaemonDisableCount"}, 25 {0x000E0040, GetDaemonDisableCount, "GetDaemonDisableCount"},
26 {0x000F0000, nullptr, "GetSchedulerDisableCount"}, 26 {0x000F0000, GetSchedulerDisableCount,"GetSchedulerDisableCount"},
27 {0x00100040, nullptr, "SetScanInterval"}, 27 {0x00100040, SetScanInterval, "SetScanInterval"},
28 {0x00110000, nullptr, "GetScanInterval"}, 28 {0x00110000, GetScanInterval, "GetScanInterval"},
29 {0x00120040, nullptr, "SetRetryInterval"}, 29 {0x00120040, SetRetryInterval, "SetRetryInterval"},
30 {0x00130000, nullptr, "GetRetryInterval"}, 30 {0x00130000, GetRetryInterval, "GetRetryInterval"},
31 {0x00140040, OverrideDefaultDaemons, "OverrideDefaultDaemons"}, 31 {0x00140040, OverrideDefaultDaemons, "OverrideDefaultDaemons"},
32 {0x00150000, nullptr, "ResetDefaultDaemons"}, 32 {0x00150000, ResetDefaultDaemons, "ResetDefaultDaemons"},
33 {0x00160000, nullptr, "GetDefaultDaemons"}, 33 {0x00160000, GetDefaultDaemons, "GetDefaultDaemons"},
34 {0x00170000, nullptr, "ClearHalfAwakeMacFilter"}, 34 {0x00170000, ClearHalfAwakeMacFilter, "ClearHalfAwakeMacFilter"},
35}; 35};
36 36
37NDM_U_Interface::NDM_U_Interface() { 37NDM_U_Interface::NDM_U_Interface() {
diff --git a/src/core/hle/service/service.cpp b/src/core/hle/service/service.cpp
index 0fe3a4d7a..d7e7d4fe3 100644
--- a/src/core/hle/service/service.cpp
+++ b/src/core/hle/service/service.cpp
@@ -7,6 +7,7 @@
7 7
8#include "core/hle/service/service.h" 8#include "core/hle/service/service.h"
9#include "core/hle/service/ac_u.h" 9#include "core/hle/service/ac_u.h"
10#include "core/hle/service/act_a.h"
10#include "core/hle/service/act_u.h" 11#include "core/hle/service/act_u.h"
11#include "core/hle/service/csnd_snd.h" 12#include "core/hle/service/csnd_snd.h"
12#include "core/hle/service/dlp_srvr.h" 13#include "core/hle/service/dlp_srvr.h"
@@ -119,6 +120,7 @@ void Init() {
119 Service::PTM::Init(); 120 Service::PTM::Init();
120 121
121 AddService(new AC_U::Interface); 122 AddService(new AC_U::Interface);
123 AddService(new ACT_A::Interface);
122 AddService(new ACT_U::Interface); 124 AddService(new ACT_U::Interface);
123 AddService(new CSND_SND::Interface); 125 AddService(new CSND_SND::Interface);
124 AddService(new DLP_SRVR::Interface); 126 AddService(new DLP_SRVR::Interface);
diff --git a/src/core/hle/service/soc_u.cpp b/src/core/hle/service/soc_u.cpp
index ff0af8f12..d3e5d4bca 100644
--- a/src/core/hle/service/soc_u.cpp
+++ b/src/core/hle/service/soc_u.cpp
@@ -151,6 +151,34 @@ static int TranslateError(int error) {
151 return error; 151 return error;
152} 152}
153 153
154/// Holds the translation from system network socket options to 3DS network socket options
155/// Note: -1 = No effect/unavailable
156static const std::unordered_map<int, int> sockopt_map = { {
157 { 0x0004, SO_REUSEADDR },
158 { 0x0080, -1 },
159 { 0x0100, -1 },
160 { 0x1001, SO_SNDBUF },
161 { 0x1002, SO_RCVBUF },
162 { 0x1003, -1 },
163#ifdef _WIN32
164 /// Unsupported in WinSock2
165 { 0x1004, -1 },
166#else
167 { 0x1004, SO_RCVLOWAT },
168#endif
169 { 0x1008, SO_TYPE },
170 { 0x1009, SO_ERROR },
171}};
172
173/// Converts a socket option from 3ds-specific to platform-specific
174static int TranslateSockOpt(int console_opt_name) {
175 auto found = sockopt_map.find(console_opt_name);
176 if (found != sockopt_map.end()) {
177 return found->second;
178 }
179 return console_opt_name;
180}
181
154/// Holds information about a particular socket 182/// Holds information about a particular socket
155struct SocketHolder { 183struct SocketHolder {
156 u32 socket_fd; ///< The socket descriptor 184 u32 socket_fd; ///< The socket descriptor
@@ -568,7 +596,7 @@ static void RecvFrom(Service::Interface* self) {
568 socklen_t src_addr_len = sizeof(src_addr); 596 socklen_t src_addr_len = sizeof(src_addr);
569 int ret = ::recvfrom(socket_handle, (char*)output_buff, len, flags, &src_addr, &src_addr_len); 597 int ret = ::recvfrom(socket_handle, (char*)output_buff, len, flags, &src_addr, &src_addr_len);
570 598
571 if (buffer_parameters.output_src_address_buffer != 0) { 599 if (ret >= 0 && buffer_parameters.output_src_address_buffer != 0 && src_addr_len > 0) {
572 CTRSockAddr* ctr_src_addr = reinterpret_cast<CTRSockAddr*>(Memory::GetPointer(buffer_parameters.output_src_address_buffer)); 600 CTRSockAddr* ctr_src_addr = reinterpret_cast<CTRSockAddr*>(Memory::GetPointer(buffer_parameters.output_src_address_buffer));
573 *ctr_src_addr = CTRSockAddr::FromPlatform(src_addr); 601 *ctr_src_addr = CTRSockAddr::FromPlatform(src_addr);
574 } 602 }
@@ -724,6 +752,72 @@ static void ShutdownSockets(Service::Interface* self) {
724 cmd_buffer[1] = 0; 752 cmd_buffer[1] = 0;
725} 753}
726 754
755static void GetSockOpt(Service::Interface* self) {
756 u32* cmd_buffer = Kernel::GetCommandBuffer();
757 u32 socket_handle = cmd_buffer[1];
758 u32 level = cmd_buffer[2];
759 int optname = TranslateSockOpt(cmd_buffer[3]);
760 socklen_t optlen = (socklen_t)cmd_buffer[4];
761
762 int ret = -1;
763 int err = 0;
764
765 if(optname < 0) {
766#ifdef _WIN32
767 err = WSAEINVAL;
768#else
769 err = EINVAL;
770#endif
771 } else {
772 // 0x100 = static buffer offset (bytes)
773 // + 0x4 = 2nd pointer (u32) position
774 // >> 2 = convert to u32 offset instead of byte offset (cmd_buffer = u32*)
775 char* optval = reinterpret_cast<char *>(Memory::GetPointer(cmd_buffer[0x104 >> 2]));
776
777 ret = ::getsockopt(socket_handle, level, optname, optval, &optlen);
778 err = 0;
779 if (ret == SOCKET_ERROR_VALUE) {
780 err = TranslateError(GET_ERRNO);
781 }
782 }
783
784 cmd_buffer[0] = IPC::MakeHeader(0x11, 4, 2);
785 cmd_buffer[1] = ret;
786 cmd_buffer[2] = err;
787 cmd_buffer[3] = optlen;
788}
789
790static void SetSockOpt(Service::Interface* self) {
791 u32* cmd_buffer = Kernel::GetCommandBuffer();
792 u32 socket_handle = cmd_buffer[1];
793 u32 level = cmd_buffer[2];
794 int optname = TranslateSockOpt(cmd_buffer[3]);
795
796 int ret = -1;
797 int err = 0;
798
799 if(optname < 0) {
800#ifdef _WIN32
801 err = WSAEINVAL;
802#else
803 err = EINVAL;
804#endif
805 } else {
806 socklen_t optlen = static_cast<socklen_t>(cmd_buffer[4]);
807 const char* optval = reinterpret_cast<const char *>(Memory::GetPointer(cmd_buffer[8]));
808
809 ret = static_cast<u32>(::setsockopt(socket_handle, level, optname, optval, optlen));
810 err = 0;
811 if (ret == SOCKET_ERROR_VALUE) {
812 err = TranslateError(GET_ERRNO);
813 }
814 }
815
816 cmd_buffer[0] = IPC::MakeHeader(0x12, 4, 4);
817 cmd_buffer[1] = ret;
818 cmd_buffer[2] = err;
819}
820
727const Interface::FunctionInfo FunctionTable[] = { 821const Interface::FunctionInfo FunctionTable[] = {
728 {0x00010044, InitializeSockets, "InitializeSockets"}, 822 {0x00010044, InitializeSockets, "InitializeSockets"},
729 {0x000200C2, Socket, "Socket"}, 823 {0x000200C2, Socket, "Socket"},
@@ -741,8 +835,8 @@ const Interface::FunctionInfo FunctionTable[] = {
741 {0x000E00C2, nullptr, "GetHostByAddr"}, 835 {0x000E00C2, nullptr, "GetHostByAddr"},
742 {0x000F0106, nullptr, "GetAddrInfo"}, 836 {0x000F0106, nullptr, "GetAddrInfo"},
743 {0x00100102, nullptr, "GetNameInfo"}, 837 {0x00100102, nullptr, "GetNameInfo"},
744 {0x00110102, nullptr, "GetSockOpt"}, 838 {0x00110102, GetSockOpt, "GetSockOpt"},
745 {0x00120104, nullptr, "SetSockOpt"}, 839 {0x00120104, SetSockOpt, "SetSockOpt"},
746 {0x001300C2, Fcntl, "Fcntl"}, 840 {0x001300C2, Fcntl, "Fcntl"},
747 {0x00140084, Poll, "Poll"}, 841 {0x00140084, Poll, "Poll"},
748 {0x00150042, nullptr, "SockAtMark"}, 842 {0x00150042, nullptr, "SockAtMark"},
diff --git a/src/core/hle/service/y2r_u.cpp b/src/core/hle/service/y2r_u.cpp
index 22f373adf..d16578f87 100644
--- a/src/core/hle/service/y2r_u.cpp
+++ b/src/core/hle/service/y2r_u.cpp
@@ -4,6 +4,7 @@
4 4
5#include <cstring> 5#include <cstring>
6 6
7#include "common/common_funcs.h"
7#include "common/common_types.h" 8#include "common/common_types.h"
8#include "common/logging/log.h" 9#include "common/logging/log.h"
9 10
@@ -12,9 +13,6 @@
12#include "core/hle/service/y2r_u.h" 13#include "core/hle/service/y2r_u.h"
13#include "core/hw/y2r.h" 14#include "core/hw/y2r.h"
14 15
15#include "video_core/renderer_base.h"
16#include "video_core/video_core.h"
17
18//////////////////////////////////////////////////////////////////////////////////////////////////// 16////////////////////////////////////////////////////////////////////////////////////////////////////
19// Namespace Y2R_U 17// Namespace Y2R_U
20 18
@@ -28,13 +26,17 @@ struct ConversionParameters {
28 u16 input_line_width; 26 u16 input_line_width;
29 u16 input_lines; 27 u16 input_lines;
30 StandardCoefficient standard_coefficient; 28 StandardCoefficient standard_coefficient;
31 u8 reserved; 29 u8 padding;
32 u16 alpha; 30 u16 alpha;
33}; 31};
34static_assert(sizeof(ConversionParameters) == 12, "ConversionParameters struct has incorrect size"); 32static_assert(sizeof(ConversionParameters) == 12, "ConversionParameters struct has incorrect size");
35 33
36static Kernel::SharedPtr<Kernel::Event> completion_event; 34static Kernel::SharedPtr<Kernel::Event> completion_event;
37static ConversionConfiguration conversion; 35static ConversionConfiguration conversion;
36static DitheringWeightParams dithering_weight_params;
37static u32 temporal_dithering_enabled = 0;
38static u32 transfer_end_interrupt_enabled = 0;
39static u32 spacial_dithering_enabled = 0;
38 40
39static const CoefficientSet standard_coefficients[4] = { 41static const CoefficientSet standard_coefficients[4] = {
40 {{ 0x100, 0x166, 0xB6, 0x58, 0x1C5, -0x166F, 0x10EE, -0x1C5B }}, // ITU_Rec601 42 {{ 0x100, 0x166, 0xB6, 0x58, 0x1C5, -0x166F, 0x10EE, -0x1C5B }}, // ITU_Rec601
@@ -73,7 +75,7 @@ ResultCode ConversionConfiguration::SetInputLines(u16 lines) {
73 75
74ResultCode ConversionConfiguration::SetStandardCoefficient(StandardCoefficient standard_coefficient) { 76ResultCode ConversionConfiguration::SetStandardCoefficient(StandardCoefficient standard_coefficient) {
75 size_t index = static_cast<size_t>(standard_coefficient); 77 size_t index = static_cast<size_t>(standard_coefficient);
76 if (index >= 4) { 78 if (index >= ARRAY_SIZE(standard_coefficients)) {
77 return ResultCode(ErrorDescription::InvalidEnumValue, ErrorModule::CAM, 79 return ResultCode(ErrorDescription::InvalidEnumValue, ErrorModule::CAM,
78 ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E053ED 80 ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E053ED
79 } 81 }
@@ -86,44 +88,183 @@ static void SetInputFormat(Service::Interface* self) {
86 u32* cmd_buff = Kernel::GetCommandBuffer(); 88 u32* cmd_buff = Kernel::GetCommandBuffer();
87 89
88 conversion.input_format = static_cast<InputFormat>(cmd_buff[1]); 90 conversion.input_format = static_cast<InputFormat>(cmd_buff[1]);
91
92 cmd_buff[0] = IPC::MakeHeader(0x1, 1, 0);
93 cmd_buff[1] = RESULT_SUCCESS.raw;
94
89 LOG_DEBUG(Service_Y2R, "called input_format=%hhu", conversion.input_format); 95 LOG_DEBUG(Service_Y2R, "called input_format=%hhu", conversion.input_format);
96}
97
98static void GetInputFormat(Service::Interface* self) {
99 u32* cmd_buff = Kernel::GetCommandBuffer();
90 100
101 cmd_buff[0] = IPC::MakeHeader(0x2, 2, 0);
91 cmd_buff[1] = RESULT_SUCCESS.raw; 102 cmd_buff[1] = RESULT_SUCCESS.raw;
103 cmd_buff[2] = static_cast<u32>(conversion.input_format);
104
105 LOG_DEBUG(Service_Y2R, "called input_format=%hhu", conversion.input_format);
92} 106}
93 107
94static void SetOutputFormat(Service::Interface* self) { 108static void SetOutputFormat(Service::Interface* self) {
95 u32* cmd_buff = Kernel::GetCommandBuffer(); 109 u32* cmd_buff = Kernel::GetCommandBuffer();
96 110
97 conversion.output_format = static_cast<OutputFormat>(cmd_buff[1]); 111 conversion.output_format = static_cast<OutputFormat>(cmd_buff[1]);
112
113 cmd_buff[0] = IPC::MakeHeader(0x3, 1, 0);
114 cmd_buff[1] = RESULT_SUCCESS.raw;
115
98 LOG_DEBUG(Service_Y2R, "called output_format=%hhu", conversion.output_format); 116 LOG_DEBUG(Service_Y2R, "called output_format=%hhu", conversion.output_format);
117}
118
119static void GetOutputFormat(Service::Interface* self) {
120 u32* cmd_buff = Kernel::GetCommandBuffer();
99 121
122 cmd_buff[0] = IPC::MakeHeader(0x4, 2, 0);
100 cmd_buff[1] = RESULT_SUCCESS.raw; 123 cmd_buff[1] = RESULT_SUCCESS.raw;
124 cmd_buff[2] = static_cast<u32>(conversion.output_format);
125
126 LOG_DEBUG(Service_Y2R, "called output_format=%hhu", conversion.output_format);
101} 127}
102 128
103static void SetRotation(Service::Interface* self) { 129static void SetRotation(Service::Interface* self) {
104 u32* cmd_buff = Kernel::GetCommandBuffer(); 130 u32* cmd_buff = Kernel::GetCommandBuffer();
105 131
106 conversion.rotation = static_cast<Rotation>(cmd_buff[1]); 132 conversion.rotation = static_cast<Rotation>(cmd_buff[1]);
133
134 cmd_buff[0] = IPC::MakeHeader(0x5, 1, 0);
135 cmd_buff[1] = RESULT_SUCCESS.raw;
136
107 LOG_DEBUG(Service_Y2R, "called rotation=%hhu", conversion.rotation); 137 LOG_DEBUG(Service_Y2R, "called rotation=%hhu", conversion.rotation);
138}
139
140static void GetRotation(Service::Interface* self) {
141 u32* cmd_buff = Kernel::GetCommandBuffer();
108 142
143 cmd_buff[0] = IPC::MakeHeader(0x6, 2, 0);
109 cmd_buff[1] = RESULT_SUCCESS.raw; 144 cmd_buff[1] = RESULT_SUCCESS.raw;
145 cmd_buff[2] = static_cast<u32>(conversion.rotation);
146
147 LOG_DEBUG(Service_Y2R, "called rotation=%hhu", conversion.rotation);
110} 148}
111 149
112static void SetBlockAlignment(Service::Interface* self) { 150static void SetBlockAlignment(Service::Interface* self) {
113 u32* cmd_buff = Kernel::GetCommandBuffer(); 151 u32* cmd_buff = Kernel::GetCommandBuffer();
114 152
115 conversion.block_alignment = static_cast<BlockAlignment>(cmd_buff[1]); 153 conversion.block_alignment = static_cast<BlockAlignment>(cmd_buff[1]);
116 LOG_DEBUG(Service_Y2R, "called alignment=%hhu", conversion.block_alignment);
117 154
155 cmd_buff[0] = IPC::MakeHeader(0x7, 1, 0);
156 cmd_buff[1] = RESULT_SUCCESS.raw;
157
158 LOG_DEBUG(Service_Y2R, "called block_alignment=%hhu", conversion.block_alignment);
159}
160
161static void GetBlockAlignment(Service::Interface* self) {
162 u32* cmd_buff = Kernel::GetCommandBuffer();
163
164 cmd_buff[0] = IPC::MakeHeader(0x8, 2, 0);
165 cmd_buff[1] = RESULT_SUCCESS.raw;
166 cmd_buff[2] = static_cast<u32>(conversion.block_alignment);
167
168 LOG_DEBUG(Service_Y2R, "called block_alignment=%hhu", conversion.block_alignment);
169}
170
171/**
172 * Y2R_U::SetSpacialDithering service function
173 * Inputs:
174 * 1 : u8, 0 = Disabled, 1 = Enabled
175 * Outputs:
176 * 1 : Result of function, 0 on success, otherwise error code
177 */
178static void SetSpacialDithering(Service::Interface* self) {
179 u32* cmd_buff = Kernel::GetCommandBuffer();
180 spacial_dithering_enabled = cmd_buff[1] & 0xF;
181
182 cmd_buff[0] = IPC::MakeHeader(0x9, 1, 0);
183 cmd_buff[1] = RESULT_SUCCESS.raw;
184
185 LOG_WARNING(Service_Y2R, "(STUBBED) called");
186}
187
188/**
189 * Y2R_U::GetSpacialDithering service function
190 * Outputs:
191 * 1 : Result of function, 0 on success, otherwise error code
192 * 2 : u8, 0 = Disabled, 1 = Enabled
193 */
194static void GetSpacialDithering(Service::Interface* self) {
195 u32* cmd_buff = Kernel::GetCommandBuffer();
196
197 cmd_buff[0] = IPC::MakeHeader(0xA, 2, 0);
198 cmd_buff[1] = RESULT_SUCCESS.raw;
199 cmd_buff[2] = spacial_dithering_enabled;
200
201 LOG_WARNING(Service_Y2R, "(STUBBED) called");
202}
203
204/**
205 * Y2R_U::SetTemporalDithering service function
206 * Inputs:
207 * 1 : u8, 0 = Disabled, 1 = Enabled
208 * Outputs:
209 * 1 : Result of function, 0 on success, otherwise error code
210 */
211static void SetTemporalDithering(Service::Interface* self) {
212 u32* cmd_buff = Kernel::GetCommandBuffer();
213 temporal_dithering_enabled = cmd_buff[1] & 0xF;
214
215 cmd_buff[0] = IPC::MakeHeader(0xB, 1, 0);
118 cmd_buff[1] = RESULT_SUCCESS.raw; 216 cmd_buff[1] = RESULT_SUCCESS.raw;
217
218 LOG_WARNING(Service_Y2R, "(STUBBED) called");
119} 219}
120 220
221/**
222 * Y2R_U::GetTemporalDithering service function
223 * Outputs:
224 * 1 : Result of function, 0 on success, otherwise error code
225 * 2 : u8, 0 = Disabled, 1 = Enabled
226 */
227static void GetTemporalDithering(Service::Interface* self) {
228 u32* cmd_buff = Kernel::GetCommandBuffer();
229
230 cmd_buff[0] = IPC::MakeHeader(0xC, 2, 0);
231 cmd_buff[1] = RESULT_SUCCESS.raw;
232 cmd_buff[2] = temporal_dithering_enabled;
233
234 LOG_WARNING(Service_Y2R, "(STUBBED) called");
235}
236
237/**
238 * Y2R_U::SetTransferEndInterrupt service function
239 * Inputs:
240 * 1 : u8, 0 = Disabled, 1 = Enabled
241 * Outputs:
242 * 1 : Result of function, 0 on success, otherwise error code
243 */
121static void SetTransferEndInterrupt(Service::Interface* self) { 244static void SetTransferEndInterrupt(Service::Interface* self) {
122 u32* cmd_buff = Kernel::GetCommandBuffer(); 245 u32* cmd_buff = Kernel::GetCommandBuffer();
246 transfer_end_interrupt_enabled = cmd_buff[1] & 0xf;
123 247
124 cmd_buff[0] = IPC::MakeHeader(0xD, 1, 0); 248 cmd_buff[0] = IPC::MakeHeader(0xD, 1, 0);
125 cmd_buff[1] = RESULT_SUCCESS.raw; 249 cmd_buff[1] = RESULT_SUCCESS.raw;
126 LOG_DEBUG(Service_Y2R, "(STUBBED) called"); 250
251 LOG_WARNING(Service_Y2R, "(STUBBED) called");
252}
253
254/**
255 * Y2R_U::GetTransferEndInterrupt service function
256 * Outputs:
257 * 1 : Result of function, 0 on success, otherwise error code
258 * 2 : u8, 0 = Disabled, 1 = Enabled
259 */
260static void GetTransferEndInterrupt(Service::Interface* self) {
261 u32* cmd_buff = Kernel::GetCommandBuffer();
262
263 cmd_buff[0] = IPC::MakeHeader(0xE, 2, 0);
264 cmd_buff[1] = RESULT_SUCCESS.raw;
265 cmd_buff[2] = transfer_end_interrupt_enabled;
266
267 LOG_WARNING(Service_Y2R, "(STUBBED) called");
127} 268}
128 269
129/** 270/**
@@ -135,8 +276,10 @@ static void SetTransferEndInterrupt(Service::Interface* self) {
135static void GetTransferEndEvent(Service::Interface* self) { 276static void GetTransferEndEvent(Service::Interface* self) {
136 u32* cmd_buff = Kernel::GetCommandBuffer(); 277 u32* cmd_buff = Kernel::GetCommandBuffer();
137 278
279 cmd_buff[0] = IPC::MakeHeader(0xF, 2, 0);
138 cmd_buff[1] = RESULT_SUCCESS.raw; 280 cmd_buff[1] = RESULT_SUCCESS.raw;
139 cmd_buff[3] = Kernel::g_handle_table.Create(completion_event).MoveFrom(); 281 cmd_buff[3] = Kernel::g_handle_table.Create(completion_event).MoveFrom();
282
140 LOG_DEBUG(Service_Y2R, "called"); 283 LOG_DEBUG(Service_Y2R, "called");
141} 284}
142 285
@@ -147,12 +290,12 @@ static void SetSendingY(Service::Interface* self) {
147 conversion.src_Y.image_size = cmd_buff[2]; 290 conversion.src_Y.image_size = cmd_buff[2];
148 conversion.src_Y.transfer_unit = cmd_buff[3]; 291 conversion.src_Y.transfer_unit = cmd_buff[3];
149 conversion.src_Y.gap = cmd_buff[4]; 292 conversion.src_Y.gap = cmd_buff[4];
150 u32 src_process_handle = cmd_buff[6];
151 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
152 "src_process_handle=0x%08X", conversion.src_Y.image_size,
153 conversion.src_Y.transfer_unit, conversion.src_Y.gap, src_process_handle);
154 293
294 cmd_buff[0] = IPC::MakeHeader(0x10, 1, 0);
155 cmd_buff[1] = RESULT_SUCCESS.raw; 295 cmd_buff[1] = RESULT_SUCCESS.raw;
296
297 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, src_process_handle=0x%08X",
298 conversion.src_Y.image_size, conversion.src_Y.transfer_unit, conversion.src_Y.gap, cmd_buff[6]);
156} 299}
157 300
158static void SetSendingU(Service::Interface* self) { 301static void SetSendingU(Service::Interface* self) {
@@ -162,12 +305,12 @@ static void SetSendingU(Service::Interface* self) {
162 conversion.src_U.image_size = cmd_buff[2]; 305 conversion.src_U.image_size = cmd_buff[2];
163 conversion.src_U.transfer_unit = cmd_buff[3]; 306 conversion.src_U.transfer_unit = cmd_buff[3];
164 conversion.src_U.gap = cmd_buff[4]; 307 conversion.src_U.gap = cmd_buff[4];
165 u32 src_process_handle = cmd_buff[6];
166 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
167 "src_process_handle=0x%08X", conversion.src_U.image_size,
168 conversion.src_U.transfer_unit, conversion.src_U.gap, src_process_handle);
169 308
309 cmd_buff[0] = IPC::MakeHeader(0x11, 1, 0);
170 cmd_buff[1] = RESULT_SUCCESS.raw; 310 cmd_buff[1] = RESULT_SUCCESS.raw;
311
312 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, src_process_handle=0x%08X",
313 conversion.src_U.image_size, conversion.src_U.transfer_unit, conversion.src_U.gap, cmd_buff[6]);
171} 314}
172 315
173static void SetSendingV(Service::Interface* self) { 316static void SetSendingV(Service::Interface* self) {
@@ -177,12 +320,12 @@ static void SetSendingV(Service::Interface* self) {
177 conversion.src_V.image_size = cmd_buff[2]; 320 conversion.src_V.image_size = cmd_buff[2];
178 conversion.src_V.transfer_unit = cmd_buff[3]; 321 conversion.src_V.transfer_unit = cmd_buff[3];
179 conversion.src_V.gap = cmd_buff[4]; 322 conversion.src_V.gap = cmd_buff[4];
180 u32 src_process_handle = cmd_buff[6];
181 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
182 "src_process_handle=0x%08X", conversion.src_V.image_size,
183 conversion.src_V.transfer_unit, conversion.src_V.gap, src_process_handle);
184 323
324 cmd_buff[0] = IPC::MakeHeader(0x12, 1, 0);
185 cmd_buff[1] = RESULT_SUCCESS.raw; 325 cmd_buff[1] = RESULT_SUCCESS.raw;
326
327 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, src_process_handle=0x%08X",
328 conversion.src_V.image_size, conversion.src_V.transfer_unit, conversion.src_V.gap, cmd_buff[6]);
186} 329}
187 330
188static void SetSendingYUYV(Service::Interface* self) { 331static void SetSendingYUYV(Service::Interface* self) {
@@ -192,12 +335,76 @@ static void SetSendingYUYV(Service::Interface* self) {
192 conversion.src_YUYV.image_size = cmd_buff[2]; 335 conversion.src_YUYV.image_size = cmd_buff[2];
193 conversion.src_YUYV.transfer_unit = cmd_buff[3]; 336 conversion.src_YUYV.transfer_unit = cmd_buff[3];
194 conversion.src_YUYV.gap = cmd_buff[4]; 337 conversion.src_YUYV.gap = cmd_buff[4];
195 u32 src_process_handle = cmd_buff[6];
196 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
197 "src_process_handle=0x%08X", conversion.src_YUYV.image_size,
198 conversion.src_YUYV.transfer_unit, conversion.src_YUYV.gap, src_process_handle);
199 338
339 cmd_buff[0] = IPC::MakeHeader(0x13, 1, 0);
340 cmd_buff[1] = RESULT_SUCCESS.raw;
341
342 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, src_process_handle=0x%08X",
343 conversion.src_YUYV.image_size, conversion.src_YUYV.transfer_unit, conversion.src_YUYV.gap, cmd_buff[6]);
344}
345
346/**
347 * Y2R::IsFinishedSendingYuv service function
348 * Output:
349 * 1 : Result of the function, 0 on success, otherwise error code
350 * 2 : u8, 0 = Not Finished, 1 = Finished
351 */
352static void IsFinishedSendingYuv(Service::Interface* self) {
353 u32* cmd_buff = Kernel::GetCommandBuffer();
354
355 cmd_buff[0] = IPC::MakeHeader(0x14, 2, 0);
356 cmd_buff[1] = RESULT_SUCCESS.raw;
357 cmd_buff[2] = 1;
358
359 LOG_WARNING(Service_Y2R, "(STUBBED) called");
360}
361
362/**
363 * Y2R::IsFinishedSendingY service function
364 * Output:
365 * 1 : Result of the function, 0 on success, otherwise error code
366 * 2 : u8, 0 = Not Finished, 1 = Finished
367 */
368static void IsFinishedSendingY(Service::Interface* self) {
369 u32* cmd_buff = Kernel::GetCommandBuffer();
370
371 cmd_buff[0] = IPC::MakeHeader(0x15, 2, 0);
200 cmd_buff[1] = RESULT_SUCCESS.raw; 372 cmd_buff[1] = RESULT_SUCCESS.raw;
373 cmd_buff[2] = 1;
374
375 LOG_WARNING(Service_Y2R, "(STUBBED) called");
376}
377
378/**
379 * Y2R::IsFinishedSendingU service function
380 * Output:
381 * 1 : Result of the function, 0 on success, otherwise error code
382 * 2 : u8, 0 = Not Finished, 1 = Finished
383 */
384static void IsFinishedSendingU(Service::Interface* self) {
385 u32* cmd_buff = Kernel::GetCommandBuffer();
386
387 cmd_buff[0] = IPC::MakeHeader(0x16, 2, 0);
388 cmd_buff[1] = RESULT_SUCCESS.raw;
389 cmd_buff[2] = 1;
390
391 LOG_WARNING(Service_Y2R, "(STUBBED) called");
392}
393
394/**
395 * Y2R::IsFinishedSendingV service function
396 * Output:
397 * 1 : Result of the function, 0 on success, otherwise error code
398 * 2 : u8, 0 = Not Finished, 1 = Finished
399 */
400static void IsFinishedSendingV(Service::Interface* self) {
401 u32* cmd_buff = Kernel::GetCommandBuffer();
402
403 cmd_buff[0] = IPC::MakeHeader(0x17, 2, 0);
404 cmd_buff[1] = RESULT_SUCCESS.raw;
405 cmd_buff[2] = 1;
406
407 LOG_WARNING(Service_Y2R, "(STUBBED) called");
201} 408}
202 409
203static void SetReceiving(Service::Interface* self) { 410static void SetReceiving(Service::Interface* self) {
@@ -207,27 +414,66 @@ static void SetReceiving(Service::Interface* self) {
207 conversion.dst.image_size = cmd_buff[2]; 414 conversion.dst.image_size = cmd_buff[2];
208 conversion.dst.transfer_unit = cmd_buff[3]; 415 conversion.dst.transfer_unit = cmd_buff[3];
209 conversion.dst.gap = cmd_buff[4]; 416 conversion.dst.gap = cmd_buff[4];
210 u32 dst_process_handle = cmd_buff[6];
211 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
212 "dst_process_handle=0x%08X", conversion.dst.image_size,
213 conversion.dst.transfer_unit, conversion.dst.gap,
214 dst_process_handle);
215 417
418 cmd_buff[0] = IPC::MakeHeader(0x18, 1, 0);
216 cmd_buff[1] = RESULT_SUCCESS.raw; 419 cmd_buff[1] = RESULT_SUCCESS.raw;
420
421 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, dst_process_handle=0x%08X",
422 conversion.dst.image_size, conversion.dst.transfer_unit, conversion.dst.gap, cmd_buff[6]);
423}
424
425/**
426 * Y2R::IsFinishedReceiving service function
427 * Output:
428 * 1 : Result of the function, 0 on success, otherwise error code
429 * 2 : u8, 0 = Not Finished, 1 = Finished
430 */
431static void IsFinishedReceiving(Service::Interface* self) {
432 u32* cmd_buff = Kernel::GetCommandBuffer();
433
434 cmd_buff[0] = IPC::MakeHeader(0x19, 2, 0);
435 cmd_buff[1] = RESULT_SUCCESS.raw;
436 cmd_buff[2] = 1;
437
438 LOG_WARNING(Service_Y2R, "(STUBBED) called");
217} 439}
218 440
219static void SetInputLineWidth(Service::Interface* self) { 441static void SetInputLineWidth(Service::Interface* self) {
220 u32* cmd_buff = Kernel::GetCommandBuffer(); 442 u32* cmd_buff = Kernel::GetCommandBuffer();
221 443
222 LOG_DEBUG(Service_Y2R, "called input_line_width=%u", cmd_buff[1]); 444 cmd_buff[0] = IPC::MakeHeader(0x1A, 1, 0);
223 cmd_buff[1] = conversion.SetInputLineWidth(cmd_buff[1]).raw; 445 cmd_buff[1] = conversion.SetInputLineWidth(cmd_buff[1]).raw;
446
447 LOG_DEBUG(Service_Y2R, "called input_line_width=%u", cmd_buff[1]);
448}
449
450static void GetInputLineWidth(Service::Interface* self) {
451 u32* cmd_buff = Kernel::GetCommandBuffer();
452
453 cmd_buff[0] = IPC::MakeHeader(0x1B, 2, 0);
454 cmd_buff[1] = RESULT_SUCCESS.raw;
455 cmd_buff[2] = conversion.input_line_width;
456
457 LOG_DEBUG(Service_Y2R, "called input_line_width=%u", conversion.input_line_width);
224} 458}
225 459
226static void SetInputLines(Service::Interface* self) { 460static void SetInputLines(Service::Interface* self) {
227 u32* cmd_buff = Kernel::GetCommandBuffer(); 461 u32* cmd_buff = Kernel::GetCommandBuffer();
228 462
229 LOG_DEBUG(Service_Y2R, "called input_line_number=%u", cmd_buff[1]); 463 cmd_buff[0] = IPC::MakeHeader(0x1C, 1, 0);
230 cmd_buff[1] = conversion.SetInputLines(cmd_buff[1]).raw; 464 cmd_buff[1] = conversion.SetInputLines(cmd_buff[1]).raw;
465
466 LOG_DEBUG(Service_Y2R, "called input_lines=%u", cmd_buff[1]);
467}
468
469static void GetInputLines(Service::Interface* self) {
470 u32* cmd_buff = Kernel::GetCommandBuffer();
471
472 cmd_buff[0] = IPC::MakeHeader(0x1D, 2, 0);
473 cmd_buff[1] = RESULT_SUCCESS.raw;
474 cmd_buff[2] = static_cast<u32>(conversion.input_lines);
475
476 LOG_DEBUG(Service_Y2R, "called input_lines=%u", conversion.input_lines);
231} 477}
232 478
233static void SetCoefficient(Service::Interface* self) { 479static void SetCoefficient(Service::Interface* self) {
@@ -235,45 +481,111 @@ static void SetCoefficient(Service::Interface* self) {
235 481
236 const u16* coefficients = reinterpret_cast<const u16*>(&cmd_buff[1]); 482 const u16* coefficients = reinterpret_cast<const u16*>(&cmd_buff[1]);
237 std::memcpy(conversion.coefficients.data(), coefficients, sizeof(CoefficientSet)); 483 std::memcpy(conversion.coefficients.data(), coefficients, sizeof(CoefficientSet));
484
485 cmd_buff[0] = IPC::MakeHeader(0x1E, 1, 0);
486 cmd_buff[1] = RESULT_SUCCESS.raw;
487
238 LOG_DEBUG(Service_Y2R, "called coefficients=[%hX, %hX, %hX, %hX, %hX, %hX, %hX, %hX]", 488 LOG_DEBUG(Service_Y2R, "called coefficients=[%hX, %hX, %hX, %hX, %hX, %hX, %hX, %hX]",
239 coefficients[0], coefficients[1], coefficients[2], coefficients[3], 489 coefficients[0], coefficients[1], coefficients[2], coefficients[3],
240 coefficients[4], coefficients[5], coefficients[6], coefficients[7]); 490 coefficients[4], coefficients[5], coefficients[6], coefficients[7]);
491}
241 492
493static void GetCoefficient(Service::Interface* self) {
494 u32* cmd_buff = Kernel::GetCommandBuffer();
495
496 cmd_buff[0] = IPC::MakeHeader(0x1F, 5, 0);
242 cmd_buff[1] = RESULT_SUCCESS.raw; 497 cmd_buff[1] = RESULT_SUCCESS.raw;
498 std::memcpy(&cmd_buff[2], conversion.coefficients.data(), sizeof(CoefficientSet));
499
500 LOG_DEBUG(Service_Y2R, "called");
243} 501}
244 502
245static void SetStandardCoefficient(Service::Interface* self) { 503static void SetStandardCoefficient(Service::Interface* self) {
246 u32* cmd_buff = Kernel::GetCommandBuffer(); 504 u32* cmd_buff = Kernel::GetCommandBuffer();
247 505
248 LOG_DEBUG(Service_Y2R, "called standard_coefficient=%u", cmd_buff[1]); 506 u32 index = cmd_buff[1];
507
508 cmd_buff[0] = IPC::MakeHeader(0x20, 1, 0);
509 cmd_buff[1] = conversion.SetStandardCoefficient((StandardCoefficient)index).raw;
510
511 LOG_DEBUG(Service_Y2R, "called standard_coefficient=%u", index);
512}
513
514static void GetStandardCoefficient(Service::Interface* self) {
515 u32* cmd_buff = Kernel::GetCommandBuffer();
516
517 u32 index = cmd_buff[1];
518
519 if (index < ARRAY_SIZE(standard_coefficients)) {
520 cmd_buff[0] = IPC::MakeHeader(0x21, 5, 0);
521 cmd_buff[1] = RESULT_SUCCESS.raw;
522 std::memcpy(&cmd_buff[2], &standard_coefficients[index], sizeof(CoefficientSet));
249 523
250 cmd_buff[1] = conversion.SetStandardCoefficient((StandardCoefficient)cmd_buff[1]).raw; 524 LOG_DEBUG(Service_Y2R, "called standard_coefficient=%u ", index);
525 } else {
526 cmd_buff[0] = IPC::MakeHeader(0x21, 1, 0);
527 cmd_buff[1] = -1; // TODO(bunnei): Identify the correct error code for this
528
529 LOG_ERROR(Service_Y2R, "called standard_coefficient=%u The argument is invalid!", index);
530 }
251} 531}
252 532
253static void SetAlpha(Service::Interface* self) { 533static void SetAlpha(Service::Interface* self) {
254 u32* cmd_buff = Kernel::GetCommandBuffer(); 534 u32* cmd_buff = Kernel::GetCommandBuffer();
255 535
256 conversion.alpha = cmd_buff[1]; 536 conversion.alpha = cmd_buff[1];
537
538 cmd_buff[0] = IPC::MakeHeader(0x22, 1, 0);
539 cmd_buff[1] = RESULT_SUCCESS.raw;
540
257 LOG_DEBUG(Service_Y2R, "called alpha=%hu", conversion.alpha); 541 LOG_DEBUG(Service_Y2R, "called alpha=%hu", conversion.alpha);
542}
543
544static void GetAlpha(Service::Interface* self) {
545 u32* cmd_buff = Kernel::GetCommandBuffer();
258 546
547 cmd_buff[0] = IPC::MakeHeader(0x23, 2, 0);
259 cmd_buff[1] = RESULT_SUCCESS.raw; 548 cmd_buff[1] = RESULT_SUCCESS.raw;
549 cmd_buff[2] = conversion.alpha;
550
551 LOG_DEBUG(Service_Y2R, "called alpha=%hu", conversion.alpha);
260} 552}
261 553
262static void StartConversion(Service::Interface* self) { 554static void SetDitheringWeightParams(Service::Interface* self) {
263 u32* cmd_buff = Kernel::GetCommandBuffer(); 555 u32* cmd_buff = Kernel::GetCommandBuffer();
556 std::memcpy(&dithering_weight_params, &cmd_buff[1], sizeof(DitheringWeightParams));
264 557
265 HW::Y2R::PerformConversion(conversion); 558 cmd_buff[0] = IPC::MakeHeader(0x24, 1, 0);
559 cmd_buff[1] = RESULT_SUCCESS.raw;
266 560
267 // dst_image_size would seem to be perfect for this, but it doesn't include the gap :( 561 LOG_DEBUG(Service_Y2R, "called");
268 u32 total_output_size = conversion.input_lines * 562}
269 (conversion.dst.transfer_unit + conversion.dst.gap); 563
270 VideoCore::g_renderer->Rasterizer()->InvalidateRegion( 564static void GetDitheringWeightParams(Service::Interface* self) {
271 Memory::VirtualToPhysicalAddress(conversion.dst.address), total_output_size); 565 u32* cmd_buff = Kernel::GetCommandBuffer();
566
567 cmd_buff[0] = IPC::MakeHeader(0x25, 9, 0);
568 cmd_buff[1] = RESULT_SUCCESS.raw;
569 std::memcpy(&cmd_buff[2], &dithering_weight_params, sizeof(DitheringWeightParams));
272 570
273 LOG_DEBUG(Service_Y2R, "called"); 571 LOG_DEBUG(Service_Y2R, "called");
572}
573
574static void StartConversion(Service::Interface* self) {
575 u32* cmd_buff = Kernel::GetCommandBuffer();
576
577 // dst_image_size would seem to be perfect for this, but it doesn't include the gap :(
578 u32 total_output_size = conversion.input_lines * (conversion.dst.transfer_unit + conversion.dst.gap);
579 Memory::RasterizerFlushAndInvalidateRegion(Memory::VirtualToPhysicalAddress(conversion.dst.address), total_output_size);
580
581 HW::Y2R::PerformConversion(conversion);
582
274 completion_event->Signal(); 583 completion_event->Signal();
275 584
585 cmd_buff[0] = IPC::MakeHeader(0x26, 1, 0);
276 cmd_buff[1] = RESULT_SUCCESS.raw; 586 cmd_buff[1] = RESULT_SUCCESS.raw;
587
588 LOG_DEBUG(Service_Y2R, "called");
277} 589}
278 590
279static void StopConversion(Service::Interface* self) { 591static void StopConversion(Service::Interface* self) {
@@ -281,6 +593,7 @@ static void StopConversion(Service::Interface* self) {
281 593
282 cmd_buff[0] = IPC::MakeHeader(0x27, 1, 0); 594 cmd_buff[0] = IPC::MakeHeader(0x27, 1, 0);
283 cmd_buff[1] = RESULT_SUCCESS.raw; 595 cmd_buff[1] = RESULT_SUCCESS.raw;
596
284 LOG_DEBUG(Service_Y2R, "called"); 597 LOG_DEBUG(Service_Y2R, "called");
285} 598}
286 599
@@ -293,50 +606,61 @@ static void StopConversion(Service::Interface* self) {
293static void IsBusyConversion(Service::Interface* self) { 606static void IsBusyConversion(Service::Interface* self) {
294 u32* cmd_buff = Kernel::GetCommandBuffer(); 607 u32* cmd_buff = Kernel::GetCommandBuffer();
295 608
609 cmd_buff[0] = IPC::MakeHeader(0x28, 2, 0);
296 cmd_buff[1] = RESULT_SUCCESS.raw; 610 cmd_buff[1] = RESULT_SUCCESS.raw;
297 cmd_buff[2] = 0; // StartConversion always finishes immediately 611 cmd_buff[2] = 0; // StartConversion always finishes immediately
612
298 LOG_DEBUG(Service_Y2R, "called"); 613 LOG_DEBUG(Service_Y2R, "called");
299} 614}
300 615
301/** 616/**
302 * Y2R_U::SetConversionParams service function 617 * Y2R_U::SetPackageParameter service function
303 */ 618 */
304static void SetConversionParams(Service::Interface* self) { 619static void SetPackageParameter(Service::Interface* self) {
305 u32* cmd_buff = Kernel::GetCommandBuffer(); 620 u32* cmd_buff = Kernel::GetCommandBuffer();
306 621
307 auto params = reinterpret_cast<const ConversionParameters*>(&cmd_buff[1]); 622 auto params = reinterpret_cast<const ConversionParameters*>(&cmd_buff[1]);
308 LOG_DEBUG(Service_Y2R,
309 "called input_format=%hhu output_format=%hhu rotation=%hhu block_alignment=%hhu "
310 "input_line_width=%hu input_lines=%hu standard_coefficient=%hhu "
311 "reserved=%hhu alpha=%hX",
312 params->input_format, params->output_format, params->rotation, params->block_alignment,
313 params->input_line_width, params->input_lines, params->standard_coefficient,
314 params->reserved, params->alpha);
315
316 ResultCode result = RESULT_SUCCESS;
317 623
318 conversion.input_format = params->input_format; 624 conversion.input_format = params->input_format;
319 conversion.output_format = params->output_format; 625 conversion.output_format = params->output_format;
320 conversion.rotation = params->rotation; 626 conversion.rotation = params->rotation;
321 conversion.block_alignment = params->block_alignment; 627 conversion.block_alignment = params->block_alignment;
322 result = conversion.SetInputLineWidth(params->input_line_width); 628
323 if (result.IsError()) goto cleanup; 629 ResultCode result = conversion.SetInputLineWidth(params->input_line_width);
630
631 if (result.IsError())
632 goto cleanup;
633
324 result = conversion.SetInputLines(params->input_lines); 634 result = conversion.SetInputLines(params->input_lines);
325 if (result.IsError()) goto cleanup; 635
636 if (result.IsError())
637 goto cleanup;
638
326 result = conversion.SetStandardCoefficient(params->standard_coefficient); 639 result = conversion.SetStandardCoefficient(params->standard_coefficient);
327 if (result.IsError()) goto cleanup; 640
641 if (result.IsError())
642 goto cleanup;
643
644 conversion.padding = params->padding;
328 conversion.alpha = params->alpha; 645 conversion.alpha = params->alpha;
329 646
330cleanup: 647cleanup:
331 cmd_buff[0] = IPC::MakeHeader(0x29, 1, 0); 648 cmd_buff[0] = IPC::MakeHeader(0x29, 1, 0);
332 cmd_buff[1] = result.raw; 649 cmd_buff[1] = result.raw;
650
651 LOG_DEBUG(Service_Y2R, "called input_format=%hhu output_format=%hhu rotation=%hhu block_alignment=%hhu "
652 "input_line_width=%hu input_lines=%hu standard_coefficient=%hhu reserved=%hhu alpha=%hX",
653 params->input_format, params->output_format, params->rotation, params->block_alignment,
654 params->input_line_width, params->input_lines, params->standard_coefficient, params->padding, params->alpha);
333} 655}
334 656
335static void PingProcess(Service::Interface* self) { 657static void PingProcess(Service::Interface* self) {
336 u32* cmd_buff = Kernel::GetCommandBuffer(); 658 u32* cmd_buff = Kernel::GetCommandBuffer();
337 659
660 cmd_buff[0] = IPC::MakeHeader(0x2A, 2, 0);
338 cmd_buff[1] = RESULT_SUCCESS.raw; 661 cmd_buff[1] = RESULT_SUCCESS.raw;
339 cmd_buff[2] = 0; 662 cmd_buff[2] = 0;
663
340 LOG_WARNING(Service_Y2R, "(STUBBED) called"); 664 LOG_WARNING(Service_Y2R, "(STUBBED) called");
341} 665}
342 666
@@ -362,6 +686,7 @@ static void DriverInitialize(Service::Interface* self) {
362 686
363 cmd_buff[0] = IPC::MakeHeader(0x2B, 1, 0); 687 cmd_buff[0] = IPC::MakeHeader(0x2B, 1, 0);
364 cmd_buff[1] = RESULT_SUCCESS.raw; 688 cmd_buff[1] = RESULT_SUCCESS.raw;
689
365 LOG_DEBUG(Service_Y2R, "called"); 690 LOG_DEBUG(Service_Y2R, "called");
366} 691}
367 692
@@ -370,54 +695,67 @@ static void DriverFinalize(Service::Interface* self) {
370 695
371 cmd_buff[0] = IPC::MakeHeader(0x2C, 1, 0); 696 cmd_buff[0] = IPC::MakeHeader(0x2C, 1, 0);
372 cmd_buff[1] = RESULT_SUCCESS.raw; 697 cmd_buff[1] = RESULT_SUCCESS.raw;
698
699 LOG_DEBUG(Service_Y2R, "called");
700}
701
702
703static void GetPackageParameter(Service::Interface* self) {
704 u32* cmd_buff = Kernel::GetCommandBuffer();
705
706 cmd_buff[0] = IPC::MakeHeader(0x2D, 4, 0);
707 cmd_buff[1] = RESULT_SUCCESS.raw;
708 std::memcpy(&cmd_buff[2], &conversion, sizeof(ConversionParameters));
709
373 LOG_DEBUG(Service_Y2R, "called"); 710 LOG_DEBUG(Service_Y2R, "called");
374} 711}
375 712
376const Interface::FunctionInfo FunctionTable[] = { 713const Interface::FunctionInfo FunctionTable[] = {
377 {0x00010040, SetInputFormat, "SetInputFormat"}, 714 {0x00010040, SetInputFormat, "SetInputFormat"},
378 {0x00020000, nullptr, "GetInputFormat"}, 715 {0x00020000, GetInputFormat, "GetInputFormat"},
379 {0x00030040, SetOutputFormat, "SetOutputFormat"}, 716 {0x00030040, SetOutputFormat, "SetOutputFormat"},
380 {0x00040000, nullptr, "GetOutputFormat"}, 717 {0x00040000, GetOutputFormat, "GetOutputFormat"},
381 {0x00050040, SetRotation, "SetRotation"}, 718 {0x00050040, SetRotation, "SetRotation"},
382 {0x00060000, nullptr, "GetRotation"}, 719 {0x00060000, GetRotation, "GetRotation"},
383 {0x00070040, SetBlockAlignment, "SetBlockAlignment"}, 720 {0x00070040, SetBlockAlignment, "SetBlockAlignment"},
384 {0x00080000, nullptr, "GetBlockAlignment"}, 721 {0x00080000, GetBlockAlignment, "GetBlockAlignment"},
385 {0x00090040, nullptr, "SetSpacialDithering"}, 722 {0x00090040, SetSpacialDithering, "SetSpacialDithering"},
386 {0x000A0000, nullptr, "GetSpacialDithering"}, 723 {0x000A0000, GetSpacialDithering, "GetSpacialDithering"},
387 {0x000B0040, nullptr, "SetTemporalDithering"}, 724 {0x000B0040, SetTemporalDithering, "SetTemporalDithering"},
388 {0x000C0000, nullptr, "GetTemporalDithering"}, 725 {0x000C0000, GetTemporalDithering, "GetTemporalDithering"},
389 {0x000D0040, SetTransferEndInterrupt, "SetTransferEndInterrupt"}, 726 {0x000D0040, SetTransferEndInterrupt, "SetTransferEndInterrupt"},
727 {0x000E0000, GetTransferEndInterrupt, "GetTransferEndInterrupt"},
390 {0x000F0000, GetTransferEndEvent, "GetTransferEndEvent"}, 728 {0x000F0000, GetTransferEndEvent, "GetTransferEndEvent"},
391 {0x00100102, SetSendingY, "SetSendingY"}, 729 {0x00100102, SetSendingY, "SetSendingY"},
392 {0x00110102, SetSendingU, "SetSendingU"}, 730 {0x00110102, SetSendingU, "SetSendingU"},
393 {0x00120102, SetSendingV, "SetSendingV"}, 731 {0x00120102, SetSendingV, "SetSendingV"},
394 {0x00130102, SetSendingYUYV, "SetSendingYUYV"}, 732 {0x00130102, SetSendingYUYV, "SetSendingYUYV"},
395 {0x00140000, nullptr, "IsFinishedSendingYuv"}, 733 {0x00140000, IsFinishedSendingYuv, "IsFinishedSendingYuv"},
396 {0x00150000, nullptr, "IsFinishedSendingY"}, 734 {0x00150000, IsFinishedSendingY, "IsFinishedSendingY"},
397 {0x00160000, nullptr, "IsFinishedSendingU"}, 735 {0x00160000, IsFinishedSendingU, "IsFinishedSendingU"},
398 {0x00170000, nullptr, "IsFinishedSendingV"}, 736 {0x00170000, IsFinishedSendingV, "IsFinishedSendingV"},
399 {0x00180102, SetReceiving, "SetReceiving"}, 737 {0x00180102, SetReceiving, "SetReceiving"},
400 {0x00190000, nullptr, "IsFinishedReceiving"}, 738 {0x00190000, IsFinishedReceiving, "IsFinishedReceiving"},
401 {0x001A0040, SetInputLineWidth, "SetInputLineWidth"}, 739 {0x001A0040, SetInputLineWidth, "SetInputLineWidth"},
402 {0x001B0000, nullptr, "GetInputLineWidth"}, 740 {0x001B0000, GetInputLineWidth, "GetInputLineWidth"},
403 {0x001C0040, SetInputLines, "SetInputLines"}, 741 {0x001C0040, SetInputLines, "SetInputLines"},
404 {0x001D0000, nullptr, "GetInputLines"}, 742 {0x001D0000, GetInputLines, "GetInputLines"},
405 {0x001E0100, SetCoefficient, "SetCoefficient"}, 743 {0x001E0100, SetCoefficient, "SetCoefficient"},
406 {0x001F0000, nullptr, "GetCoefficient"}, 744 {0x001F0000, GetCoefficient, "GetCoefficient"},
407 {0x00200040, SetStandardCoefficient, "SetStandardCoefficient"}, 745 {0x00200040, SetStandardCoefficient, "SetStandardCoefficient"},
408 {0x00210040, nullptr, "GetStandardCoefficientParams"}, 746 {0x00210040, GetStandardCoefficient, "GetStandardCoefficient"},
409 {0x00220040, SetAlpha, "SetAlpha"}, 747 {0x00220040, SetAlpha, "SetAlpha"},
410 {0x00230000, nullptr, "GetAlpha"}, 748 {0x00230000, GetAlpha, "GetAlpha"},
411 {0x00240200, nullptr, "SetDitheringWeightParams"}, 749 {0x00240200, SetDitheringWeightParams,"SetDitheringWeightParams"},
412 {0x00250000, nullptr, "GetDitheringWeightParams"}, 750 {0x00250000, GetDitheringWeightParams,"GetDitheringWeightParams"},
413 {0x00260000, StartConversion, "StartConversion"}, 751 {0x00260000, StartConversion, "StartConversion"},
414 {0x00270000, StopConversion, "StopConversion"}, 752 {0x00270000, StopConversion, "StopConversion"},
415 {0x00280000, IsBusyConversion, "IsBusyConversion"}, 753 {0x00280000, IsBusyConversion, "IsBusyConversion"},
416 {0x002901C0, SetConversionParams, "SetConversionParams"}, 754 {0x002901C0, SetPackageParameter, "SetPackageParameter"},
417 {0x002A0000, PingProcess, "PingProcess"}, 755 {0x002A0000, PingProcess, "PingProcess"},
418 {0x002B0000, DriverInitialize, "DriverInitialize"}, 756 {0x002B0000, DriverInitialize, "DriverInitialize"},
419 {0x002C0000, DriverFinalize, "DriverFinalize"}, 757 {0x002C0000, DriverFinalize, "DriverFinalize"},
420 {0x002D0000, nullptr, "GetPackageParameter"}, 758 {0x002D0000, GetPackageParameter, "GetPackageParameter"},
421}; 759};
422 760
423//////////////////////////////////////////////////////////////////////////////////////////////////// 761////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/core/hle/service/y2r_u.h b/src/core/hle/service/y2r_u.h
index 3965a5545..95fa2fdb7 100644
--- a/src/core/hle/service/y2r_u.h
+++ b/src/core/hle/service/y2r_u.h
@@ -97,6 +97,7 @@ struct ConversionConfiguration {
97 u16 input_line_width; 97 u16 input_line_width;
98 u16 input_lines; 98 u16 input_lines;
99 CoefficientSet coefficients; 99 CoefficientSet coefficients;
100 u8 padding;
100 u16 alpha; 101 u16 alpha;
101 102
102 /// Input parameters for the Y (luma) plane 103 /// Input parameters for the Y (luma) plane
@@ -109,6 +110,25 @@ struct ConversionConfiguration {
109 ResultCode SetStandardCoefficient(StandardCoefficient standard_coefficient); 110 ResultCode SetStandardCoefficient(StandardCoefficient standard_coefficient);
110}; 111};
111 112
113struct DitheringWeightParams {
114 u16 w0_xEven_yEven;
115 u16 w0_xOdd_yEven;
116 u16 w0_xEven_yOdd;
117 u16 w0_xOdd_yOdd;
118 u16 w1_xEven_yEven;
119 u16 w1_xOdd_yEven;
120 u16 w1_xEven_yOdd;
121 u16 w1_xOdd_yOdd;
122 u16 w2_xEven_yEven;
123 u16 w2_xOdd_yEven;
124 u16 w2_xEven_yOdd;
125 u16 w2_xOdd_yOdd;
126 u16 w3_xEven_yEven;
127 u16 w3_xOdd_yEven;
128 u16 w3_xEven_yOdd;
129 u16 w3_xOdd_yOdd;
130};
131
112class Interface : public Service::Interface { 132class Interface : public Service::Interface {
113public: 133public:
114 Interface(); 134 Interface();
diff --git a/src/core/hle/shared_page.cpp b/src/core/hle/shared_page.cpp
index 50c5bc01b..2a1caeaac 100644
--- a/src/core/hle/shared_page.cpp
+++ b/src/core/hle/shared_page.cpp
@@ -16,6 +16,9 @@ void Init() {
16 std::memset(&shared_page, 0, sizeof(shared_page)); 16 std::memset(&shared_page, 0, sizeof(shared_page));
17 17
18 shared_page.running_hw = 0x1; // product 18 shared_page.running_hw = 0x1; // product
19
20 // Some games wait until this value becomes 0x1, before asking running_hw
21 shared_page.unknown_value = 0x1;
19} 22}
20 23
21} // namespace 24} // namespace
diff --git a/src/core/hle/shared_page.h b/src/core/hle/shared_page.h
index 379bb7b63..35a07c685 100644
--- a/src/core/hle/shared_page.h
+++ b/src/core/hle/shared_page.h
@@ -39,12 +39,14 @@ struct SharedPageDef {
39 DateTime date_time_0; // 20 39 DateTime date_time_0; // 20
40 DateTime date_time_1; // 40 40 DateTime date_time_1; // 40
41 u8 wifi_macaddr[6]; // 60 41 u8 wifi_macaddr[6]; // 60
42 u8 wifi_unknown1; // 66 42 u8 wifi_link_level; // 66
43 u8 wifi_unknown2; // 67 43 u8 wifi_unknown2; // 67
44 INSERT_PADDING_BYTES(0x80 - 0x68); // 68 44 INSERT_PADDING_BYTES(0x80 - 0x68); // 68
45 float_le sliderstate_3d; // 80 45 float_le sliderstate_3d; // 80
46 u8 ledstate_3d; // 84 46 u8 ledstate_3d; // 84
47 INSERT_PADDING_BYTES(0xA0 - 0x85); // 85 47 INSERT_PADDING_BYTES(1); // 85
48 u8 unknown_value; // 86
49 INSERT_PADDING_BYTES(0xA0 - 0x87); // 87
48 u64_le menu_title_id; // A0 50 u64_le menu_title_id; // A0
49 u64_le active_menu_title_id; // A8 51 u64_le active_menu_title_id; // A8
50 INSERT_PADDING_BYTES(0x1000 - 0xB0); // B0 52 INSERT_PADDING_BYTES(0x1000 - 0xB0); // B0
diff --git a/src/core/hle/svc.cpp b/src/core/hle/svc.cpp
index ae54afb1c..0ce72de87 100644
--- a/src/core/hle/svc.cpp
+++ b/src/core/hle/svc.cpp
@@ -6,7 +6,7 @@
6 6
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8#include "common/microprofile.h" 8#include "common/microprofile.h"
9#include "common/profiler.h" 9#include "common/scope_exit.h"
10#include "common/string_util.h" 10#include "common/string_util.h"
11#include "common/symbols.h" 11#include "common/symbols.h"
12 12
@@ -100,6 +100,7 @@ static ResultCode ControlMemory(u32* out_addr, u32 operation, u32 addr0, u32 add
100 switch (operation & MEMOP_OPERATION_MASK) { 100 switch (operation & MEMOP_OPERATION_MASK) {
101 case MEMOP_FREE: 101 case MEMOP_FREE:
102 { 102 {
103 // TODO(Subv): What happens if an application tries to FREE a block of memory that has a SharedMemory pointing to it?
103 if (addr0 >= Memory::HEAP_VADDR && addr0 < Memory::HEAP_VADDR_END) { 104 if (addr0 >= Memory::HEAP_VADDR && addr0 < Memory::HEAP_VADDR_END) {
104 ResultCode result = process.HeapFree(addr0, size); 105 ResultCode result = process.HeapFree(addr0, size);
105 if (result.IsError()) return result; 106 if (result.IsError()) return result;
@@ -161,8 +162,6 @@ static ResultCode MapMemoryBlock(Handle handle, u32 addr, u32 permissions, u32 o
161 LOG_TRACE(Kernel_SVC, "called memblock=0x%08X, addr=0x%08X, mypermissions=0x%08X, otherpermission=%d", 162 LOG_TRACE(Kernel_SVC, "called memblock=0x%08X, addr=0x%08X, mypermissions=0x%08X, otherpermission=%d",
162 handle, addr, permissions, other_permissions); 163 handle, addr, permissions, other_permissions);
163 164
164 // TODO(Subv): The same process that created a SharedMemory object can not map it in its own address space
165
166 SharedPtr<SharedMemory> shared_memory = Kernel::g_handle_table.Get<SharedMemory>(handle); 165 SharedPtr<SharedMemory> shared_memory = Kernel::g_handle_table.Get<SharedMemory>(handle);
167 if (shared_memory == nullptr) 166 if (shared_memory == nullptr)
168 return ERR_INVALID_HANDLE; 167 return ERR_INVALID_HANDLE;
@@ -177,7 +176,7 @@ static ResultCode MapMemoryBlock(Handle handle, u32 addr, u32 permissions, u32 o
177 case MemoryPermission::WriteExecute: 176 case MemoryPermission::WriteExecute:
178 case MemoryPermission::ReadWriteExecute: 177 case MemoryPermission::ReadWriteExecute:
179 case MemoryPermission::DontCare: 178 case MemoryPermission::DontCare:
180 return shared_memory->Map(addr, permissions_type, 179 return shared_memory->Map(Kernel::g_current_process.get(), addr, permissions_type,
181 static_cast<MemoryPermission>(other_permissions)); 180 static_cast<MemoryPermission>(other_permissions));
182 default: 181 default:
183 LOG_ERROR(Kernel_SVC, "unknown permissions=0x%08X", permissions); 182 LOG_ERROR(Kernel_SVC, "unknown permissions=0x%08X", permissions);
@@ -197,7 +196,7 @@ static ResultCode UnmapMemoryBlock(Handle handle, u32 addr) {
197 if (shared_memory == nullptr) 196 if (shared_memory == nullptr)
198 return ERR_INVALID_HANDLE; 197 return ERR_INVALID_HANDLE;
199 198
200 return shared_memory->Unmap(addr); 199 return shared_memory->Unmap(Kernel::g_current_process.get(), addr);
201} 200}
202 201
203/// Connect to an OS service given the port name, returns the handle to the port to out 202/// Connect to an OS service given the port name, returns the handle to the port to out
@@ -328,9 +327,9 @@ static ResultCode WaitSynchronizationN(s32* out, Handle* handles, s32 handle_cou
328 } 327 }
329 } 328 }
330 329
331 HLE::Reschedule(__func__); 330 SCOPE_EXIT({HLE::Reschedule("WaitSynchronizationN");}); // Reschedule after putting the threads to sleep.
332 331
333 // If thread should wait, then set its state to waiting and then reschedule... 332 // If thread should wait, then set its state to waiting
334 if (wait_thread) { 333 if (wait_thread) {
335 334
336 // Actually wait the current thread on each object if we decided to wait... 335 // Actually wait the current thread on each object if we decided to wait...
@@ -497,8 +496,16 @@ static ResultCode CreateThread(Handle* out_handle, s32 priority, u32 entry_point
497 break; 496 break;
498 } 497 }
499 498
499 if (processor_id == THREADPROCESSORID_1 || processor_id == THREADPROCESSORID_ALL ||
500 (processor_id == THREADPROCESSORID_DEFAULT && Kernel::g_current_process->ideal_processor == THREADPROCESSORID_1)) {
501 LOG_WARNING(Kernel_SVC, "Newly created thread is allowed to be run in the SysCore, unimplemented.");
502 }
503
500 CASCADE_RESULT(SharedPtr<Thread> thread, Kernel::Thread::Create( 504 CASCADE_RESULT(SharedPtr<Thread> thread, Kernel::Thread::Create(
501 name, entry_point, priority, arg, processor_id, stack_top)); 505 name, entry_point, priority, arg, processor_id, stack_top));
506
507 thread->context.fpscr = FPSCR_DEFAULT_NAN | FPSCR_FLUSH_TO_ZERO | FPSCR_ROUND_TOZERO; // 0x03C00000
508
502 CASCADE_RESULT(*out_handle, Kernel::g_handle_table.Create(std::move(thread))); 509 CASCADE_RESULT(*out_handle, Kernel::g_handle_table.Create(std::move(thread)));
503 510
504 LOG_TRACE(Kernel_SVC, "called entrypoint=0x%08X (%s), arg=0x%08X, stacktop=0x%08X, " 511 LOG_TRACE(Kernel_SVC, "called entrypoint=0x%08X (%s), arg=0x%08X, stacktop=0x%08X, "
@@ -786,18 +793,44 @@ static ResultCode CreateMemoryBlock(Handle* out_handle, u32 addr, u32 size, u32
786 if (size % Memory::PAGE_SIZE != 0) 793 if (size % Memory::PAGE_SIZE != 0)
787 return ResultCode(ErrorDescription::MisalignedSize, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage); 794 return ResultCode(ErrorDescription::MisalignedSize, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage);
788 795
789 // TODO(Subv): Return E0A01BF5 if the address is not in the application's heap 796 SharedPtr<SharedMemory> shared_memory = nullptr;
790
791 // TODO(Subv): Implement this function properly
792 797
793 using Kernel::MemoryPermission; 798 using Kernel::MemoryPermission;
794 SharedPtr<SharedMemory> shared_memory = SharedMemory::Create(size, 799 auto VerifyPermissions = [](MemoryPermission permission) {
795 (MemoryPermission)my_permission, (MemoryPermission)other_permission); 800 // SharedMemory blocks can not be created with Execute permissions
796 // Map the SharedMemory to the specified address 801 switch (permission) {
797 shared_memory->base_address = addr; 802 case MemoryPermission::None:
803 case MemoryPermission::Read:
804 case MemoryPermission::Write:
805 case MemoryPermission::ReadWrite:
806 case MemoryPermission::DontCare:
807 return true;
808 default:
809 return false;
810 }
811 };
812
813 if (!VerifyPermissions(static_cast<MemoryPermission>(my_permission)) ||
814 !VerifyPermissions(static_cast<MemoryPermission>(other_permission)))
815 return ResultCode(ErrorDescription::InvalidCombination, ErrorModule::OS,
816 ErrorSummary::InvalidArgument, ErrorLevel::Usage);
817
818 if (addr < Memory::PROCESS_IMAGE_VADDR || addr + size > Memory::SHARED_MEMORY_VADDR_END) {
819 return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage);
820 }
821
822 // When trying to create a memory block with address = 0,
823 // if the process has the Shared Device Memory flag in the exheader,
824 // then we have to allocate from the same region as the caller process instead of the BASE region.
825 Kernel::MemoryRegion region = Kernel::MemoryRegion::BASE;
826 if (addr == 0 && Kernel::g_current_process->flags.shared_device_mem)
827 region = Kernel::g_current_process->flags.memory_region;
828
829 shared_memory = SharedMemory::Create(Kernel::g_current_process, size,
830 static_cast<MemoryPermission>(my_permission), static_cast<MemoryPermission>(other_permission), addr, region);
798 CASCADE_RESULT(*out_handle, Kernel::g_handle_table.Create(std::move(shared_memory))); 831 CASCADE_RESULT(*out_handle, Kernel::g_handle_table.Create(std::move(shared_memory)));
799 832
800 LOG_WARNING(Kernel_SVC, "(STUBBED) called addr=0x%08X", addr); 833 LOG_WARNING(Kernel_SVC, "called addr=0x%08X", addr);
801 return RESULT_SUCCESS; 834 return RESULT_SUCCESS;
802} 835}
803 836
@@ -860,6 +893,10 @@ static ResultCode GetProcessInfo(s64* out, Handle process_handle, u32 type) {
860 // TODO(yuriks): Type 0 returns a slightly higher number than type 2, but I'm not sure 893 // TODO(yuriks): Type 0 returns a slightly higher number than type 2, but I'm not sure
861 // what's the difference between them. 894 // what's the difference between them.
862 *out = process->heap_used + process->linear_heap_used + process->misc_memory_used; 895 *out = process->heap_used + process->linear_heap_used + process->misc_memory_used;
896 if(*out % Memory::PAGE_SIZE != 0) {
897 LOG_ERROR(Kernel_SVC, "called, memory size not page-aligned");
898 return ERR_MISALIGNED_SIZE;
899 }
863 break; 900 break;
864 case 1: 901 case 1:
865 case 3: 902 case 3:
@@ -1031,8 +1068,6 @@ static const FunctionDef SVC_Table[] = {
1031 {0x7D, HLE::Wrap<QueryProcessMemory>, "QueryProcessMemory"}, 1068 {0x7D, HLE::Wrap<QueryProcessMemory>, "QueryProcessMemory"},
1032}; 1069};
1033 1070
1034Common::Profiling::TimingCategory profiler_svc("SVC Calls");
1035
1036static const FunctionDef* GetSVCInfo(u32 func_num) { 1071static const FunctionDef* GetSVCInfo(u32 func_num) {
1037 if (func_num >= ARRAY_SIZE(SVC_Table)) { 1072 if (func_num >= ARRAY_SIZE(SVC_Table)) {
1038 LOG_ERROR(Kernel_SVC, "unknown svc=0x%02X", func_num); 1073 LOG_ERROR(Kernel_SVC, "unknown svc=0x%02X", func_num);
@@ -1044,7 +1079,6 @@ static const FunctionDef* GetSVCInfo(u32 func_num) {
1044MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70)); 1079MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70));
1045 1080
1046void CallSVC(u32 immediate) { 1081void CallSVC(u32 immediate) {
1047 Common::Profiling::ScopeTimer timer_svc(profiler_svc);
1048 MICROPROFILE_SCOPE(Kernel_SVC); 1082 MICROPROFILE_SCOPE(Kernel_SVC);
1049 1083
1050 const FunctionDef* info = GetSVCInfo(immediate); 1084 const FunctionDef* info = GetSVCInfo(immediate);
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index 7e2f9cdfa..a4dfb7e43 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -115,21 +115,39 @@ inline void Write(u32 addr, const T data) {
115 u8* start = Memory::GetPhysicalPointer(config.GetStartAddress()); 115 u8* start = Memory::GetPhysicalPointer(config.GetStartAddress());
116 u8* end = Memory::GetPhysicalPointer(config.GetEndAddress()); 116 u8* end = Memory::GetPhysicalPointer(config.GetEndAddress());
117 117
118 if (config.fill_24bit) { 118 // TODO: Consider always accelerating and returning vector of
119 // fill with 24-bit values 119 // regions that the accelerated fill did not cover to
120 for (u8* ptr = start; ptr < end; ptr += 3) { 120 // reduce/eliminate the fill that the cpu has to do.
121 ptr[0] = config.value_24bit_r; 121 // This would also mean that the flush below is not needed.
122 ptr[1] = config.value_24bit_g; 122 // Fill should first flush all surfaces that touch but are
123 ptr[2] = config.value_24bit_b; 123 // not completely within the fill range.
124 // Then fill all completely covered surfaces, and return the
125 // regions that were between surfaces or within the touching
126 // ones for cpu to manually fill here.
127 if (!VideoCore::g_renderer->Rasterizer()->AccelerateFill(config)) {
128 Memory::RasterizerFlushAndInvalidateRegion(config.GetStartAddress(), config.GetEndAddress() - config.GetStartAddress());
129
130 if (config.fill_24bit) {
131 // fill with 24-bit values
132 for (u8* ptr = start; ptr < end; ptr += 3) {
133 ptr[0] = config.value_24bit_r;
134 ptr[1] = config.value_24bit_g;
135 ptr[2] = config.value_24bit_b;
136 }
137 } else if (config.fill_32bit) {
138 // fill with 32-bit values
139 if (end > start) {
140 u32 value = config.value_32bit;
141 size_t len = (end - start) / sizeof(u32);
142 for (size_t i = 0; i < len; ++i)
143 memcpy(&start[i * sizeof(u32)], &value, sizeof(u32));
144 }
145 } else {
146 // fill with 16-bit values
147 u16 value_16bit = config.value_16bit.Value();
148 for (u8* ptr = start; ptr < end; ptr += sizeof(u16))
149 memcpy(ptr, &value_16bit, sizeof(u16));
124 } 150 }
125 } else if (config.fill_32bit) {
126 // fill with 32-bit values
127 for (u32* ptr = (u32*)start; ptr < (u32*)end; ++ptr)
128 *ptr = config.value_32bit;
129 } else {
130 // fill with 16-bit values
131 for (u16* ptr = (u16*)start; ptr < (u16*)end; ++ptr)
132 *ptr = config.value_16bit;
133 } 151 }
134 152
135 LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), config.GetEndAddress()); 153 LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), config.GetEndAddress());
@@ -139,8 +157,6 @@ inline void Write(u32 addr, const T data) {
139 } else { 157 } else {
140 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC1); 158 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC1);
141 } 159 }
142
143 VideoCore::g_renderer->Rasterizer()->InvalidateRegion(config.GetStartAddress(), config.GetEndAddress() - config.GetStartAddress());
144 } 160 }
145 161
146 // Reset "trigger" flag and set the "finish" flag 162 // Reset "trigger" flag and set the "finish" flag
@@ -161,184 +177,185 @@ inline void Write(u32 addr, const T data) {
161 if (Pica::g_debug_context) 177 if (Pica::g_debug_context)
162 Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer, nullptr); 178 Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer, nullptr);
163 179
164 u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress()); 180 if (!VideoCore::g_renderer->Rasterizer()->AccelerateDisplayTransfer(config)) {
165 u8* dst_pointer = Memory::GetPhysicalPointer(config.GetPhysicalOutputAddress()); 181 u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress());
166 182 u8* dst_pointer = Memory::GetPhysicalPointer(config.GetPhysicalOutputAddress());
167 if (config.is_texture_copy) {
168 u32 input_width = config.texture_copy.input_width * 16;
169 u32 input_gap = config.texture_copy.input_gap * 16;
170 u32 output_width = config.texture_copy.output_width * 16;
171 u32 output_gap = config.texture_copy.output_gap * 16;
172
173 size_t contiguous_input_size = config.texture_copy.size / input_width * (input_width + input_gap);
174 VideoCore::g_renderer->Rasterizer()->FlushRegion(config.GetPhysicalInputAddress(), contiguous_input_size);
175
176 u32 remaining_size = config.texture_copy.size;
177 u32 remaining_input = input_width;
178 u32 remaining_output = output_width;
179 while (remaining_size > 0) {
180 u32 copy_size = std::min({ remaining_input, remaining_output, remaining_size });
181 183
182 std::memcpy(dst_pointer, src_pointer, copy_size); 184 if (config.is_texture_copy) {
183 src_pointer += copy_size; 185 u32 input_width = config.texture_copy.input_width * 16;
184 dst_pointer += copy_size; 186 u32 input_gap = config.texture_copy.input_gap * 16;
187 u32 output_width = config.texture_copy.output_width * 16;
188 u32 output_gap = config.texture_copy.output_gap * 16;
185 189
186 remaining_input -= copy_size; 190 size_t contiguous_input_size = config.texture_copy.size / input_width * (input_width + input_gap);
187 remaining_output -= copy_size; 191 Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), static_cast<u32>(contiguous_input_size));
188 remaining_size -= copy_size;
189 192
190 if (remaining_input == 0) { 193 size_t contiguous_output_size = config.texture_copy.size / output_width * (output_width + output_gap);
191 remaining_input = input_width; 194 Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), static_cast<u32>(contiguous_output_size));
192 src_pointer += input_gap;
193 }
194 if (remaining_output == 0) {
195 remaining_output = output_width;
196 dst_pointer += output_gap;
197 }
198 }
199 195
200 LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X", 196 u32 remaining_size = config.texture_copy.size;
201 config.texture_copy.size, 197 u32 remaining_input = input_width;
202 config.GetPhysicalInputAddress(), input_width, input_gap, 198 u32 remaining_output = output_width;
203 config.GetPhysicalOutputAddress(), output_width, output_gap, 199 while (remaining_size > 0) {
204 config.flags); 200 u32 copy_size = std::min({ remaining_input, remaining_output, remaining_size });
205 201
206 size_t contiguous_output_size = config.texture_copy.size / output_width * (output_width + output_gap); 202 std::memcpy(dst_pointer, src_pointer, copy_size);
207 VideoCore::g_renderer->Rasterizer()->InvalidateRegion(config.GetPhysicalOutputAddress(), contiguous_output_size); 203 src_pointer += copy_size;
204 dst_pointer += copy_size;
208 205
209 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); 206 remaining_input -= copy_size;
210 break; 207 remaining_output -= copy_size;
211 } 208 remaining_size -= copy_size;
212 209
213 if (config.scaling > config.ScaleXY) { 210 if (remaining_input == 0) {
214 LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", config.scaling.Value()); 211 remaining_input = input_width;
215 UNIMPLEMENTED(); 212 src_pointer += input_gap;
216 break; 213 }
217 } 214 if (remaining_output == 0) {
215 remaining_output = output_width;
216 dst_pointer += output_gap;
217 }
218 }
218 219
219 if (config.input_linear && config.scaling != config.NoScale) { 220 LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X",
220 LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input"); 221 config.texture_copy.size,
221 UNIMPLEMENTED(); 222 config.GetPhysicalInputAddress(), input_width, input_gap,
222 break; 223 config.GetPhysicalOutputAddress(), output_width, output_gap,
223 } 224 config.flags);
224 225
225 bool horizontal_scale = config.scaling != config.NoScale; 226 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF);
226 bool vertical_scale = config.scaling == config.ScaleXY; 227 break;
228 }
227 229
228 u32 output_width = config.output_width >> horizontal_scale; 230 if (config.scaling > config.ScaleXY) {
229 u32 output_height = config.output_height >> vertical_scale; 231 LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", config.scaling.Value());
232 UNIMPLEMENTED();
233 break;
234 }
230 235
231 u32 input_size = config.input_width * config.input_height * GPU::Regs::BytesPerPixel(config.input_format); 236 if (config.input_linear && config.scaling != config.NoScale) {
232 u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format); 237 LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input");
238 UNIMPLEMENTED();
239 break;
240 }
233 241
234 VideoCore::g_renderer->Rasterizer()->FlushRegion(config.GetPhysicalInputAddress(), input_size); 242 int horizontal_scale = config.scaling != config.NoScale ? 1 : 0;
243 int vertical_scale = config.scaling == config.ScaleXY ? 1 : 0;
235 244
236 for (u32 y = 0; y < output_height; ++y) { 245 u32 output_width = config.output_width >> horizontal_scale;
237 for (u32 x = 0; x < output_width; ++x) { 246 u32 output_height = config.output_height >> vertical_scale;
238 Math::Vec4<u8> src_color;
239 247
240 // Calculate the [x,y] position of the input image 248 u32 input_size = config.input_width * config.input_height * GPU::Regs::BytesPerPixel(config.input_format);
241 // based on the current output position and the scale 249 u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format);
242 u32 input_x = x << horizontal_scale;
243 u32 input_y = y << vertical_scale;
244 250
245 if (config.flip_vertically) { 251 Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), input_size);
246 // Flip the y value of the output data, 252 Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), output_size);
247 // we do this after calculating the [x,y] position of the input image
248 // to account for the scaling options.
249 y = output_height - y - 1;
250 }
251 253
252 u32 dst_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.output_format); 254 for (u32 y = 0; y < output_height; ++y) {
253 u32 src_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.input_format); 255 for (u32 x = 0; x < output_width; ++x) {
254 u32 src_offset; 256 Math::Vec4<u8> src_color;
255 u32 dst_offset;
256 257
257 if (config.input_linear) { 258 // Calculate the [x,y] position of the input image
258 if (!config.dont_swizzle) { 259 // based on the current output position and the scale
259 // Interpret the input as linear and the output as tiled 260 u32 input_x = x << horizontal_scale;
260 u32 coarse_y = y & ~7; 261 u32 input_y = y << vertical_scale;
261 u32 stride = output_width * dst_bytes_per_pixel;
262 262
263 src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel; 263 if (config.flip_vertically) {
264 dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + coarse_y * stride; 264 // Flip the y value of the output data,
265 } else { 265 // we do this after calculating the [x,y] position of the input image
266 // Both input and output are linear 266 // to account for the scaling options.
267 src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel; 267 y = output_height - y - 1;
268 dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
269 } 268 }
270 } else {
271 if (!config.dont_swizzle) {
272 // Interpret the input as tiled and the output as linear
273 u32 coarse_y = input_y & ~7;
274 u32 stride = config.input_width * src_bytes_per_pixel;
275 269
276 src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + coarse_y * stride; 270 u32 dst_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.output_format);
277 dst_offset = (x + y * output_width) * dst_bytes_per_pixel; 271 u32 src_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.input_format);
272 u32 src_offset;
273 u32 dst_offset;
274
275 if (config.input_linear) {
276 if (!config.dont_swizzle) {
277 // Interpret the input as linear and the output as tiled
278 u32 coarse_y = y & ~7;
279 u32 stride = output_width * dst_bytes_per_pixel;
280
281 src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel;
282 dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + coarse_y * stride;
283 } else {
284 // Both input and output are linear
285 src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel;
286 dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
287 }
278 } else { 288 } else {
279 // Both input and output are tiled 289 if (!config.dont_swizzle) {
280 u32 out_coarse_y = y & ~7; 290 // Interpret the input as tiled and the output as linear
281 u32 out_stride = output_width * dst_bytes_per_pixel; 291 u32 coarse_y = input_y & ~7;
282 292 u32 stride = config.input_width * src_bytes_per_pixel;
283 u32 in_coarse_y = input_y & ~7; 293
284 u32 in_stride = config.input_width * src_bytes_per_pixel; 294 src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + coarse_y * stride;
285 295 dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
286 src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + in_coarse_y * in_stride; 296 } else {
287 dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + out_coarse_y * out_stride; 297 // Both input and output are tiled
298 u32 out_coarse_y = y & ~7;
299 u32 out_stride = output_width * dst_bytes_per_pixel;
300
301 u32 in_coarse_y = input_y & ~7;
302 u32 in_stride = config.input_width * src_bytes_per_pixel;
303
304 src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + in_coarse_y * in_stride;
305 dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + out_coarse_y * out_stride;
306 }
288 } 307 }
289 }
290 308
291 const u8* src_pixel = src_pointer + src_offset; 309 const u8* src_pixel = src_pointer + src_offset;
292 src_color = DecodePixel(config.input_format, src_pixel); 310 src_color = DecodePixel(config.input_format, src_pixel);
293 if (config.scaling == config.ScaleX) { 311 if (config.scaling == config.ScaleX) {
294 Math::Vec4<u8> pixel = DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel); 312 Math::Vec4<u8> pixel = DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel);
295 src_color = ((src_color + pixel) / 2).Cast<u8>(); 313 src_color = ((src_color + pixel) / 2).Cast<u8>();
296 } else if (config.scaling == config.ScaleXY) { 314 } else if (config.scaling == config.ScaleXY) {
297 Math::Vec4<u8> pixel1 = DecodePixel(config.input_format, src_pixel + 1 * src_bytes_per_pixel); 315 Math::Vec4<u8> pixel1 = DecodePixel(config.input_format, src_pixel + 1 * src_bytes_per_pixel);
298 Math::Vec4<u8> pixel2 = DecodePixel(config.input_format, src_pixel + 2 * src_bytes_per_pixel); 316 Math::Vec4<u8> pixel2 = DecodePixel(config.input_format, src_pixel + 2 * src_bytes_per_pixel);
299 Math::Vec4<u8> pixel3 = DecodePixel(config.input_format, src_pixel + 3 * src_bytes_per_pixel); 317 Math::Vec4<u8> pixel3 = DecodePixel(config.input_format, src_pixel + 3 * src_bytes_per_pixel);
300 src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>(); 318 src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>();
301 } 319 }
302 320
303 u8* dst_pixel = dst_pointer + dst_offset; 321 u8* dst_pixel = dst_pointer + dst_offset;
304 switch (config.output_format) { 322 switch (config.output_format) {
305 case Regs::PixelFormat::RGBA8: 323 case Regs::PixelFormat::RGBA8:
306 Color::EncodeRGBA8(src_color, dst_pixel); 324 Color::EncodeRGBA8(src_color, dst_pixel);
307 break; 325 break;
308 326
309 case Regs::PixelFormat::RGB8: 327 case Regs::PixelFormat::RGB8:
310 Color::EncodeRGB8(src_color, dst_pixel); 328 Color::EncodeRGB8(src_color, dst_pixel);
311 break; 329 break;
312 330
313 case Regs::PixelFormat::RGB565: 331 case Regs::PixelFormat::RGB565:
314 Color::EncodeRGB565(src_color, dst_pixel); 332 Color::EncodeRGB565(src_color, dst_pixel);
315 break; 333 break;
316 334
317 case Regs::PixelFormat::RGB5A1: 335 case Regs::PixelFormat::RGB5A1:
318 Color::EncodeRGB5A1(src_color, dst_pixel); 336 Color::EncodeRGB5A1(src_color, dst_pixel);
319 break; 337 break;
320 338
321 case Regs::PixelFormat::RGBA4: 339 case Regs::PixelFormat::RGBA4:
322 Color::EncodeRGBA4(src_color, dst_pixel); 340 Color::EncodeRGBA4(src_color, dst_pixel);
323 break; 341 break;
324 342
325 default: 343 default:
326 LOG_ERROR(HW_GPU, "Unknown destination framebuffer format %x", config.output_format.Value()); 344 LOG_ERROR(HW_GPU, "Unknown destination framebuffer format %x", config.output_format.Value());
327 break; 345 break;
346 }
328 } 347 }
329 } 348 }
330 }
331 349
332 LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), dst format %x, flags 0x%08X", 350 LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), dst format %x, flags 0x%08X",
333 config.output_height * output_width * GPU::Regs::BytesPerPixel(config.output_format), 351 config.output_height * output_width * GPU::Regs::BytesPerPixel(config.output_format),
334 config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(), 352 config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(),
335 config.GetPhysicalOutputAddress(), output_width, output_height, 353 config.GetPhysicalOutputAddress(), output_width, output_height,
336 config.output_format.Value(), config.flags); 354 config.output_format.Value(), config.flags);
355 }
337 356
338 g_regs.display_transfer_config.trigger = 0; 357 g_regs.display_transfer_config.trigger = 0;
339 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); 358 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF);
340
341 VideoCore::g_renderer->Rasterizer()->InvalidateRegion(config.GetPhysicalOutputAddress(), output_size);
342 } 359 }
343 break; 360 break;
344 } 361 }
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h
index a00adbf53..da4c345b4 100644
--- a/src/core/hw/gpu.h
+++ b/src/core/hw/gpu.h
@@ -78,7 +78,7 @@ struct Regs {
78 78
79 INSERT_PADDING_WORDS(0x4); 79 INSERT_PADDING_WORDS(0x4);
80 80
81 struct { 81 struct MemoryFillConfig {
82 u32 address_start; 82 u32 address_start;
83 u32 address_end; 83 u32 address_end;
84 84
@@ -165,7 +165,7 @@ struct Regs {
165 165
166 INSERT_PADDING_WORDS(0x169); 166 INSERT_PADDING_WORDS(0x169);
167 167
168 struct { 168 struct DisplayTransferConfig {
169 u32 input_address; 169 u32 input_address;
170 u32 output_address; 170 u32 output_address;
171 171
diff --git a/src/core/hw/lcd.h b/src/core/hw/lcd.h
index 3dd877fbf..57029c5e8 100644
--- a/src/core/hw/lcd.h
+++ b/src/core/hw/lcd.h
@@ -52,8 +52,6 @@ struct Regs {
52 return content[index]; 52 return content[index];
53 } 53 }
54 54
55#undef ASSERT_MEMBER_SIZE
56
57}; 55};
58static_assert(std::is_standard_layout<Regs>::value, "Structure does not use standard layout"); 56static_assert(std::is_standard_layout<Regs>::value, "Structure does not use standard layout");
59 57
diff --git a/src/core/hw/y2r.cpp b/src/core/hw/y2r.cpp
index 48c45564f..083391e83 100644
--- a/src/core/hw/y2r.cpp
+++ b/src/core/hw/y2r.cpp
@@ -261,7 +261,7 @@ void PerformConversion(ConversionConfiguration& cvt) {
261 ASSERT(cvt.block_alignment != BlockAlignment::Block8x8 || cvt.input_lines % 8 == 0); 261 ASSERT(cvt.block_alignment != BlockAlignment::Block8x8 || cvt.input_lines % 8 == 0);
262 // Tiles per row 262 // Tiles per row
263 size_t num_tiles = cvt.input_line_width / 8; 263 size_t num_tiles = cvt.input_line_width / 8;
264 ASSERT(num_tiles < MAX_TILES); 264 ASSERT(num_tiles <= MAX_TILES);
265 265
266 // Buffer used as a CDMA source/target. 266 // Buffer used as a CDMA source/target.
267 std::unique_ptr<u8[]> data_buffer(new u8[cvt.input_line_width * 8 * 4]); 267 std::unique_ptr<u8[]> data_buffer(new u8[cvt.input_line_width * 8 * 4]);
diff --git a/src/core/loader/3dsx.cpp b/src/core/loader/3dsx.cpp
index 8eed6a50a..98e7ab48f 100644
--- a/src/core/loader/3dsx.cpp
+++ b/src/core/loader/3dsx.cpp
@@ -10,13 +10,9 @@
10#include "core/file_sys/archive_romfs.h" 10#include "core/file_sys/archive_romfs.h"
11#include "core/hle/kernel/process.h" 11#include "core/hle/kernel/process.h"
12#include "core/hle/kernel/resource_limit.h" 12#include "core/hle/kernel/resource_limit.h"
13#include "core/hle/service/fs/archive.h" 13#include "core/loader/3dsx.h"
14#include "core/loader/elf.h"
15#include "core/loader/ncch.h"
16#include "core/memory.h" 14#include "core/memory.h"
17 15
18#include "3dsx.h"
19
20namespace Loader { 16namespace Loader {
21 17
22/* 18/*
@@ -182,11 +178,11 @@ static THREEDSX_Error Load3DSXFile(FileUtil::IOFile& file, u32 base_addr, Shared
182 for (unsigned current_inprogress = 0; current_inprogress < remaining && pos < end_pos; current_inprogress++) { 178 for (unsigned current_inprogress = 0; current_inprogress < remaining && pos < end_pos; current_inprogress++) {
183 const auto& table = reloc_table[current_inprogress]; 179 const auto& table = reloc_table[current_inprogress];
184 LOG_TRACE(Loader, "(t=%d,skip=%u,patch=%u)", current_segment_reloc_table, 180 LOG_TRACE(Loader, "(t=%d,skip=%u,patch=%u)", current_segment_reloc_table,
185 (u32)table.skip, (u32)table.patch); 181 static_cast<u32>(table.skip), static_cast<u32>(table.patch));
186 pos += table.skip; 182 pos += table.skip;
187 s32 num_patches = table.patch; 183 s32 num_patches = table.patch;
188 while (0 < num_patches && pos < end_pos) { 184 while (0 < num_patches && pos < end_pos) {
189 u32 in_addr = (u8*)pos - program_image.data(); 185 u32 in_addr = static_cast<u32>(reinterpret_cast<u8*>(pos) - program_image.data());
190 u32 addr = TranslateAddr(*pos, &loadinfo, offsets); 186 u32 addr = TranslateAddr(*pos, &loadinfo, offsets);
191 LOG_TRACE(Loader, "Patching %08X <-- rel(%08X,%d) (%08X)", 187 LOG_TRACE(Loader, "Patching %08X <-- rel(%08X,%d) (%08X)",
192 base_addr + in_addr, addr, current_segment_reloc_table, *pos); 188 base_addr + in_addr, addr, current_segment_reloc_table, *pos);
@@ -288,7 +284,7 @@ ResultStatus AppLoader_THREEDSX::ReadRomFS(std::shared_ptr<FileUtil::IOFile>& ro
288 // Check if the 3DSX has a RomFS... 284 // Check if the 3DSX has a RomFS...
289 if (hdr.fs_offset != 0) { 285 if (hdr.fs_offset != 0) {
290 u32 romfs_offset = hdr.fs_offset; 286 u32 romfs_offset = hdr.fs_offset;
291 u32 romfs_size = file.GetSize() - hdr.fs_offset; 287 u32 romfs_size = static_cast<u32>(file.GetSize()) - hdr.fs_offset;
292 288
293 LOG_DEBUG(Loader, "RomFS offset: 0x%08X", romfs_offset); 289 LOG_DEBUG(Loader, "RomFS offset: 0x%08X", romfs_offset);
294 LOG_DEBUG(Loader, "RomFS size: 0x%08X", romfs_size); 290 LOG_DEBUG(Loader, "RomFS size: 0x%08X", romfs_size);
@@ -307,4 +303,31 @@ ResultStatus AppLoader_THREEDSX::ReadRomFS(std::shared_ptr<FileUtil::IOFile>& ro
307 return ResultStatus::ErrorNotUsed; 303 return ResultStatus::ErrorNotUsed;
308} 304}
309 305
306ResultStatus AppLoader_THREEDSX::ReadIcon(std::vector<u8>& buffer) {
307 if (!file.IsOpen())
308 return ResultStatus::Error;
309
310 // Reset read pointer in case this file has been read before.
311 file.Seek(0, SEEK_SET);
312
313 THREEDSX_Header hdr;
314 if (file.ReadBytes(&hdr, sizeof(THREEDSX_Header)) != sizeof(THREEDSX_Header))
315 return ResultStatus::Error;
316
317 if (hdr.header_size != sizeof(THREEDSX_Header))
318 return ResultStatus::Error;
319
320 // Check if the 3DSX has a SMDH...
321 if (hdr.smdh_offset != 0) {
322 file.Seek(hdr.smdh_offset, SEEK_SET);
323 buffer.resize(hdr.smdh_size);
324
325 if (file.ReadBytes(&buffer[0], hdr.smdh_size) != hdr.smdh_size)
326 return ResultStatus::Error;
327
328 return ResultStatus::Success;
329 }
330 return ResultStatus::ErrorNotUsed;
331}
332
310} // namespace Loader 333} // namespace Loader
diff --git a/src/core/loader/3dsx.h b/src/core/loader/3dsx.h
index 365ddb7a5..3ee686703 100644
--- a/src/core/loader/3dsx.h
+++ b/src/core/loader/3dsx.h
@@ -17,7 +17,7 @@ namespace Loader {
17/// Loads an 3DSX file 17/// Loads an 3DSX file
18class AppLoader_THREEDSX final : public AppLoader { 18class AppLoader_THREEDSX final : public AppLoader {
19public: 19public:
20 AppLoader_THREEDSX(FileUtil::IOFile&& file, std::string filename, const std::string& filepath) 20 AppLoader_THREEDSX(FileUtil::IOFile&& file, const std::string& filename, const std::string& filepath)
21 : AppLoader(std::move(file)), filename(std::move(filename)), filepath(filepath) {} 21 : AppLoader(std::move(file)), filename(std::move(filename)), filepath(filepath) {}
22 22
23 /** 23 /**
@@ -34,6 +34,13 @@ public:
34 ResultStatus Load() override; 34 ResultStatus Load() override;
35 35
36 /** 36 /**
37 * Get the icon (typically icon section) of the application
38 * @param buffer Reference to buffer to store data
39 * @return ResultStatus result of function
40 */
41 ResultStatus ReadIcon(std::vector<u8>& buffer) override;
42
43 /**
37 * Get the RomFS of the application 44 * Get the RomFS of the application
38 * @param romfs_file Reference to buffer to store data 45 * @param romfs_file Reference to buffer to store data
39 * @param offset Offset in the file to the RomFS 46 * @param offset Offset in the file to the RomFS
diff --git a/src/core/loader/loader.cpp b/src/core/loader/loader.cpp
index 886501c41..af3f62248 100644
--- a/src/core/loader/loader.cpp
+++ b/src/core/loader/loader.cpp
@@ -90,6 +90,28 @@ const char* GetFileTypeString(FileType type) {
90 return "unknown"; 90 return "unknown";
91} 91}
92 92
93std::unique_ptr<AppLoader> GetLoader(FileUtil::IOFile&& file, FileType type,
94 const std::string& filename, const std::string& filepath) {
95 switch (type) {
96
97 // 3DSX file format.
98 case FileType::THREEDSX:
99 return std::make_unique<AppLoader_THREEDSX>(std::move(file), filename, filepath);
100
101 // Standard ELF file format.
102 case FileType::ELF:
103 return std::make_unique<AppLoader_ELF>(std::move(file), filename);
104
105 // NCCH/NCSD container formats.
106 case FileType::CXI:
107 case FileType::CCI:
108 return std::make_unique<AppLoader_NCCH>(std::move(file), filepath);
109
110 default:
111 return std::unique_ptr<AppLoader>();
112 }
113}
114
93ResultStatus LoadFile(const std::string& filename) { 115ResultStatus LoadFile(const std::string& filename) {
94 FileUtil::IOFile file(filename, "rb"); 116 FileUtil::IOFile file(filename, "rb");
95 if (!file.IsOpen()) { 117 if (!file.IsOpen()) {
@@ -111,38 +133,29 @@ ResultStatus LoadFile(const std::string& filename) {
111 133
112 LOG_INFO(Loader, "Loading file %s as %s...", filename.c_str(), GetFileTypeString(type)); 134 LOG_INFO(Loader, "Loading file %s as %s...", filename.c_str(), GetFileTypeString(type));
113 135
136 std::unique_ptr<AppLoader> app_loader = GetLoader(std::move(file), type, filename_filename, filename);
137
114 switch (type) { 138 switch (type) {
115 139
116 //3DSX file format... 140 // 3DSX file format...
141 // or NCCH/NCSD container formats...
117 case FileType::THREEDSX: 142 case FileType::THREEDSX:
118 {
119 AppLoader_THREEDSX app_loader(std::move(file), filename_filename, filename);
120 // Load application and RomFS
121 if (ResultStatus::Success == app_loader.Load()) {
122 Service::FS::RegisterArchiveType(std::make_unique<FileSys::ArchiveFactory_RomFS>(app_loader), Service::FS::ArchiveIdCode::RomFS);
123 return ResultStatus::Success;
124 }
125 break;
126 }
127
128 // Standard ELF file format...
129 case FileType::ELF:
130 return AppLoader_ELF(std::move(file), filename_filename).Load();
131
132 // NCCH/NCSD container formats...
133 case FileType::CXI: 143 case FileType::CXI:
134 case FileType::CCI: 144 case FileType::CCI:
135 { 145 {
136 AppLoader_NCCH app_loader(std::move(file), filename);
137
138 // Load application and RomFS 146 // Load application and RomFS
139 ResultStatus result = app_loader.Load(); 147 ResultStatus result = app_loader->Load();
140 if (ResultStatus::Success == result) { 148 if (ResultStatus::Success == result) {
141 Service::FS::RegisterArchiveType(std::make_unique<FileSys::ArchiveFactory_RomFS>(app_loader), Service::FS::ArchiveIdCode::RomFS); 149 Service::FS::RegisterArchiveType(std::make_unique<FileSys::ArchiveFactory_RomFS>(*app_loader), Service::FS::ArchiveIdCode::RomFS);
150 return ResultStatus::Success;
142 } 151 }
143 return result; 152 return result;
144 } 153 }
145 154
155 // Standard ELF file format...
156 case FileType::ELF:
157 return app_loader->Load();
158
146 // CIA file format... 159 // CIA file format...
147 case FileType::CIA: 160 case FileType::CIA:
148 return ResultStatus::ErrorNotImplemented; 161 return ResultStatus::ErrorNotImplemented;
diff --git a/src/core/loader/loader.h b/src/core/loader/loader.h
index 84a4ce5fc..9d3e9ed3b 100644
--- a/src/core/loader/loader.h
+++ b/src/core/loader/loader.h
@@ -10,8 +10,10 @@
10#include <string> 10#include <string>
11#include <vector> 11#include <vector>
12 12
13#include "common/common_funcs.h"
13#include "common/common_types.h" 14#include "common/common_types.h"
14#include "common/file_util.h" 15#include "common/file_util.h"
16#include "common/swap.h"
15 17
16namespace Kernel { 18namespace Kernel {
17struct AddressMapping; 19struct AddressMapping;
@@ -78,6 +80,51 @@ constexpr u32 MakeMagic(char a, char b, char c, char d) {
78 return a | b << 8 | c << 16 | d << 24; 80 return a | b << 8 | c << 16 | d << 24;
79} 81}
80 82
83/// SMDH data structure that contains titles, icons etc. See https://www.3dbrew.org/wiki/SMDH
84struct SMDH {
85 u32_le magic;
86 u16_le version;
87 INSERT_PADDING_BYTES(2);
88
89 struct Title {
90 std::array<u16, 0x40> short_title;
91 std::array<u16, 0x80> long_title;
92 std::array<u16, 0x40> publisher;
93 };
94 std::array<Title, 16> titles;
95
96 std::array<u8, 16> ratings;
97 u32_le region_lockout;
98 u32_le match_maker_id;
99 u64_le match_maker_bit_id;
100 u32_le flags;
101 u16_le eula_version;
102 INSERT_PADDING_BYTES(2);
103 float_le banner_animation_frame;
104 u32_le cec_id;
105 INSERT_PADDING_BYTES(8);
106
107 std::array<u8, 0x480> small_icon;
108 std::array<u8, 0x1200> large_icon;
109
110 /// indicates the language used for each title entry
111 enum class TitleLanguage {
112 Japanese = 0,
113 English = 1,
114 French = 2,
115 German = 3,
116 Italian = 4,
117 Spanish = 5,
118 SimplifiedChinese = 6,
119 Korean= 7,
120 Dutch = 8,
121 Portuguese = 9,
122 Russian = 10,
123 TraditionalChinese = 11
124 };
125};
126static_assert(sizeof(SMDH) == 0x36C0, "SMDH structure size is wrong");
127
81/// Interface for loading an application 128/// Interface for loading an application
82class AppLoader : NonCopyable { 129class AppLoader : NonCopyable {
83public: 130public:
@@ -150,6 +197,16 @@ protected:
150extern const std::initializer_list<Kernel::AddressMapping> default_address_mappings; 197extern const std::initializer_list<Kernel::AddressMapping> default_address_mappings;
151 198
152/** 199/**
200 * Get a loader for a file with a specific type
201 * @param file The file to load
202 * @param type The type of the file
203 * @param filename the file name (without path)
204 * @param filepath the file full path (with name)
205 * @return std::unique_ptr<AppLoader> a pointer to a loader object; nullptr for unsupported type
206 */
207std::unique_ptr<AppLoader> GetLoader(FileUtil::IOFile&& file, FileType type, const std::string& filename, const std::string& filepath);
208
209/**
153 * Identifies and loads a bootable file 210 * Identifies and loads a bootable file
154 * @param filename String filename of bootable file 211 * @param filename String filename of bootable file
155 * @return ResultStatus result of function 212 * @return ResultStatus result of function
diff --git a/src/core/loader/ncch.cpp b/src/core/loader/ncch.cpp
index e63cab33f..7391bdb26 100644
--- a/src/core/loader/ncch.cpp
+++ b/src/core/loader/ncch.cpp
@@ -156,6 +156,9 @@ ResultStatus AppLoader_NCCH::LoadExec() {
156 Kernel::g_current_process->resource_limit = Kernel::ResourceLimit::GetForCategory( 156 Kernel::g_current_process->resource_limit = Kernel::ResourceLimit::GetForCategory(
157 static_cast<Kernel::ResourceLimitCategory>(exheader_header.arm11_system_local_caps.resource_limit_category)); 157 static_cast<Kernel::ResourceLimitCategory>(exheader_header.arm11_system_local_caps.resource_limit_category));
158 158
159 // Set the default CPU core for this process
160 Kernel::g_current_process->ideal_processor = exheader_header.arm11_system_local_caps.ideal_processor;
161
159 // Copy data while converting endianness 162 // Copy data while converting endianness
160 std::array<u32, ARRAY_SIZE(exheader_header.arm11_kernel_caps.descriptors)> kernel_caps; 163 std::array<u32, ARRAY_SIZE(exheader_header.arm11_kernel_caps.descriptors)> kernel_caps;
161 std::copy_n(exheader_header.arm11_kernel_caps.descriptors, kernel_caps.size(), begin(kernel_caps)); 164 std::copy_n(exheader_header.arm11_kernel_caps.descriptors, kernel_caps.size(), begin(kernel_caps));
@@ -173,8 +176,12 @@ ResultStatus AppLoader_NCCH::LoadSectionExeFS(const char* name, std::vector<u8>&
173 if (!file.IsOpen()) 176 if (!file.IsOpen())
174 return ResultStatus::Error; 177 return ResultStatus::Error;
175 178
179 ResultStatus result = LoadExeFS();
180 if (result != ResultStatus::Success)
181 return result;
182
176 LOG_DEBUG(Loader, "%d sections:", kMaxSections); 183 LOG_DEBUG(Loader, "%d sections:", kMaxSections);
177 // Iterate through the ExeFs archive until we find the .code file... 184 // Iterate through the ExeFs archive until we find a section with the specified name...
178 for (unsigned section_number = 0; section_number < kMaxSections; section_number++) { 185 for (unsigned section_number = 0; section_number < kMaxSections; section_number++) {
179 const auto& section = exefs_header.section[section_number]; 186 const auto& section = exefs_header.section[section_number];
180 187
@@ -186,7 +193,7 @@ ResultStatus AppLoader_NCCH::LoadSectionExeFS(const char* name, std::vector<u8>&
186 s64 section_offset = (section.offset + exefs_offset + sizeof(ExeFs_Header) + ncch_offset); 193 s64 section_offset = (section.offset + exefs_offset + sizeof(ExeFs_Header) + ncch_offset);
187 file.Seek(section_offset, SEEK_SET); 194 file.Seek(section_offset, SEEK_SET);
188 195
189 if (is_compressed) { 196 if (strcmp(section.name, ".code") == 0 && is_compressed) {
190 // Section is compressed, read compressed .code section... 197 // Section is compressed, read compressed .code section...
191 std::unique_ptr<u8[]> temp_buffer; 198 std::unique_ptr<u8[]> temp_buffer;
192 try { 199 try {
@@ -215,9 +222,9 @@ ResultStatus AppLoader_NCCH::LoadSectionExeFS(const char* name, std::vector<u8>&
215 return ResultStatus::ErrorNotUsed; 222 return ResultStatus::ErrorNotUsed;
216} 223}
217 224
218ResultStatus AppLoader_NCCH::Load() { 225ResultStatus AppLoader_NCCH::LoadExeFS() {
219 if (is_loaded) 226 if (is_exefs_loaded)
220 return ResultStatus::ErrorAlreadyLoaded; 227 return ResultStatus::Success;
221 228
222 if (!file.IsOpen()) 229 if (!file.IsOpen())
223 return ResultStatus::Error; 230 return ResultStatus::Error;
@@ -255,7 +262,7 @@ ResultStatus AppLoader_NCCH::Load() {
255 resource_limit_category = exheader_header.arm11_system_local_caps.resource_limit_category; 262 resource_limit_category = exheader_header.arm11_system_local_caps.resource_limit_category;
256 263
257 LOG_INFO(Loader, "Name: %s" , exheader_header.codeset_info.name); 264 LOG_INFO(Loader, "Name: %s" , exheader_header.codeset_info.name);
258 LOG_INFO(Loader, "Program ID: %016X" , ncch_header.program_id); 265 LOG_INFO(Loader, "Program ID: %016llX" , ncch_header.program_id);
259 LOG_DEBUG(Loader, "Code compressed: %s" , is_compressed ? "yes" : "no"); 266 LOG_DEBUG(Loader, "Code compressed: %s" , is_compressed ? "yes" : "no");
260 LOG_DEBUG(Loader, "Entry point: 0x%08X", entry_point); 267 LOG_DEBUG(Loader, "Entry point: 0x%08X", entry_point);
261 LOG_DEBUG(Loader, "Code size: 0x%08X", code_size); 268 LOG_DEBUG(Loader, "Code size: 0x%08X", code_size);
@@ -282,6 +289,18 @@ ResultStatus AppLoader_NCCH::Load() {
282 if (file.ReadBytes(&exefs_header, sizeof(ExeFs_Header)) != sizeof(ExeFs_Header)) 289 if (file.ReadBytes(&exefs_header, sizeof(ExeFs_Header)) != sizeof(ExeFs_Header))
283 return ResultStatus::Error; 290 return ResultStatus::Error;
284 291
292 is_exefs_loaded = true;
293 return ResultStatus::Success;
294}
295
296ResultStatus AppLoader_NCCH::Load() {
297 if (is_loaded)
298 return ResultStatus::ErrorAlreadyLoaded;
299
300 ResultStatus result = LoadExeFS();
301 if (result != ResultStatus::Success)
302 return result;
303
285 is_loaded = true; // Set state to loaded 304 is_loaded = true; // Set state to loaded
286 305
287 return LoadExec(); // Load the executable into memory for booting 306 return LoadExec(); // Load the executable into memory for booting
diff --git a/src/core/loader/ncch.h b/src/core/loader/ncch.h
index ca6772a78..fd852c3de 100644
--- a/src/core/loader/ncch.h
+++ b/src/core/loader/ncch.h
@@ -232,6 +232,13 @@ private:
232 */ 232 */
233 ResultStatus LoadExec(); 233 ResultStatus LoadExec();
234 234
235 /**
236 * Ensure ExeFS is loaded and ready for reading sections
237 * @return ResultStatus result of function
238 */
239 ResultStatus LoadExeFS();
240
241 bool is_exefs_loaded = false;
235 bool is_compressed = false; 242 bool is_compressed = false;
236 243
237 u32 entry_point = 0; 244 u32 entry_point = 0;
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 7de5bd15d..ee9b69f81 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -15,6 +15,9 @@
15#include "core/memory_setup.h" 15#include "core/memory_setup.h"
16#include "core/mmio.h" 16#include "core/mmio.h"
17 17
18#include "video_core/renderer_base.h"
19#include "video_core/video_core.h"
20
18namespace Memory { 21namespace Memory {
19 22
20enum class PageType { 23enum class PageType {
@@ -22,8 +25,12 @@ enum class PageType {
22 Unmapped, 25 Unmapped,
23 /// Page is mapped to regular memory. This is the only type you can get pointers to. 26 /// Page is mapped to regular memory. This is the only type you can get pointers to.
24 Memory, 27 Memory,
28 /// Page is mapped to regular memory, but also needs to check for rasterizer cache flushing and invalidation
29 RasterizerCachedMemory,
25 /// Page is mapped to an I/O region. Writing and reading to this page is handled by functions. 30 /// Page is mapped to an I/O region. Writing and reading to this page is handled by functions.
26 Special, 31 Special,
32 /// Page is mapped to an I/O region, but also needs to check for rasterizer cache flushing and invalidation
33 RasterizerCachedSpecial,
27}; 34};
28 35
29struct SpecialRegion { 36struct SpecialRegion {
@@ -57,6 +64,12 @@ struct PageTable {
57 * the corresponding entry in `pointers` MUST be set to null. 64 * the corresponding entry in `pointers` MUST be set to null.
58 */ 65 */
59 std::array<PageType, NUM_ENTRIES> attributes; 66 std::array<PageType, NUM_ENTRIES> attributes;
67
68 /**
69 * Indicates the number of externally cached resources touching a page that should be
70 * flushed before the memory is accessed
71 */
72 std::array<u8, NUM_ENTRIES> cached_res_count;
60}; 73};
61 74
62/// Singular page table used for the singleton process 75/// Singular page table used for the singleton process
@@ -72,8 +85,15 @@ static void MapPages(u32 base, u32 size, u8* memory, PageType type) {
72 while (base != end) { 85 while (base != end) {
73 ASSERT_MSG(base < PageTable::NUM_ENTRIES, "out of range mapping at %08X", base); 86 ASSERT_MSG(base < PageTable::NUM_ENTRIES, "out of range mapping at %08X", base);
74 87
88 // Since pages are unmapped on shutdown after video core is shut down, the renderer may be null here
89 if (current_page_table->attributes[base] == PageType::RasterizerCachedMemory ||
90 current_page_table->attributes[base] == PageType::RasterizerCachedSpecial) {
91 RasterizerFlushAndInvalidateRegion(VirtualToPhysicalAddress(base << PAGE_BITS), PAGE_SIZE);
92 }
93
75 current_page_table->attributes[base] = type; 94 current_page_table->attributes[base] = type;
76 current_page_table->pointers[base] = memory; 95 current_page_table->pointers[base] = memory;
96 current_page_table->cached_res_count[base] = 0;
77 97
78 base += 1; 98 base += 1;
79 if (memory != nullptr) 99 if (memory != nullptr)
@@ -84,6 +104,7 @@ static void MapPages(u32 base, u32 size, u8* memory, PageType type) {
// Resets main_page_table to an empty state: every page pointer is null, every page attribute is
// PageType::Unmapped, and every cached-resource counter is zero.
84void InitMemoryMap() { 104void InitMemoryMap() {
85 main_page_table.pointers.fill(nullptr); 105 main_page_table.pointers.fill(nullptr);
86 main_page_table.attributes.fill(PageType::Unmapped); 106 main_page_table.attributes.fill(PageType::Unmapped);
107 main_page_table.cached_res_count.fill(0);
87} 108}
88 109
89void MapMemoryRegion(VAddr base, u32 size, u8* target) { 110void MapMemoryRegion(VAddr base, u32 size, u8* target) {
@@ -107,6 +128,28 @@ void UnmapRegion(VAddr base, u32 size) {
107} 128}
108 129
109/** 130/**
131 * Gets a pointer to the exact memory at the virtual address (i.e. not page aligned)
132 * using a VMA from the current process
133 */
134static u8* GetPointerFromVMA(VAddr vaddr) {
135 u8* direct_pointer = nullptr;
136
// Look up the VMA containing vaddr in the current process's VM manager.
// NOTE(review): the result of FindVMA is dereferenced without a check, so this assumes a VMA
// exists for vaddr - confirm that all callers guarantee this.
137 auto& vma = Kernel::g_current_process->vm_manager.FindVMA(vaddr)->second;
138 switch (vma.type) {
139 case Kernel::VMAType::AllocatedMemoryBlock:
// Backed by a memory block: start of the block's data plus this VMA's offset into it.
140 direct_pointer = vma.backing_block->data() + vma.offset;
141 break;
142 case Kernel::VMAType::BackingMemory:
// Backed by a raw pointer stored directly in the VMA.
143 direct_pointer = vma.backing_memory;
144 break;
145 default:
// Any other VMA type has no host pointer to return and is treated as a programming error.
146 UNREACHABLE();
147 }
148
// direct_pointer corresponds to the start of the VMA (vma.base); add vaddr's offset within it
// so the result addresses the exact byte rather than the start of the mapping.
149 return direct_pointer + (vaddr - vma.base);
150}
151
152/**
110 * This function should only be called for virtual addresses with attribute `PageType::Special`. 153 * This function should only be called for virtual addresses with attribute `PageType::Special`.
111 */ 154 */
112static MMIORegionPointer GetMMIOHandler(VAddr vaddr) { 155static MMIORegionPointer GetMMIOHandler(VAddr vaddr) {
@@ -126,6 +169,7 @@ template <typename T>
126T Read(const VAddr vaddr) { 169T Read(const VAddr vaddr) {
127 const u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; 170 const u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS];
128 if (page_pointer) { 171 if (page_pointer) {
172 // NOTE: Avoid adding any extra logic to this fast-path block
129 T value; 173 T value;
130 std::memcpy(&value, &page_pointer[vaddr & PAGE_MASK], sizeof(T)); 174 std::memcpy(&value, &page_pointer[vaddr & PAGE_MASK], sizeof(T));
131 return value; 175 return value;
@@ -139,8 +183,22 @@ T Read(const VAddr vaddr) {
139 case PageType::Memory: 183 case PageType::Memory:
140 ASSERT_MSG(false, "Mapped memory page without a pointer @ %08X", vaddr); 184 ASSERT_MSG(false, "Mapped memory page without a pointer @ %08X", vaddr);
141 break; 185 break;
186 case PageType::RasterizerCachedMemory:
187 {
188 RasterizerFlushRegion(VirtualToPhysicalAddress(vaddr), sizeof(T));
189
190 T value;
191 std::memcpy(&value, GetPointerFromVMA(vaddr), sizeof(T));
192 return value;
193 }
142 case PageType::Special: 194 case PageType::Special:
143 return ReadMMIO<T>(GetMMIOHandler(vaddr), vaddr); 195 return ReadMMIO<T>(GetMMIOHandler(vaddr), vaddr);
196 case PageType::RasterizerCachedSpecial:
197 {
198 RasterizerFlushRegion(VirtualToPhysicalAddress(vaddr), sizeof(T));
199
200 return ReadMMIO<T>(GetMMIOHandler(vaddr), vaddr);
201 }
144 default: 202 default:
145 UNREACHABLE(); 203 UNREACHABLE();
146 } 204 }
@@ -153,6 +211,7 @@ template <typename T>
153void Write(const VAddr vaddr, const T data) { 211void Write(const VAddr vaddr, const T data) {
154 u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; 212 u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS];
155 if (page_pointer) { 213 if (page_pointer) {
214 // NOTE: Avoid adding any extra logic to this fast-path block
156 std::memcpy(&page_pointer[vaddr & PAGE_MASK], &data, sizeof(T)); 215 std::memcpy(&page_pointer[vaddr & PAGE_MASK], &data, sizeof(T));
157 return; 216 return;
158 } 217 }
@@ -165,9 +224,23 @@ void Write(const VAddr vaddr, const T data) {
165 case PageType::Memory: 224 case PageType::Memory:
166 ASSERT_MSG(false, "Mapped memory page without a pointer @ %08X", vaddr); 225 ASSERT_MSG(false, "Mapped memory page without a pointer @ %08X", vaddr);
167 break; 226 break;
227 case PageType::RasterizerCachedMemory:
228 {
229 RasterizerFlushAndInvalidateRegion(VirtualToPhysicalAddress(vaddr), sizeof(T));
230
231 std::memcpy(GetPointerFromVMA(vaddr), &data, sizeof(T));
232 break;
233 }
168 case PageType::Special: 234 case PageType::Special:
169 WriteMMIO<T>(GetMMIOHandler(vaddr), vaddr, data); 235 WriteMMIO<T>(GetMMIOHandler(vaddr), vaddr, data);
170 break; 236 break;
237 case PageType::RasterizerCachedSpecial:
238 {
239 RasterizerFlushAndInvalidateRegion(VirtualToPhysicalAddress(vaddr), sizeof(T));
240
241 WriteMMIO<T>(GetMMIOHandler(vaddr), vaddr, data);
242 break;
243 }
171 default: 244 default:
172 UNREACHABLE(); 245 UNREACHABLE();
173 } 246 }
@@ -179,6 +252,10 @@ u8* GetPointer(const VAddr vaddr) {
179 return page_pointer + (vaddr & PAGE_MASK); 252 return page_pointer + (vaddr & PAGE_MASK);
180 } 253 }
181 254
255 if (current_page_table->attributes[vaddr >> PAGE_BITS] == PageType::RasterizerCachedMemory) {
256 return GetPointerFromVMA(vaddr);
257 }
258
182 LOG_ERROR(HW_Memory, "unknown GetPointer @ 0x%08x", vaddr); 259 LOG_ERROR(HW_Memory, "unknown GetPointer @ 0x%08x", vaddr);
183 return nullptr; 260 return nullptr;
184} 261}
@@ -187,6 +264,69 @@ u8* GetPhysicalPointer(PAddr address) {
187 return GetPointer(PhysicalToVirtualAddress(address)); 264 return GetPointer(PhysicalToVirtualAddress(address));
188} 265}
189 266
// Adds count_delta to the cached-resource counter of every page touched by the physical region
// [start, start + size), and switches each page between its regular type and its
// RasterizerCached* variant whenever the counter leaves or returns to zero.
//   start       - physical address of the first byte of the region
//   size        - length of the region in bytes
//   count_delta - signed amount added to each touched page's counter
267void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) {
// A region starting at physical address 0 is ignored entirely.
// NOTE(review): presumably 0 is used as a "no address" marker by callers - confirm.
268 if (start == 0) {
269 return;
270 }
271
// Page index of the last byte minus page index of the first byte, plus one.
// NOTE(review): with size == 0 and a start that is not page aligned this still yields one page.
272 u32 num_pages = ((start + size - 1) >> PAGE_BITS) - (start >> PAGE_BITS) + 1;
273 PAddr paddr = start;
274
275 for (unsigned i = 0; i < num_pages; ++i) {
276 VAddr vaddr = PhysicalToVirtualAddress(paddr);
277 u8& res_count = current_page_table->cached_res_count[vaddr >> PAGE_BITS];
// The counter is a u8; reject deltas that would push it above UINT8_MAX or below zero.
278 ASSERT_MSG(count_delta <= UINT8_MAX - res_count, "Rasterizer resource cache counter overflow!");
279 ASSERT_MSG(count_delta >= -res_count, "Rasterizer resource cache counter underflow!");
280
281 // Switch page type to cached if now cached
// This check runs BEFORE the delta is applied: a zero counter means the page is currently uncached.
282 if (res_count == 0) {
283 PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
284 switch (page_type) {
285 case PageType::Memory:
286 page_type = PageType::RasterizerCachedMemory;
// Clearing the page pointer makes Read/Write/GetPointer skip their direct-pointer fast path
// and reach the attribute-based handling for this page instead.
287 current_page_table->pointers[vaddr >> PAGE_BITS] = nullptr;
288 break;
289 case PageType::Special:
290 page_type = PageType::RasterizerCachedSpecial;
291 break;
292 default:
// Any other page type (including Unmapped or an already-cached type) is treated as a programming error.
293 UNREACHABLE();
294 }
295 }
296
297 res_count += count_delta;
298
299 // Switch page type to uncached if now uncached
// This check runs AFTER the delta is applied.
// NOTE(review): when count_delta == 0 on an uncached page, both this branch and the one above
// execute, so the page is switched to cached and straight back again.
300 if (res_count == 0) {
301 PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
302 switch (page_type) {
303 case PageType::RasterizerCachedMemory:
304 page_type = PageType::Memory;
// Restore the direct page pointer; the address is masked down to the start of the page
// because the pointers table stores page-base pointers.
305 current_page_table->pointers[vaddr >> PAGE_BITS] = GetPointerFromVMA(vaddr & ~PAGE_MASK);
306 break;
307 case PageType::RasterizerCachedSpecial:
308 page_type = PageType::Special;
309 break;
310 default:
311 UNREACHABLE();
312 }
313 }
// Advance by one page; only the page index of paddr's translation is used, so an unaligned
// start does not matter here.
314 paddr += PAGE_SIZE;
315 }
316}
317
// Forwards a flush request for the physical region (start, size) to the active renderer's
// rasterizer. Does nothing when no renderer is currently set (VideoCore::g_renderer is null).
318void RasterizerFlushRegion(PAddr start, u32 size) {
319 if (VideoCore::g_renderer != nullptr) {
320 VideoCore::g_renderer->Rasterizer()->FlushRegion(start, size);
321 }
322}
323
// Forwards a flush-and-invalidate request for the physical region (start, size) to the active
// renderer's rasterizer. Does nothing when no renderer is currently set (VideoCore::g_renderer is null).
324void RasterizerFlushAndInvalidateRegion(PAddr start, u32 size) {
325 if (VideoCore::g_renderer != nullptr) {
326 VideoCore::g_renderer->Rasterizer()->FlushAndInvalidateRegion(start, size);
327 }
328}
329
// Reads a single byte from virtual address addr by forwarding to the Read<u8> template.
190u8 Read8(const VAddr addr) { 330u8 Read8(const VAddr addr) {
191 return Read<u8>(addr); 331 return Read<u8>(addr);
192} 332}
diff --git a/src/core/memory.h b/src/core/memory.h
index 5af72b7a7..126d60471 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -100,15 +100,9 @@ enum : VAddr {
100 SHARED_PAGE_SIZE = 0x00001000, 100 SHARED_PAGE_SIZE = 0x00001000,
101 SHARED_PAGE_VADDR_END = SHARED_PAGE_VADDR + SHARED_PAGE_SIZE, 101 SHARED_PAGE_VADDR_END = SHARED_PAGE_VADDR + SHARED_PAGE_SIZE,
102 102
103 // TODO(yuriks): The size of this area is dynamic, the kernel grows
104 // it as more and more threads are created. For now we'll just use a
105 // hardcoded value.
106 /// Area where TLS (Thread-Local Storage) buffers are allocated. 103 /// Area where TLS (Thread-Local Storage) buffers are allocated.
107 TLS_AREA_VADDR = 0x1FF82000, 104 TLS_AREA_VADDR = 0x1FF82000,
108 TLS_ENTRY_SIZE = 0x200, 105 TLS_ENTRY_SIZE = 0x200,
109 TLS_AREA_SIZE = 300 * TLS_ENTRY_SIZE + 0x800, // Space for up to 300 threads + round to page size
110 TLS_AREA_VADDR_END = TLS_AREA_VADDR + TLS_AREA_SIZE,
111
112 106
113 /// Equivalent to LINEAR_HEAP_VADDR, but expanded to cover the extra memory in the New 3DS. 107 /// Equivalent to LINEAR_HEAP_VADDR, but expanded to cover the extra memory in the New 3DS.
114 NEW_LINEAR_HEAP_VADDR = 0x30000000, 108 NEW_LINEAR_HEAP_VADDR = 0x30000000,
@@ -148,4 +142,20 @@ VAddr PhysicalToVirtualAddress(PAddr addr);
148 */ 142 */
149u8* GetPhysicalPointer(PAddr address); 143u8* GetPhysicalPointer(PAddr address);
150 144
145/**
146 * Adds the supplied value to the rasterizer resource cache counter of each
147 * page touching the region.
148 */
149void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta);
150
151/**
152 * Flushes any externally cached rasterizer resources touching the given region.
153 */
154void RasterizerFlushRegion(PAddr start, u32 size);
155
156/**
157 * Flushes and invalidates any externally cached rasterizer resources touching the given region.
158 */
159void RasterizerFlushAndInvalidateRegion(PAddr start, u32 size);
160
151} 161}
diff --git a/src/core/settings.cpp b/src/core/settings.cpp
index 8a14f75aa..77261eafe 100644
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -4,8 +4,27 @@
4 4
5#include "settings.h" 5#include "settings.h"
6 6
7#include "audio_core/audio_core.h"
8
9#include "core/gdbstub/gdbstub.h"
10
11#include "video_core/video_core.h"
12
7namespace Settings { 13namespace Settings {
8 14
9Values values = {}; 15Values values = {};
10 16
// Pushes the settings currently stored in `values` out to the subsystems that consume them:
// the GDB stub, the video core flags and the audio sink selection.
17void Apply() {
18
// GDB stub: set the server port (gdbstub_port is widened to u32) and then toggle the server
// according to use_gdbstub.
19 GDBStub::SetServerPort(static_cast<u32>(values.gdbstub_port));
20 GDBStub::ToggleServer(values.use_gdbstub);
21
// Video core: copy the renderer-related booleans into the VideoCore globals.
22 VideoCore::g_hw_renderer_enabled = values.use_hw_renderer;
23 VideoCore::g_shader_jit_enabled = values.use_shader_jit;
24 VideoCore::g_scaled_resolution_enabled = values.use_scaled_resolution;
25
// Audio: select the output sink by its string identifier.
26 AudioCore::SelectSink(values.sink_id);
27
11} 28}
29
30} // namespace
diff --git a/src/core/settings.h b/src/core/settings.h
index 4034b795a..a61f25cbe 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -6,7 +6,8 @@
6 6
7#include <string> 7#include <string>
8#include <array> 8#include <array>
9#include <common/file_util.h> 9
10#include "common/common_types.h"
10 11
11namespace Settings { 12namespace Settings {
12 13
@@ -58,6 +59,7 @@ struct Values {
58 // Renderer 59 // Renderer
59 bool use_hw_renderer; 60 bool use_hw_renderer;
60 bool use_shader_jit; 61 bool use_shader_jit;
62 bool use_scaled_resolution;
61 63
62 float bg_red; 64 float bg_red;
63 float bg_green; 65 float bg_green;
@@ -65,9 +67,14 @@ struct Values {
65 67
66 std::string log_filter; 68 std::string log_filter;
67 69
70 // Audio
71 std::string sink_id;
72
68 // Debugging 73 // Debugging
69 bool use_gdbstub; 74 bool use_gdbstub;
70 u16 gdbstub_port; 75 u16 gdbstub_port;
71} extern values; 76} extern values;
72 77
78void Apply();
79
73} 80}
diff --git a/src/core/tracer/recorder.cpp b/src/core/tracer/recorder.cpp
index c6dc35c83..7abaacf70 100644
--- a/src/core/tracer/recorder.cpp
+++ b/src/core/tracer/recorder.cpp
@@ -26,17 +26,17 @@ void Recorder::Finish(const std::string& filename) {
26 // Calculate file offsets 26 // Calculate file offsets
27 auto& initial = header.initial_state_offsets; 27 auto& initial = header.initial_state_offsets;
28 28
29 initial.gpu_registers_size = initial_state.gpu_registers.size(); 29 initial.gpu_registers_size = static_cast<u32>(initial_state.gpu_registers.size());
30 initial.lcd_registers_size = initial_state.lcd_registers.size(); 30 initial.lcd_registers_size = static_cast<u32>(initial_state.lcd_registers.size());
31 initial.pica_registers_size = initial_state.pica_registers.size(); 31 initial.pica_registers_size = static_cast<u32>(initial_state.pica_registers.size());
32 initial.default_attributes_size = initial_state.default_attributes.size(); 32 initial.default_attributes_size = static_cast<u32>(initial_state.default_attributes.size());
33 initial.vs_program_binary_size = initial_state.vs_program_binary.size(); 33 initial.vs_program_binary_size = static_cast<u32>(initial_state.vs_program_binary.size());
34 initial.vs_swizzle_data_size = initial_state.vs_swizzle_data.size(); 34 initial.vs_swizzle_data_size = static_cast<u32>(initial_state.vs_swizzle_data.size());
35 initial.vs_float_uniforms_size = initial_state.vs_float_uniforms.size(); 35 initial.vs_float_uniforms_size = static_cast<u32>(initial_state.vs_float_uniforms.size());
36 initial.gs_program_binary_size = initial_state.gs_program_binary.size(); 36 initial.gs_program_binary_size = static_cast<u32>(initial_state.gs_program_binary.size());
37 initial.gs_swizzle_data_size = initial_state.gs_swizzle_data.size(); 37 initial.gs_swizzle_data_size = static_cast<u32>(initial_state.gs_swizzle_data.size());
38 initial.gs_float_uniforms_size = initial_state.gs_float_uniforms.size(); 38 initial.gs_float_uniforms_size = static_cast<u32>(initial_state.gs_float_uniforms.size());
39 header.stream_size = stream.size(); 39 header.stream_size = static_cast<u32>(stream.size());
40 40
41 initial.gpu_registers = sizeof(header); 41 initial.gpu_registers = sizeof(header);
42 initial.lcd_registers = initial.gpu_registers + initial.gpu_registers_size * sizeof(u32); 42 initial.lcd_registers = initial.gpu_registers + initial.gpu_registers_size * sizeof(u32);
@@ -68,7 +68,7 @@ void Recorder::Finish(const std::string& filename) {
68 DEBUG_ASSERT(stream_element.extra_data.size() == 0); 68 DEBUG_ASSERT(stream_element.extra_data.size() == 0);
69 break; 69 break;
70 } 70 }
71 header.stream_offset += stream_element.extra_data.size(); 71 header.stream_offset += static_cast<u32>(stream_element.extra_data.size());
72 } 72 }
73 73
74 try { 74 try {
diff --git a/src/core/tracer/recorder.h b/src/core/tracer/recorder.h
index a42ccc45f..febf883c8 100644
--- a/src/core/tracer/recorder.h
+++ b/src/core/tracer/recorder.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <string>
7#include <unordered_map> 8#include <unordered_map>
8#include <vector> 9#include <vector>
9 10
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
new file mode 100644
index 000000000..457c55571
--- /dev/null
+++ b/src/tests/CMakeLists.txt
@@ -0,0 +1,16 @@
# Build script for the `tests` executable.
# Source files compiled into the test binary.
1set(SRCS
2 tests.cpp
3 )
4
# No headers yet; the empty list is still passed to the calls below.
5set(HEADERS
6 )
7
# NOTE(review): create_directory_groups is defined elsewhere in the project; presumably it
# arranges the files into IDE source groups - confirm.
8create_directory_groups(${SRCS} ${HEADERS})
9
# Make the single-header Catch test framework (<catch.hpp>) available to the test sources.
10include_directories(../../externals/catch/single_include/)
11
12add_executable(tests ${SRCS} ${HEADERS})
# Link against the emulator libraries and the platform-specific libraries.
13target_link_libraries(tests core video_core audio_core common)
14target_link_libraries(tests ${PLATFORM_LIBRARIES})
15
# Register the built executable as a test named `tests`.
16add_test(NAME tests COMMAND $<TARGET_FILE:tests>)
diff --git a/src/tests/tests.cpp b/src/tests/tests.cpp
new file mode 100644
index 000000000..73978676f
--- /dev/null
+++ b/src/tests/tests.cpp
@@ -0,0 +1,9 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#define CATCH_CONFIG_MAIN
6#include <catch.hpp>
7
8// Catch provides the main function since we've given it the
9// CATCH_CONFIG_MAIN preprocessor directive.
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 76cfd4f7d..581a37897 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -15,7 +15,7 @@ set(SRCS
15 shader/shader.cpp 15 shader/shader.cpp
16 shader/shader_interpreter.cpp 16 shader/shader_interpreter.cpp
17 swrasterizer.cpp 17 swrasterizer.cpp
18 utils.cpp 18 vertex_loader.cpp
19 video_core.cpp 19 video_core.cpp
20 ) 20 )
21 21
@@ -43,6 +43,7 @@ set(HEADERS
43 shader/shader_interpreter.h 43 shader/shader_interpreter.h
44 swrasterizer.h 44 swrasterizer.h
45 utils.h 45 utils.h
46 vertex_loader.h
46 video_core.h 47 video_core.h
47 ) 48 )
48 49
diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp
index 3d503486e..db99ce666 100644
--- a/src/video_core/clipper.cpp
+++ b/src/video_core/clipper.cpp
@@ -2,13 +2,24 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
6#include <array>
7#include <cstddef>
8
5#include <boost/container/static_vector.hpp> 9#include <boost/container/static_vector.hpp>
10#include <boost/container/vector.hpp>
11
12#include "common/bit_field.h"
13#include "common/common_types.h"
14#include "common/logging/log.h"
15#include "common/vector_math.h"
6 16
7#include "video_core/clipper.h" 17#include "video_core/clipper.h"
8#include "video_core/pica.h" 18#include "video_core/pica.h"
9#include "video_core/pica_state.h" 19#include "video_core/pica_state.h"
20#include "video_core/pica_types.h"
10#include "video_core/rasterizer.h" 21#include "video_core/rasterizer.h"
11#include "video_core/shader/shader_interpreter.h" 22#include "video_core/shader/shader.h"
12 23
13namespace Pica { 24namespace Pica {
14 25
@@ -64,8 +75,6 @@ static void InitScreenCoordinates(OutputVertex& vtx)
64 viewport.halfsize_y = float24::FromRaw(regs.viewport_size_y); 75 viewport.halfsize_y = float24::FromRaw(regs.viewport_size_y);
65 viewport.offset_x = float24::FromFloat32(static_cast<float>(regs.viewport_corner.x)); 76 viewport.offset_x = float24::FromFloat32(static_cast<float>(regs.viewport_corner.x));
66 viewport.offset_y = float24::FromFloat32(static_cast<float>(regs.viewport_corner.y)); 77 viewport.offset_y = float24::FromFloat32(static_cast<float>(regs.viewport_corner.y));
67 viewport.zscale = float24::FromRaw(regs.viewport_depth_range);
68 viewport.offset_z = float24::FromRaw(regs.viewport_depth_far_plane);
69 78
70 float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w; 79 float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w;
71 vtx.color *= inv_w; 80 vtx.color *= inv_w;
@@ -78,7 +87,7 @@ static void InitScreenCoordinates(OutputVertex& vtx)
78 87
79 vtx.screenpos[0] = (vtx.pos.x * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x; 88 vtx.screenpos[0] = (vtx.pos.x * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x;
80 vtx.screenpos[1] = (vtx.pos.y * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y; 89 vtx.screenpos[1] = (vtx.pos.y * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y;
81 vtx.screenpos[2] = viewport.offset_z + vtx.pos.z * inv_w * viewport.zscale; 90 vtx.screenpos[2] = vtx.pos.z * inv_w;
82} 91}
83 92
84void ProcessTriangle(const OutputVertex &v0, const OutputVertex &v1, const OutputVertex &v2) { 93void ProcessTriangle(const OutputVertex &v0, const OutputVertex &v1, const OutputVertex &v2) {
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 028b59348..bf4664f9e 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -2,26 +2,32 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <cmath> 5#include <array>
6#include <boost/range/algorithm/fill.hpp> 6#include <cstddef>
7#include <memory>
8#include <utility>
7 9
8#include "common/alignment.h" 10#include "common/assert.h"
11#include "common/logging/log.h"
9#include "common/microprofile.h" 12#include "common/microprofile.h"
10#include "common/profiler.h" 13#include "common/vector_math.h"
11 14
12#include "core/settings.h"
13#include "core/hle/service/gsp_gpu.h" 15#include "core/hle/service/gsp_gpu.h"
14#include "core/hw/gpu.h" 16#include "core/hw/gpu.h"
17#include "core/memory.h"
18#include "core/tracer/recorder.h"
15 19
16#include "video_core/clipper.h"
17#include "video_core/command_processor.h" 20#include "video_core/command_processor.h"
21#include "video_core/debug_utils/debug_utils.h"
18#include "video_core/pica.h" 22#include "video_core/pica.h"
19#include "video_core/pica_state.h" 23#include "video_core/pica_state.h"
24#include "video_core/pica_types.h"
20#include "video_core/primitive_assembly.h" 25#include "video_core/primitive_assembly.h"
26#include "video_core/rasterizer_interface.h"
21#include "video_core/renderer_base.h" 27#include "video_core/renderer_base.h"
28#include "video_core/shader/shader.h"
29#include "video_core/vertex_loader.h"
22#include "video_core/video_core.h" 30#include "video_core/video_core.h"
23#include "video_core/debug_utils/debug_utils.h"
24#include "video_core/shader/shader_interpreter.h"
25 31
26namespace Pica { 32namespace Pica {
27 33
@@ -35,8 +41,6 @@ static int default_attr_counter = 0;
35 41
36static u32 default_attr_write_buffer[3]; 42static u32 default_attr_write_buffer[3];
37 43
38Common::Profiling::TimingCategory category_drawing("Drawing");
39
40// Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF 44// Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF
41static const u32 expand_bits_to_bytes[] = { 45static const u32 expand_bits_to_bytes[] = {
42 0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff, 46 0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff,
@@ -124,7 +128,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
124 128
125 // TODO: Verify that this actually modifies the register! 129 // TODO: Verify that this actually modifies the register!
126 if (setup.index < 15) { 130 if (setup.index < 15) {
127 g_state.vs.default_attributes[setup.index] = attribute; 131 g_state.vs_default_attributes[setup.index] = attribute;
128 setup.index++; 132 setup.index++;
129 } else { 133 } else {
130 // Put each attribute into an immediate input buffer. 134 // Put each attribute into an immediate input buffer.
@@ -140,13 +144,12 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
140 immediate_attribute_id = 0; 144 immediate_attribute_id = 0;
141 145
142 Shader::UnitState<false> shader_unit; 146 Shader::UnitState<false> shader_unit;
143 Shader::Setup(shader_unit); 147 g_state.vs.Setup();
144
145 if (g_debug_context)
146 g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, static_cast<void*>(&immediate_input));
147 148
148 // Send to vertex shader 149 // Send to vertex shader
149 Shader::OutputVertex output = Shader::Run(shader_unit, immediate_input, regs.vs.num_input_attributes+1); 150 if (g_debug_context)
151 g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, static_cast<void*>(&immediate_input));
152 Shader::OutputVertex output = g_state.vs.Run(shader_unit, immediate_input, regs.vs.num_input_attributes+1);
150 153
151 // Send to renderer 154 // Send to renderer
152 using Pica::Shader::OutputVertex; 155 using Pica::Shader::OutputVertex;
@@ -186,60 +189,18 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
186 case PICA_REG_INDEX(trigger_draw): 189 case PICA_REG_INDEX(trigger_draw):
187 case PICA_REG_INDEX(trigger_draw_indexed): 190 case PICA_REG_INDEX(trigger_draw_indexed):
188 { 191 {
189 Common::Profiling::ScopeTimer scope_timer(category_drawing);
190 MICROPROFILE_SCOPE(GPU_Drawing); 192 MICROPROFILE_SCOPE(GPU_Drawing);
191 193
192#if PICA_LOG_TEV 194#if PICA_LOG_TEV
193 DebugUtils::DumpTevStageConfig(regs.GetTevStages()); 195 DebugUtils::DumpTevStageConfig(regs.GetTevStages());
194#endif 196#endif
195
196 if (g_debug_context) 197 if (g_debug_context)
197 g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr); 198 g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr);
198 199
199 const auto& attribute_config = regs.vertex_attributes; 200 // Processes information about internal vertex attributes to figure out how a vertex is loaded.
200 const u32 base_address = attribute_config.GetPhysicalBaseAddress(); 201 // Later, these can be compiled and cached.
201 202 const u32 base_address = regs.vertex_attributes.GetPhysicalBaseAddress();
202 // Information about internal vertex attributes 203 VertexLoader loader(regs);
203 u32 vertex_attribute_sources[16];
204 boost::fill(vertex_attribute_sources, 0xdeadbeef);
205 u32 vertex_attribute_strides[16] = {};
206 Regs::VertexAttributeFormat vertex_attribute_formats[16] = {};
207
208 u32 vertex_attribute_elements[16] = {};
209 u32 vertex_attribute_element_size[16] = {};
210
211 // Setup attribute data from loaders
212 for (int loader = 0; loader < 12; ++loader) {
213 const auto& loader_config = attribute_config.attribute_loaders[loader];
214
215 u32 offset = 0;
216
217 // TODO: What happens if a loader overwrites a previous one's data?
218 for (unsigned component = 0; component < loader_config.component_count; ++component) {
219 if (component >= 12) {
220 LOG_ERROR(HW_GPU, "Overflow in the vertex attribute loader %u trying to load component %u", loader, component);
221 continue;
222 }
223
224 u32 attribute_index = loader_config.GetComponent(component);
225 if (attribute_index < 12) {
226 int element_size = attribute_config.GetElementSizeInBytes(attribute_index);
227 offset = Common::AlignUp(offset, element_size);
228 vertex_attribute_sources[attribute_index] = base_address + loader_config.data_offset + offset;
229 vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count);
230 vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index);
231 vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index);
232 vertex_attribute_element_size[attribute_index] = element_size;
233 offset += attribute_config.GetStride(attribute_index);
234 } else if (attribute_index < 16) {
235 // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively
236 offset = Common::AlignUp(offset, 4);
237 offset += (attribute_index - 11) * 4;
238 } else {
239 UNREACHABLE(); // This is truly unreachable due to the number of bits for each component
240 }
241 }
242 }
243 204
244 // Load vertices 205 // Load vertices
245 bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed)); 206 bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed));
@@ -249,10 +210,6 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
249 const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8); 210 const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8);
250 bool index_u16 = index_info.format != 0; 211 bool index_u16 = index_info.format != 0;
251 212
252#if PICA_DUMP_GEOMETRY
253 DebugUtils::GeometryDumper geometry_dumper;
254 PrimitiveAssembler<DebugUtils::GeometryDumper::Vertex> dumping_primitive_assembler(regs.triangle_topology.Value());
255#endif
256 PrimitiveAssembler<Shader::OutputVertex>& primitive_assembler = g_state.primitive_assembler; 213 PrimitiveAssembler<Shader::OutputVertex>& primitive_assembler = g_state.primitive_assembler;
257 214
258 if (g_debug_context) { 215 if (g_debug_context) {
@@ -267,32 +224,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
267 } 224 }
268 } 225 }
269 226
270 class { 227 DebugUtils::MemoryAccessTracker memory_accesses;
271 /// Combine overlapping and close ranges
272 void SimplifyRanges() {
273 for (auto it = ranges.begin(); it != ranges.end(); ++it) {
274 // NOTE: We add 32 to the range end address to make sure "close" ranges are combined, too
275 auto it2 = std::next(it);
276 while (it2 != ranges.end() && it->first + it->second + 32 >= it2->first) {
277 it->second = std::max(it->second, it2->first + it2->second - it->first);
278 it2 = ranges.erase(it2);
279 }
280 }
281 }
282
283 public:
284 /// Record a particular memory access in the list
285 void AddAccess(u32 paddr, u32 size) {
286 // Create new range or extend existing one
287 ranges[paddr] = std::max(ranges[paddr], size);
288
289 // Simplify ranges...
290 SimplifyRanges();
291 }
292
293 /// Map of accessed ranges (mapping start address to range size)
294 std::map<u32, u32> ranges;
295 } memory_accesses;
296 228
297 // Simple circular-replacement vertex cache 229 // Simple circular-replacement vertex cache
298 // The size has been tuned for optimal balance between hit-rate and the cost of lookup 230 // The size has been tuned for optimal balance between hit-rate and the cost of lookup
@@ -304,7 +236,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
304 vertex_cache_ids.fill(-1); 236 vertex_cache_ids.fill(-1);
305 237
306 Shader::UnitState<false> shader_unit; 238 Shader::UnitState<false> shader_unit;
307 Shader::Setup(shader_unit); 239 g_state.vs.Setup();
308 240
309 for (unsigned int index = 0; index < regs.num_vertices; ++index) 241 for (unsigned int index = 0; index < regs.num_vertices; ++index)
310 { 242 {
@@ -336,71 +268,12 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
336 if (!vertex_cache_hit) { 268 if (!vertex_cache_hit) {
337 // Initialize data for the current vertex 269 // Initialize data for the current vertex
338 Shader::InputVertex input; 270 Shader::InputVertex input;
271 loader.LoadVertex(base_address, index, vertex, input, memory_accesses);
339 272
340 for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) {
341 if (vertex_attribute_elements[i] != 0) {
342 // Default attribute values set if array elements have < 4 components. This
343 // is *not* carried over from the default attribute settings even if they're
344 // enabled for this attribute.
345 static const float24 zero = float24::FromFloat32(0.0f);
346 static const float24 one = float24::FromFloat32(1.0f);
347 input.attr[i] = Math::Vec4<float24>(zero, zero, zero, one);
348
349 // Load per-vertex data from the loader arrays
350 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
351 u32 source_addr = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i];
352 const u8* srcdata = Memory::GetPhysicalPointer(source_addr);
353
354 if (g_debug_context && Pica::g_debug_context->recorder) {
355 memory_accesses.AddAccess(source_addr,
356 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4
357 : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1);
358 }
359
360 const float srcval =
361 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE) ? *reinterpret_cast<const s8*>(srcdata) :
362 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *reinterpret_cast<const u8*>(srcdata) :
363 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? *reinterpret_cast<const s16*>(srcdata) :
364 *reinterpret_cast<const float*>(srcdata);
365
366 input.attr[i][comp] = float24::FromFloat32(srcval);
367 LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f",
368 comp, i, vertex, index,
369 attribute_config.GetPhysicalBaseAddress(),
370 vertex_attribute_sources[i] - base_address,
371 vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i],
372 input.attr[i][comp].ToFloat32());
373 }
374 } else if (attribute_config.IsDefaultAttribute(i)) {
375 // Load the default attribute if we're configured to do so
376 input.attr[i] = g_state.vs.default_attributes[i];
377 LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)",
378 i, vertex, index,
379 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),
380 input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
381 } else {
382 // TODO(yuriks): In this case, no data gets loaded and the vertex
383 // remains with the last value it had. This isn't currently maintained
384 // as global state, however, and so won't work in Citra yet.
385 }
386 }
387
388 if (g_debug_context)
389 g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input);
390
391#if PICA_DUMP_GEOMETRY
392 // NOTE: When dumping geometry, we simply assume that the first input attribute
393 // corresponds to the position for now.
394 DebugUtils::GeometryDumper::Vertex dumped_vertex = {
395 input.attr[0][0].ToFloat32(), input.attr[0][1].ToFloat32(), input.attr[0][2].ToFloat32()
396 };
397 using namespace std::placeholders;
398 dumping_primitive_assembler.SubmitVertex(dumped_vertex,
399 std::bind(&DebugUtils::GeometryDumper::AddTriangle,
400 &geometry_dumper, _1, _2, _3));
401#endif
402 // Send to vertex shader 273 // Send to vertex shader
403 output = Shader::Run(shader_unit, input, attribute_config.GetNumTotalAttributes()); 274 if (g_debug_context)
275 g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, (void*)&input);
276 output = g_state.vs.Run(shader_unit, input, loader.GetNumTotalAttributes());
404 277
405 if (is_indexed) { 278 if (is_indexed) {
406 vertex_cache[vertex_cache_pos] = output; 279 vertex_cache[vertex_cache_pos] = output;
@@ -424,10 +297,6 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
424 range.second, range.first); 297 range.second, range.first);
425 } 298 }
426 299
427#if PICA_DUMP_GEOMETRY
428 geometry_dumper.Dump();
429#endif
430
431 break; 300 break;
432 } 301 }
433 302
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index bac6d69c7..871368323 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -4,35 +4,41 @@
4 4
5#include <algorithm> 5#include <algorithm>
6#include <condition_variable> 6#include <condition_variable>
7#include <cstdint>
7#include <cstring> 8#include <cstring>
8#include <fstream> 9#include <fstream>
9#include <list>
10#include <map> 10#include <map>
11#include <mutex> 11#include <mutex>
12#include <stdexcept>
12#include <string> 13#include <string>
13 14
14#ifdef HAVE_PNG 15#ifdef HAVE_PNG
15#include <png.h> 16#include <png.h>
17#include <setjmp.h>
16#endif 18#endif
17 19
20#include <nihstro/bit_field.h>
18#include <nihstro/float24.h> 21#include <nihstro/float24.h>
19#include <nihstro/shader_binary.h> 22#include <nihstro/shader_binary.h>
20 23
21#include "common/assert.h" 24#include "common/assert.h"
25#include "common/bit_field.h"
22#include "common/color.h" 26#include "common/color.h"
23#include "common/common_types.h" 27#include "common/common_types.h"
24#include "common/file_util.h" 28#include "common/file_util.h"
29#include "common/logging/log.h"
25#include "common/math_util.h" 30#include "common/math_util.h"
26#include "common/vector_math.h" 31#include "common/vector_math.h"
27 32
28#include "core/settings.h" 33#include "video_core/debug_utils/debug_utils.h"
29
30#include "video_core/pica.h" 34#include "video_core/pica.h"
31#include "video_core/pica_state.h" 35#include "video_core/pica_state.h"
36#include "video_core/pica_types.h"
37#include "video_core/rasterizer_interface.h"
32#include "video_core/renderer_base.h" 38#include "video_core/renderer_base.h"
39#include "video_core/shader/shader.h"
33#include "video_core/utils.h" 40#include "video_core/utils.h"
34#include "video_core/video_core.h" 41#include "video_core/video_core.h"
35#include "video_core/debug_utils/debug_utils.h"
36 42
37using nihstro::DVLBHeader; 43using nihstro::DVLBHeader;
38using nihstro::DVLEHeader; 44using nihstro::DVLEHeader;
@@ -40,15 +46,12 @@ using nihstro::DVLPHeader;
40 46
41namespace Pica { 47namespace Pica {
42 48
43void DebugContext::OnEvent(Event event, void* data) { 49void DebugContext::DoOnEvent(Event event, void* data) {
44 if (!breakpoints[event].enabled)
45 return;
46
47 { 50 {
48 std::unique_lock<std::mutex> lock(breakpoint_mutex); 51 std::unique_lock<std::mutex> lock(breakpoint_mutex);
49 52
50 // Commit the hardware renderer's framebuffer so it will show on debug widgets 53 // Commit the rasterizer's caches so framebuffers, render targets, etc. will show on debug widgets
51 VideoCore::g_renderer->Rasterizer()->FlushFramebuffer(); 54 VideoCore::g_renderer->Rasterizer()->FlushAll();
52 55
53 // TODO: Should stop the CPU thread here once we multithread emulation. 56 // TODO: Should stop the CPU thread here once we multithread emulation.
54 57
@@ -85,35 +88,6 @@ std::shared_ptr<DebugContext> g_debug_context; // TODO: Get rid of this global
85 88
86namespace DebugUtils { 89namespace DebugUtils {
87 90
88void GeometryDumper::AddTriangle(Vertex& v0, Vertex& v1, Vertex& v2) {
89 vertices.push_back(v0);
90 vertices.push_back(v1);
91 vertices.push_back(v2);
92
93 int num_vertices = (int)vertices.size();
94 faces.push_back({{ num_vertices-3, num_vertices-2, num_vertices-1 }});
95}
96
97void GeometryDumper::Dump() {
98 static int index = 0;
99 std::string filename = std::string("geometry_dump") + std::to_string(++index) + ".obj";
100
101 std::ofstream file(filename);
102
103 for (const auto& vertex : vertices) {
104 file << "v " << vertex.pos[0]
105 << " " << vertex.pos[1]
106 << " " << vertex.pos[2] << std::endl;
107 }
108
109 for (const Face& face : faces) {
110 file << "f " << 1+face.index[0]
111 << " " << 1+face.index[1]
112 << " " << 1+face.index[2] << std::endl;
113 }
114}
115
116
117void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, const Shader::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes) 91void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, const Shader::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes)
118{ 92{
119 struct StuffToWrite { 93 struct StuffToWrite {
@@ -234,11 +208,12 @@ void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, c
234 208
235 // TODO: Reduce the amount of binary code written to relevant portions 209 // TODO: Reduce the amount of binary code written to relevant portions
236 dvlp.binary_offset = write_offset - dvlp_offset; 210 dvlp.binary_offset = write_offset - dvlp_offset;
237 dvlp.binary_size_words = setup.program_code.size(); 211 dvlp.binary_size_words = static_cast<uint32_t>(setup.program_code.size());
238 QueueForWriting(reinterpret_cast<const u8*>(setup.program_code.data()), setup.program_code.size() * sizeof(u32)); 212 QueueForWriting(reinterpret_cast<const u8*>(setup.program_code.data()),
213 static_cast<u32>(setup.program_code.size()) * sizeof(u32));
239 214
240 dvlp.swizzle_info_offset = write_offset - dvlp_offset; 215 dvlp.swizzle_info_offset = write_offset - dvlp_offset;
241 dvlp.swizzle_info_num_entries = setup.swizzle_data.size(); 216 dvlp.swizzle_info_num_entries = static_cast<uint32_t>(setup.swizzle_data.size());
242 u32 dummy = 0; 217 u32 dummy = 0;
243 for (unsigned int i = 0; i < setup.swizzle_data.size(); ++i) { 218 for (unsigned int i = 0; i < setup.swizzle_data.size(); ++i) {
244 QueueForWriting(reinterpret_cast<const u8*>(&setup.swizzle_data[i]), sizeof(setup.swizzle_data[i])); 219 QueueForWriting(reinterpret_cast<const u8*>(&setup.swizzle_data[i]), sizeof(setup.swizzle_data[i]));
@@ -290,7 +265,7 @@ void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, c
290 constant_table.emplace_back(constant); 265 constant_table.emplace_back(constant);
291 } 266 }
292 dvle.constant_table_offset = write_offset - dvlb.dvle_offset; 267 dvle.constant_table_offset = write_offset - dvlb.dvle_offset;
293 dvle.constant_table_size = constant_table.size(); 268 dvle.constant_table_size = static_cast<uint32_t>(constant_table.size());
294 for (const auto& constant : constant_table) { 269 for (const auto& constant : constant_table) {
295 QueueForWriting(reinterpret_cast<const u8*>(&constant), sizeof(constant)); 270 QueueForWriting(reinterpret_cast<const u8*>(&constant), sizeof(constant));
296 } 271 }
@@ -315,7 +290,7 @@ void StartPicaTracing()
315 } 290 }
316 291
317 std::lock_guard<std::mutex> lock(pica_trace_mutex); 292 std::lock_guard<std::mutex> lock(pica_trace_mutex);
318 pica_trace = std::unique_ptr<PicaTrace>(new PicaTrace); 293 pica_trace = std::make_unique<PicaTrace>();
319 294
320 is_pica_tracing = true; 295 is_pica_tracing = true;
321} 296}
@@ -615,6 +590,21 @@ TextureInfo TextureInfo::FromPicaRegister(const Regs::TextureConfig& config,
615 return info; 590 return info;
616} 591}
617 592
593#ifdef HAVE_PNG
594// Adapter functions to libpng to write/flush to File::IOFile instances.
595static void WriteIOFile(png_structp png_ptr, png_bytep data, png_size_t length) {
596 auto* fp = static_cast<FileUtil::IOFile*>(png_get_io_ptr(png_ptr));
597 if (!fp->WriteBytes(data, length))
598 png_error(png_ptr, "Failed to write to output PNG file.");
599}
600
601static void FlushIOFile(png_structp png_ptr) {
602 auto* fp = static_cast<FileUtil::IOFile*>(png_get_io_ptr(png_ptr));
603 if (!fp->Flush())
604 png_error(png_ptr, "Failed to flush to output PNG file.");
605}
606#endif
607
618void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) { 608void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) {
619#ifndef HAVE_PNG 609#ifndef HAVE_PNG
620 return; 610 return;
@@ -658,7 +648,7 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) {
658 goto finalise; 648 goto finalise;
659 } 649 }
660 650
661 png_init_io(png_ptr, fp.GetHandle()); 651 png_set_write_fn(png_ptr, static_cast<void*>(&fp), WriteIOFile, FlushIOFile);
662 652
663 // Write header (8 bit color depth) 653 // Write header (8 bit color depth)
664 png_set_IHDR(png_ptr, info_ptr, texture_config.width, texture_config.height, 654 png_set_IHDR(png_ptr, info_ptr, texture_config.width, texture_config.height,
@@ -706,106 +696,125 @@ finalise:
706#endif 696#endif
707} 697}
708 698
709void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages) 699static std::string ReplacePattern(const std::string& input, const std::string& pattern, const std::string& replacement) {
710{ 700 size_t start = input.find(pattern);
701 if (start == std::string::npos)
702 return input;
703
704 std::string ret = input;
705 ret.replace(start, pattern.length(), replacement);
706 return ret;
707}
708
709static std::string GetTevStageConfigSourceString(const Pica::Regs::TevStageConfig::Source& source) {
711 using Source = Pica::Regs::TevStageConfig::Source; 710 using Source = Pica::Regs::TevStageConfig::Source;
711 static const std::map<Source, std::string> source_map = {
712 { Source::PrimaryColor, "PrimaryColor" },
713 { Source::PrimaryFragmentColor, "PrimaryFragmentColor" },
714 { Source::SecondaryFragmentColor, "SecondaryFragmentColor" },
715 { Source::Texture0, "Texture0" },
716 { Source::Texture1, "Texture1" },
717 { Source::Texture2, "Texture2" },
718 { Source::Texture3, "Texture3" },
719 { Source::PreviousBuffer, "PreviousBuffer" },
720 { Source::Constant, "Constant" },
721 { Source::Previous, "Previous" },
722 };
723
724 const auto src_it = source_map.find(source);
725 if (src_it == source_map.end())
726 return "Unknown";
727
728 return src_it->second;
729}
730
731static std::string GetTevStageConfigColorSourceString(const Pica::Regs::TevStageConfig::Source& source, const Pica::Regs::TevStageConfig::ColorModifier modifier) {
712 using ColorModifier = Pica::Regs::TevStageConfig::ColorModifier; 732 using ColorModifier = Pica::Regs::TevStageConfig::ColorModifier;
733 static const std::map<ColorModifier, std::string> color_modifier_map = {
734 { ColorModifier::SourceColor, "%source.rgb" },
735 { ColorModifier::OneMinusSourceColor, "(1.0 - %source.rgb)" },
736 { ColorModifier::SourceAlpha, "%source.aaa" },
737 { ColorModifier::OneMinusSourceAlpha, "(1.0 - %source.aaa)" },
738 { ColorModifier::SourceRed, "%source.rrr" },
739 { ColorModifier::OneMinusSourceRed, "(1.0 - %source.rrr)" },
740 { ColorModifier::SourceGreen, "%source.ggg" },
741 { ColorModifier::OneMinusSourceGreen, "(1.0 - %source.ggg)" },
742 { ColorModifier::SourceBlue, "%source.bbb" },
743 { ColorModifier::OneMinusSourceBlue, "(1.0 - %source.bbb)" },
744 };
745
746 auto src_str = GetTevStageConfigSourceString(source);
747 auto modifier_it = color_modifier_map.find(modifier);
748 std::string modifier_str = "%source.????";
749 if (modifier_it != color_modifier_map.end())
750 modifier_str = modifier_it->second;
751
752 return ReplacePattern(modifier_str, "%source", src_str);
753}
754
755static std::string GetTevStageConfigAlphaSourceString(const Pica::Regs::TevStageConfig::Source& source, const Pica::Regs::TevStageConfig::AlphaModifier modifier) {
713 using AlphaModifier = Pica::Regs::TevStageConfig::AlphaModifier; 756 using AlphaModifier = Pica::Regs::TevStageConfig::AlphaModifier;
757 static const std::map<AlphaModifier, std::string> alpha_modifier_map = {
758 { AlphaModifier::SourceAlpha, "%source.a" },
759 { AlphaModifier::OneMinusSourceAlpha, "(1.0 - %source.a)" },
760 { AlphaModifier::SourceRed, "%source.r" },
761 { AlphaModifier::OneMinusSourceRed, "(1.0 - %source.r)" },
762 { AlphaModifier::SourceGreen, "%source.g" },
763 { AlphaModifier::OneMinusSourceGreen, "(1.0 - %source.g)" },
764 { AlphaModifier::SourceBlue, "%source.b" },
765 { AlphaModifier::OneMinusSourceBlue, "(1.0 - %source.b)" },
766 };
767
768 auto src_str = GetTevStageConfigSourceString(source);
769 auto modifier_it = alpha_modifier_map.find(modifier);
770 std::string modifier_str = "%source.????";
771 if (modifier_it != alpha_modifier_map.end())
772 modifier_str = modifier_it->second;
773
774 return ReplacePattern(modifier_str, "%source", src_str);
775}
776
777static std::string GetTevStageConfigOperationString(const Pica::Regs::TevStageConfig::Operation& operation) {
714 using Operation = Pica::Regs::TevStageConfig::Operation; 778 using Operation = Pica::Regs::TevStageConfig::Operation;
779 static const std::map<Operation, std::string> combiner_map = {
780 { Operation::Replace, "%source1" },
781 { Operation::Modulate, "(%source1 * %source2)" },
782 { Operation::Add, "(%source1 + %source2)" },
783 { Operation::AddSigned, "(%source1 + %source2) - 0.5" },
784 { Operation::Lerp, "lerp(%source1, %source2, %source3)" },
785 { Operation::Subtract, "(%source1 - %source2)" },
786 { Operation::Dot3_RGB, "dot(%source1, %source2)" },
787 { Operation::MultiplyThenAdd, "((%source1 * %source2) + %source3)" },
788 { Operation::AddThenMultiply, "((%source1 + %source2) * %source3)" },
789 };
715 790
716 std::string stage_info = "Tev setup:\n"; 791 const auto op_it = combiner_map.find(operation);
717 for (size_t index = 0; index < stages.size(); ++index) { 792 if (op_it == combiner_map.end())
718 const auto& tev_stage = stages[index]; 793 return "Unknown op (%source1, %source2, %source3)";
719 794
720 static const std::map<Source, std::string> source_map = { 795 return op_it->second;
721 { Source::PrimaryColor, "PrimaryColor" }, 796}
722 { Source::Texture0, "Texture0" },
723 { Source::Texture1, "Texture1" },
724 { Source::Texture2, "Texture2" },
725 { Source::Constant, "Constant" },
726 { Source::Previous, "Previous" },
727 };
728 797
729 static const std::map<ColorModifier, std::string> color_modifier_map = { 798std::string GetTevStageConfigColorCombinerString(const Pica::Regs::TevStageConfig& tev_stage) {
730 { ColorModifier::SourceColor, { "%source.rgb" } }, 799 auto op_str = GetTevStageConfigOperationString(tev_stage.color_op);
731 { ColorModifier::SourceAlpha, { "%source.aaa" } }, 800 op_str = ReplacePattern(op_str, "%source1", GetTevStageConfigColorSourceString(tev_stage.color_source1, tev_stage.color_modifier1));
732 }; 801 op_str = ReplacePattern(op_str, "%source2", GetTevStageConfigColorSourceString(tev_stage.color_source2, tev_stage.color_modifier2));
733 static const std::map<AlphaModifier, std::string> alpha_modifier_map = { 802 return ReplacePattern(op_str, "%source3", GetTevStageConfigColorSourceString(tev_stage.color_source3, tev_stage.color_modifier3));
734 { AlphaModifier::SourceAlpha, "%source.a" }, 803}
735 { AlphaModifier::OneMinusSourceAlpha, "(255 - %source.a)" },
736 };
737 804
738 static const std::map<Operation, std::string> combiner_map = { 805std::string GetTevStageConfigAlphaCombinerString(const Pica::Regs::TevStageConfig& tev_stage) {
739 { Operation::Replace, "%source1" }, 806 auto op_str = GetTevStageConfigOperationString(tev_stage.alpha_op);
740 { Operation::Modulate, "(%source1 * %source2) / 255" }, 807 op_str = ReplacePattern(op_str, "%source1", GetTevStageConfigAlphaSourceString(tev_stage.alpha_source1, tev_stage.alpha_modifier1));
741 { Operation::Add, "(%source1 + %source2)" }, 808 op_str = ReplacePattern(op_str, "%source2", GetTevStageConfigAlphaSourceString(tev_stage.alpha_source2, tev_stage.alpha_modifier2));
742 { Operation::Lerp, "lerp(%source1, %source2, %source3)" }, 809 return ReplacePattern(op_str, "%source3", GetTevStageConfigAlphaSourceString(tev_stage.alpha_source3, tev_stage.alpha_modifier3));
743 }; 810}
744 811
745 static auto ReplacePattern = 812void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig, 6>& stages) {
746 [](const std::string& input, const std::string& pattern, const std::string& replacement) -> std::string { 813 std::string stage_info = "Tev setup:\n";
747 size_t start = input.find(pattern); 814 for (size_t index = 0; index < stages.size(); ++index) {
748 if (start == std::string::npos) 815 const auto& tev_stage = stages[index];
749 return input; 816 stage_info += "Stage " + std::to_string(index) + ": " + GetTevStageConfigColorCombinerString(tev_stage) + " " + GetTevStageConfigAlphaCombinerString(tev_stage) + "\n";
750
751 std::string ret = input;
752 ret.replace(start, pattern.length(), replacement);
753 return ret;
754 };
755 static auto GetColorSourceStr =
756 [](const Source& src, const ColorModifier& modifier) {
757 auto src_it = source_map.find(src);
758 std::string src_str = "Unknown";
759 if (src_it != source_map.end())
760 src_str = src_it->second;
761
762 auto modifier_it = color_modifier_map.find(modifier);
763 std::string modifier_str = "%source.????";
764 if (modifier_it != color_modifier_map.end())
765 modifier_str = modifier_it->second;
766
767 return ReplacePattern(modifier_str, "%source", src_str);
768 };
769 static auto GetColorCombinerStr =
770 [](const Regs::TevStageConfig& tev_stage) {
771 auto op_it = combiner_map.find(tev_stage.color_op);
772 std::string op_str = "Unknown op (%source1, %source2, %source3)";
773 if (op_it != combiner_map.end())
774 op_str = op_it->second;
775
776 op_str = ReplacePattern(op_str, "%source1", GetColorSourceStr(tev_stage.color_source1, tev_stage.color_modifier1));
777 op_str = ReplacePattern(op_str, "%source2", GetColorSourceStr(tev_stage.color_source2, tev_stage.color_modifier2));
778 return ReplacePattern(op_str, "%source3", GetColorSourceStr(tev_stage.color_source3, tev_stage.color_modifier3));
779 };
780 static auto GetAlphaSourceStr =
781 [](const Source& src, const AlphaModifier& modifier) {
782 auto src_it = source_map.find(src);
783 std::string src_str = "Unknown";
784 if (src_it != source_map.end())
785 src_str = src_it->second;
786
787 auto modifier_it = alpha_modifier_map.find(modifier);
788 std::string modifier_str = "%source.????";
789 if (modifier_it != alpha_modifier_map.end())
790 modifier_str = modifier_it->second;
791
792 return ReplacePattern(modifier_str, "%source", src_str);
793 };
794 static auto GetAlphaCombinerStr =
795 [](const Regs::TevStageConfig& tev_stage) {
796 auto op_it = combiner_map.find(tev_stage.alpha_op);
797 std::string op_str = "Unknown op (%source1, %source2, %source3)";
798 if (op_it != combiner_map.end())
799 op_str = op_it->second;
800
801 op_str = ReplacePattern(op_str, "%source1", GetAlphaSourceStr(tev_stage.alpha_source1, tev_stage.alpha_modifier1));
802 op_str = ReplacePattern(op_str, "%source2", GetAlphaSourceStr(tev_stage.alpha_source2, tev_stage.alpha_modifier2));
803 return ReplacePattern(op_str, "%source3", GetAlphaSourceStr(tev_stage.alpha_source3, tev_stage.alpha_modifier3));
804 };
805
806 stage_info += "Stage " + std::to_string(index) + ": " + GetColorCombinerStr(tev_stage) + " " + GetAlphaCombinerStr(tev_stage) + "\n";
807 } 817 }
808
809 LOG_TRACE(HW_GPU, "%s", stage_info.c_str()); 818 LOG_TRACE(HW_GPU, "%s", stage_info.c_str());
810} 819}
811 820
diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h
index 795160a32..92e9734ae 100644
--- a/src/video_core/debug_utils/debug_utils.h
+++ b/src/video_core/debug_utils/debug_utils.h
@@ -4,23 +4,33 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <algorithm>
7#include <array> 8#include <array>
8#include <condition_variable> 9#include <condition_variable>
10#include <iterator>
9#include <list> 11#include <list>
10#include <map> 12#include <map>
11#include <memory> 13#include <memory>
12#include <mutex> 14#include <mutex>
15#include <string>
16#include <utility>
13#include <vector> 17#include <vector>
14 18
19#include "common/common_types.h"
15#include "common/vector_math.h" 20#include "common/vector_math.h"
16 21
17#include "core/tracer/recorder.h"
18
19#include "video_core/pica.h" 22#include "video_core/pica.h"
20#include "video_core/shader/shader.h" 23
24namespace CiTrace {
25class Recorder;
26}
21 27
22namespace Pica { 28namespace Pica {
23 29
30namespace Shader {
31struct ShaderSetup;
32}
33
24class DebugContext { 34class DebugContext {
25public: 35public:
26 enum class Event { 36 enum class Event {
@@ -30,7 +40,7 @@ public:
30 PicaCommandProcessed, 40 PicaCommandProcessed,
31 IncomingPrimitiveBatch, 41 IncomingPrimitiveBatch,
32 FinishedPrimitiveBatch, 42 FinishedPrimitiveBatch,
33 VertexLoaded, 43 VertexShaderInvocation,
34 IncomingDisplayTransfer, 44 IncomingDisplayTransfer,
35 GSPCommandProcessed, 45 GSPCommandProcessed,
36 BufferSwapped, 46 BufferSwapped,
@@ -114,7 +124,15 @@ public:
114 * @param event Event which has happened 124 * @param event Event which has happened
115 * @param data Optional data pointer (pass nullptr if unused). Needs to remain valid until Resume() is called. 125 * @param data Optional data pointer (pass nullptr if unused). Needs to remain valid until Resume() is called.
116 */ 126 */
117 void OnEvent(Event event, void* data); 127 void OnEvent(Event event, void* data) {
128 // This check is left in the header to allow the compiler to inline it.
129 if (!breakpoints[(int)event].enabled)
130 return;
131 // For the rest of event handling, call a separate function.
132 DoOnEvent(event, data);
133 }
134
135 void DoOnEvent(Event event, void *data);
118 136
119 /** 137 /**
120 * Resume from the current breakpoint. 138 * Resume from the current breakpoint.
@@ -126,12 +144,14 @@ public:
126 * Delete all set breakpoints and resume emulation. 144 * Delete all set breakpoints and resume emulation.
127 */ 145 */
128 void ClearBreakpoints() { 146 void ClearBreakpoints() {
129 breakpoints.clear(); 147 for (auto &bp : breakpoints) {
148 bp.enabled = false;
149 }
130 Resume(); 150 Resume();
131 } 151 }
132 152
133 // TODO: Evaluate if access to these members should be hidden behind a public interface. 153 // TODO: Evaluate if access to these members should be hidden behind a public interface.
134 std::map<Event, BreakPoint> breakpoints; 154 std::array<BreakPoint, (int)Event::NumEvents> breakpoints;
135 Event active_breakpoint; 155 Event active_breakpoint;
136 bool at_breakpoint = false; 156 bool at_breakpoint = false;
137 157
@@ -158,30 +178,9 @@ extern std::shared_ptr<DebugContext> g_debug_context; // TODO: Get rid of this g
158 178
159namespace DebugUtils { 179namespace DebugUtils {
160 180
161#define PICA_DUMP_GEOMETRY 0
162#define PICA_DUMP_TEXTURES 0 181#define PICA_DUMP_TEXTURES 0
163#define PICA_LOG_TEV 0 182#define PICA_LOG_TEV 0
164 183
165// Simple utility class for dumping geometry data to an OBJ file
166class GeometryDumper {
167public:
168 struct Vertex {
169 std::array<float,3> pos;
170 };
171
172 void AddTriangle(Vertex& v0, Vertex& v1, Vertex& v2);
173
174 void Dump();
175
176private:
177 struct Face {
178 int index[3];
179 };
180
181 std::vector<Vertex> vertices;
182 std::vector<Face> faces;
183};
184
185void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, 184void DumpShader(const std::string& filename, const Regs::ShaderConfig& config,
186 const Shader::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes); 185 const Shader::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes);
187 186
@@ -225,7 +224,41 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int s, int t, const Texture
225 224
226void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data); 225void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data);
227 226
228void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages); 227std::string GetTevStageConfigColorCombinerString(const Pica::Regs::TevStageConfig& tev_stage);
228std::string GetTevStageConfigAlphaCombinerString(const Pica::Regs::TevStageConfig& tev_stage);
229
230/// Dumps the Tev stage config to log at trace level
231void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig, 6>& stages);
232
233/**
234 * Used in the vertex loader to merge access records. TODO: Investigate if actually useful.
235 */
236class MemoryAccessTracker {
237 /// Combine overlapping and close ranges
238 void SimplifyRanges() {
239 for (auto it = ranges.begin(); it != ranges.end(); ++it) {
240 // NOTE: We add 32 to the range end address to make sure "close" ranges are combined, too
241 auto it2 = std::next(it);
242 while (it2 != ranges.end() && it->first + it->second + 32 >= it2->first) {
243 it->second = std::max(it->second, it2->first + it2->second - it->first);
244 it2 = ranges.erase(it2);
245 }
246 }
247 }
248
249public:
250 /// Record a particular memory access in the list
251 void AddAccess(u32 paddr, u32 size) {
252 // Create new range or extend existing one
253 ranges[paddr] = std::max(ranges[paddr], size);
254
255 // Simplify ranges...
256 SimplifyRanges();
257 }
258
259 /// Map of accessed ranges (mapping start address to range size)
260 std::map<u32, u32> ranges;
261};
229 262
230} // namespace 263} // namespace
231 264
diff --git a/src/video_core/pica.cpp b/src/video_core/pica.cpp
index ccbaf071b..ec78f9593 100644
--- a/src/video_core/pica.cpp
+++ b/src/video_core/pica.cpp
@@ -3,10 +3,13 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <cstring> 5#include <cstring>
6#include <iterator>
6#include <unordered_map> 7#include <unordered_map>
8#include <utility>
7 9
8#include "video_core/pica.h" 10#include "video_core/pica.h"
9#include "video_core/pica_state.h" 11#include "video_core/pica_state.h"
12#include "video_core/primitive_assembly.h"
10#include "video_core/shader/shader.h" 13#include "video_core/shader/shader.h"
11 14
12namespace Pica { 15namespace Pica {
@@ -480,7 +483,7 @@ std::string Regs::GetCommandName(int index) {
480 static std::unordered_map<u32, const char*> map; 483 static std::unordered_map<u32, const char*> map;
481 484
482 if (map.empty()) { 485 if (map.empty()) {
483 map.insert(begin(register_names), end(register_names)); 486 map.insert(std::begin(register_names), std::end(register_names));
484 } 487 }
485 488
486 // Return empty string if no match is found 489 // Return empty string if no match is found
@@ -497,7 +500,7 @@ void Init() {
497} 500}
498 501
499void Shutdown() { 502void Shutdown() {
500 Shader::Shutdown(); 503 Shader::ClearCache();
501} 504}
502 505
503template <typename T> 506template <typename T>
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 16f9e4006..86c0a0096 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -5,10 +5,13 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <cmath>
9#include <cstddef> 8#include <cstddef>
10#include <string> 9#include <string>
11 10
11#ifndef _MSC_VER
12#include <type_traits> // for std::enable_if
13#endif
14
12#include "common/assert.h" 15#include "common/assert.h"
13#include "common/bit_field.h" 16#include "common/bit_field.h"
14#include "common/common_funcs.h" 17#include "common/common_funcs.h"
@@ -16,8 +19,6 @@
16#include "common/vector_math.h" 19#include "common/vector_math.h"
17#include "common/logging/log.h" 20#include "common/logging/log.h"
18 21
19#include "pica_types.h"
20
21namespace Pica { 22namespace Pica {
22 23
23// Returns index corresponding to the Regs member labeled by field_name 24// Returns index corresponding to the Regs member labeled by field_name
@@ -69,7 +70,7 @@ struct Regs {
69 INSERT_PADDING_WORDS(0x9); 70 INSERT_PADDING_WORDS(0x9);
70 71
71 BitField<0, 24, u32> viewport_depth_range; // float24 72 BitField<0, 24, u32> viewport_depth_range; // float24
72 BitField<0, 24, u32> viewport_depth_far_plane; // float24 73 BitField<0, 24, u32> viewport_depth_near_plane; // float24
73 74
74 BitField<0, 3, u32> vs_output_total; 75 BitField<0, 3, u32> vs_output_total;
75 76
@@ -121,9 +122,31 @@ struct Regs {
121 BitField<16, 10, s32> y; 122 BitField<16, 10, s32> y;
122 } viewport_corner; 123 } viewport_corner;
123 124
124 INSERT_PADDING_WORDS(0x17); 125 INSERT_PADDING_WORDS(0x1);
126
127 //TODO: early depth
128 INSERT_PADDING_WORDS(0x1);
129
130 INSERT_PADDING_WORDS(0x2);
131
132 enum DepthBuffering : u32 {
133 WBuffering = 0,
134 ZBuffering = 1,
135 };
136 BitField< 0, 1, DepthBuffering> depthmap_enable;
137
138 INSERT_PADDING_WORDS(0x12);
125 139
126 struct TextureConfig { 140 struct TextureConfig {
141 enum TextureType : u32 {
142 Texture2D = 0,
143 TextureCube = 1,
144 Shadow2D = 2,
145 Projection2D = 3,
146 ShadowCube = 4,
147 Disabled = 5,
148 };
149
127 enum WrapMode : u32 { 150 enum WrapMode : u32 {
128 ClampToEdge = 0, 151 ClampToEdge = 0,
129 ClampToBorder = 1, 152 ClampToBorder = 1,
@@ -154,6 +177,7 @@ struct Regs {
154 BitField< 2, 1, TextureFilter> min_filter; 177 BitField< 2, 1, TextureFilter> min_filter;
155 BitField< 8, 2, WrapMode> wrap_t; 178 BitField< 8, 2, WrapMode> wrap_t;
156 BitField<12, 2, WrapMode> wrap_s; 179 BitField<12, 2, WrapMode> wrap_s;
180 BitField<28, 2, TextureType> type; ///< @note Only valid for texture 0 according to 3DBrew.
157 }; 181 };
158 182
159 INSERT_PADDING_WORDS(0x1); 183 INSERT_PADDING_WORDS(0x1);
@@ -577,8 +601,18 @@ struct Regs {
577 } 601 }
578 } 602 }
579 603
580 struct { 604 struct FramebufferConfig {
581 INSERT_PADDING_WORDS(0x6); 605 INSERT_PADDING_WORDS(0x3);
606
607 union {
608 BitField<0, 4, u32> allow_color_write; // 0 = disable, else enable
609 };
610
611 INSERT_PADDING_WORDS(0x1);
612
613 union {
614 BitField<0, 2, u32> allow_depth_stencil_write; // 0 = disable, else enable
615 };
582 616
583 DepthFormat depth_format; // TODO: Should be a BitField! 617 DepthFormat depth_format; // TODO: Should be a BitField!
584 BitField<16, 3, ColorFormat> color_format; 618 BitField<16, 3, ColorFormat> color_format;
@@ -737,8 +771,13 @@ struct Regs {
737 case LightingSampler::ReflectGreen: 771 case LightingSampler::ReflectGreen:
738 case LightingSampler::ReflectBlue: 772 case LightingSampler::ReflectBlue:
739 return (config == LightingConfig::Config4) || (config == LightingConfig::Config5) || (config == LightingConfig::Config7); 773 return (config == LightingConfig::Config4) || (config == LightingConfig::Config5) || (config == LightingConfig::Config7);
774 default:
775 UNREACHABLE_MSG("Regs::IsLightingSamplerSupported: Reached "
776 "unreachable section, sampler should be one "
777 "of Distribution0, Distribution1, Fresnel, "
778 "ReflectRed, ReflectGreen or ReflectBlue, instead "
779 "got %i", static_cast<int>(config));
740 } 780 }
741 return false;
742 } 781 }
743 782
744 struct { 783 struct {
@@ -1263,10 +1302,11 @@ ASSERT_REG_POSITION(cull_mode, 0x40);
1263ASSERT_REG_POSITION(viewport_size_x, 0x41); 1302ASSERT_REG_POSITION(viewport_size_x, 0x41);
1264ASSERT_REG_POSITION(viewport_size_y, 0x43); 1303ASSERT_REG_POSITION(viewport_size_y, 0x43);
1265ASSERT_REG_POSITION(viewport_depth_range, 0x4d); 1304ASSERT_REG_POSITION(viewport_depth_range, 0x4d);
1266ASSERT_REG_POSITION(viewport_depth_far_plane, 0x4e); 1305ASSERT_REG_POSITION(viewport_depth_near_plane, 0x4e);
1267ASSERT_REG_POSITION(vs_output_attributes[0], 0x50); 1306ASSERT_REG_POSITION(vs_output_attributes[0], 0x50);
1268ASSERT_REG_POSITION(vs_output_attributes[1], 0x51); 1307ASSERT_REG_POSITION(vs_output_attributes[1], 0x51);
1269ASSERT_REG_POSITION(viewport_corner, 0x68); 1308ASSERT_REG_POSITION(viewport_corner, 0x68);
1309ASSERT_REG_POSITION(depthmap_enable, 0x6D);
1270ASSERT_REG_POSITION(texture0_enable, 0x80); 1310ASSERT_REG_POSITION(texture0_enable, 0x80);
1271ASSERT_REG_POSITION(texture0, 0x81); 1311ASSERT_REG_POSITION(texture0, 0x81);
1272ASSERT_REG_POSITION(texture0_format, 0x8e); 1312ASSERT_REG_POSITION(texture0_format, 0x8e);
diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h
index 323290054..495174c25 100644
--- a/src/video_core/pica_state.h
+++ b/src/video_core/pica_state.h
@@ -4,6 +4,11 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
8
9#include "common/bit_field.h"
10#include "common/common_types.h"
11
7#include "video_core/pica.h" 12#include "video_core/pica.h"
8#include "video_core/primitive_assembly.h" 13#include "video_core/primitive_assembly.h"
9#include "video_core/shader/shader.h" 14#include "video_core/shader/shader.h"
@@ -20,6 +25,8 @@ struct State {
20 Shader::ShaderSetup vs; 25 Shader::ShaderSetup vs;
21 Shader::ShaderSetup gs; 26 Shader::ShaderSetup gs;
22 27
28 std::array<Math::Vec4<float24>, 16> vs_default_attributes;
29
23 struct { 30 struct {
24 union LutEntry { 31 union LutEntry {
25 // Used for raw access 32 // Used for raw access
@@ -51,7 +58,7 @@ struct State {
51 // Used to buffer partial vertices for immediate-mode rendering. 58 // Used to buffer partial vertices for immediate-mode rendering.
52 Shader::InputVertex input_vertex; 59 Shader::InputVertex input_vertex;
53 // Index of the next attribute to be loaded into `input_vertex`. 60 // Index of the next attribute to be loaded into `input_vertex`.
54 int current_attribute = 0; 61 u32 current_attribute = 0;
55 } immediate; 62 } immediate;
56 63
57 // This is constructed with a dummy triangle topology 64 // This is constructed with a dummy triangle topology
diff --git a/src/video_core/pica_types.h b/src/video_core/pica_types.h
index ecf45654b..3b7bfbdca 100644
--- a/src/video_core/pica_types.h
+++ b/src/video_core/pica_types.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <cmath>
7#include <cstring> 8#include <cstring>
8 9
9#include "common/common_types.h" 10#include "common/common_types.h"
diff --git a/src/video_core/primitive_assembly.cpp b/src/video_core/primitive_assembly.cpp
index 0061690f1..68ea3c08a 100644
--- a/src/video_core/primitive_assembly.cpp
+++ b/src/video_core/primitive_assembly.cpp
@@ -6,8 +6,7 @@
6 6
7#include "video_core/pica.h" 7#include "video_core/pica.h"
8#include "video_core/primitive_assembly.h" 8#include "video_core/primitive_assembly.h"
9#include "video_core/debug_utils/debug_utils.h" 9#include "video_core/shader/shader.h"
10#include "video_core/shader/shader_interpreter.h"
11 10
12namespace Pica { 11namespace Pica {
13 12
@@ -68,7 +67,5 @@ void PrimitiveAssembler<VertexType>::Reconfigure(Regs::TriangleTopology topology
68// explicitly instantiate use cases 67// explicitly instantiate use cases
69template 68template
70struct PrimitiveAssembler<Shader::OutputVertex>; 69struct PrimitiveAssembler<Shader::OutputVertex>;
71template
72struct PrimitiveAssembler<DebugUtils::GeometryDumper::Vertex>;
73 70
74} // namespace 71} // namespace
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index fd02aa652..65168f05a 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -3,23 +3,28 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm> 5#include <algorithm>
6#include <array>
6#include <cmath> 7#include <cmath>
7 8
9#include "common/assert.h"
10#include "common/bit_field.h"
8#include "common/color.h" 11#include "common/color.h"
9#include "common/common_types.h" 12#include "common/common_types.h"
13#include "common/logging/log.h"
10#include "common/math_util.h" 14#include "common/math_util.h"
11#include "common/microprofile.h" 15#include "common/microprofile.h"
12#include "common/profiler.h" 16#include "common/vector_math.h"
13 17
14#include "core/memory.h" 18#include "core/memory.h"
15#include "core/hw/gpu.h" 19#include "core/hw/gpu.h"
16 20
21#include "video_core/debug_utils/debug_utils.h"
17#include "video_core/pica.h" 22#include "video_core/pica.h"
18#include "video_core/pica_state.h" 23#include "video_core/pica_state.h"
24#include "video_core/pica_types.h"
19#include "video_core/rasterizer.h" 25#include "video_core/rasterizer.h"
20#include "video_core/utils.h" 26#include "video_core/utils.h"
21#include "video_core/debug_utils/debug_utils.h" 27#include "video_core/shader/shader.h"
22#include "video_core/shader/shader_interpreter.h"
23 28
24namespace Pica { 29namespace Pica {
25 30
@@ -287,7 +292,6 @@ static int SignedArea (const Math::Vec2<Fix12P4>& vtx1,
287 return Math::Cross(vec1, vec2).z; 292 return Math::Cross(vec1, vec2).z;
288}; 293};
289 294
290static Common::Profiling::TimingCategory rasterization_category("Rasterization");
291MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 240)); 295MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 240));
292 296
293/** 297/**
@@ -300,7 +304,6 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
300 bool reversed = false) 304 bool reversed = false)
301{ 305{
302 const auto& regs = g_state.regs; 306 const auto& regs = g_state.regs;
303 Common::Profiling::ScopeTimer timer(rasterization_category);
304 MICROPROFILE_SCOPE(GPU_Rasterization); 307 MICROPROFILE_SCOPE(GPU_Rasterization);
305 308
306 // vertex positions in rasterizer coordinates 309 // vertex positions in rasterizer coordinates
@@ -439,8 +442,33 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
439 442
440 DEBUG_ASSERT(0 != texture.config.address); 443 DEBUG_ASSERT(0 != texture.config.address);
441 444
442 int s = (int)(uv[i].u() * float24::FromFloat32(static_cast<float>(texture.config.width))).ToFloat32(); 445 float24 u = uv[i].u();
443 int t = (int)(uv[i].v() * float24::FromFloat32(static_cast<float>(texture.config.height))).ToFloat32(); 446 float24 v = uv[i].v();
447
448 // Only unit 0 respects the texturing type (according to 3DBrew)
449 // TODO: Refactor so cubemaps and shadowmaps can be handled
450 if (i == 0) {
451 switch(texture.config.type) {
452 case Regs::TextureConfig::Texture2D:
453 break;
454 case Regs::TextureConfig::Projection2D: {
455 auto tc0_w = GetInterpolatedAttribute(v0.tc0_w, v1.tc0_w, v2.tc0_w);
456 u /= tc0_w;
457 v /= tc0_w;
458 break;
459 }
460 default:
461 // TODO: Change to LOG_ERROR when more types are handled.
462 LOG_DEBUG(HW_GPU, "Unhandled texture type %x", (int)texture.config.type);
463 UNIMPLEMENTED();
464 break;
465 }
466 }
467
468 int s = (int)(u * float24::FromFloat32(static_cast<float>(texture.config.width))).ToFloat32();
469 int t = (int)(v * float24::FromFloat32(static_cast<float>(texture.config.height))).ToFloat32();
470
471
444 static auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val, unsigned size) { 472 static auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val, unsigned size) {
445 switch (mode) { 473 switch (mode) {
446 case Regs::TextureConfig::ClampToEdge: 474 case Regs::TextureConfig::ClampToEdge:
@@ -809,7 +837,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
809 837
810 auto UpdateStencil = [stencil_test, x, y, &old_stencil](Pica::Regs::StencilAction action) { 838 auto UpdateStencil = [stencil_test, x, y, &old_stencil](Pica::Regs::StencilAction action) {
811 u8 new_stencil = PerformStencilAction(action, old_stencil, stencil_test.reference_value); 839 u8 new_stencil = PerformStencilAction(action, old_stencil, stencil_test.reference_value);
812 SetStencil(x >> 4, y >> 4, (new_stencil & stencil_test.write_mask) | (old_stencil & ~stencil_test.write_mask)); 840 if (g_state.regs.framebuffer.allow_depth_stencil_write != 0)
841 SetStencil(x >> 4, y >> 4, (new_stencil & stencil_test.write_mask) | (old_stencil & ~stencil_test.write_mask));
813 }; 842 };
814 843
815 if (stencil_action_enable) { 844 if (stencil_action_enable) {
@@ -858,10 +887,30 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
858 } 887 }
859 } 888 }
860 889
890 // interpolated_z = z / w
891 float interpolated_z_over_w = (v0.screenpos[2].ToFloat32() * w0 +
892 v1.screenpos[2].ToFloat32() * w1 +
893 v2.screenpos[2].ToFloat32() * w2) / wsum;
894
895 // Not fully accurate. About 3 bits in precision are missing.
896 // Z-Buffer (z / w * scale + offset)
897 float depth_scale = float24::FromRaw(regs.viewport_depth_range).ToFloat32();
898 float depth_offset = float24::FromRaw(regs.viewport_depth_near_plane).ToFloat32();
899 float depth = interpolated_z_over_w * depth_scale + depth_offset;
900
901 // Potentially switch to W-Buffer
902 if (regs.depthmap_enable == Pica::Regs::DepthBuffering::WBuffering) {
903
904 // W-Buffer (z * scale + w * offset = (z / w * scale + offset) * w)
905 depth *= interpolated_w_inverse.ToFloat32() * wsum;
906 }
907
908 // Clamp the result
909 depth = MathUtil::Clamp(depth, 0.0f, 1.0f);
910
911 // Convert float to integer
861 unsigned num_bits = Regs::DepthBitsPerPixel(regs.framebuffer.depth_format); 912 unsigned num_bits = Regs::DepthBitsPerPixel(regs.framebuffer.depth_format);
862 u32 z = (u32)((v0.screenpos[2].ToFloat32() * w0 + 913 u32 z = (u32)(depth * ((1 << num_bits) - 1));
863 v1.screenpos[2].ToFloat32() * w1 +
864 v2.screenpos[2].ToFloat32() * w2) * ((1 << num_bits) - 1) / wsum);
865 914
866 if (output_merger.depth_test_enable) { 915 if (output_merger.depth_test_enable) {
867 u32 ref_z = GetDepth(x >> 4, y >> 4); 916 u32 ref_z = GetDepth(x >> 4, y >> 4);
@@ -909,7 +958,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
909 } 958 }
910 } 959 }
911 960
912 if (output_merger.depth_write_enable) 961 if (regs.framebuffer.allow_depth_stencil_write != 0 && output_merger.depth_write_enable)
913 SetDepth(x >> 4, y >> 4, z); 962 SetDepth(x >> 4, y >> 4, z);
914 963
915 // The stencil depth_pass action is executed even if depth testing is disabled 964 // The stencil depth_pass action is executed even if depth testing is disabled
@@ -922,92 +971,72 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
922 if (output_merger.alphablend_enable) { 971 if (output_merger.alphablend_enable) {
923 auto params = output_merger.alpha_blending; 972 auto params = output_merger.alpha_blending;
924 973
925 auto LookupFactorRGB = [&](Regs::BlendFactor factor) -> Math::Vec3<u8> { 974 auto LookupFactor = [&](unsigned channel, Regs::BlendFactor factor) -> u8 {
975 DEBUG_ASSERT(channel < 4);
976
977 const Math::Vec4<u8> blend_const = {
978 static_cast<u8>(output_merger.blend_const.r),
979 static_cast<u8>(output_merger.blend_const.g),
980 static_cast<u8>(output_merger.blend_const.b),
981 static_cast<u8>(output_merger.blend_const.a)
982 };
983
926 switch (factor) { 984 switch (factor) {
927 case Regs::BlendFactor::Zero : 985 case Regs::BlendFactor::Zero:
928 return Math::Vec3<u8>(0, 0, 0); 986 return 0;
929 987
930 case Regs::BlendFactor::One : 988 case Regs::BlendFactor::One:
931 return Math::Vec3<u8>(255, 255, 255); 989 return 255;
932 990
933 case Regs::BlendFactor::SourceColor: 991 case Regs::BlendFactor::SourceColor:
934 return combiner_output.rgb(); 992 return combiner_output[channel];
935 993
936 case Regs::BlendFactor::OneMinusSourceColor: 994 case Regs::BlendFactor::OneMinusSourceColor:
937 return Math::Vec3<u8>(255 - combiner_output.r(), 255 - combiner_output.g(), 255 - combiner_output.b()); 995 return 255 - combiner_output[channel];
938 996
939 case Regs::BlendFactor::DestColor: 997 case Regs::BlendFactor::DestColor:
940 return dest.rgb(); 998 return dest[channel];
941 999
942 case Regs::BlendFactor::OneMinusDestColor: 1000 case Regs::BlendFactor::OneMinusDestColor:
943 return Math::Vec3<u8>(255 - dest.r(), 255 - dest.g(), 255 - dest.b()); 1001 return 255 - dest[channel];
944 1002
945 case Regs::BlendFactor::SourceAlpha: 1003 case Regs::BlendFactor::SourceAlpha:
946 return Math::Vec3<u8>(combiner_output.a(), combiner_output.a(), combiner_output.a()); 1004 return combiner_output.a();
947 1005
948 case Regs::BlendFactor::OneMinusSourceAlpha: 1006 case Regs::BlendFactor::OneMinusSourceAlpha:
949 return Math::Vec3<u8>(255 - combiner_output.a(), 255 - combiner_output.a(), 255 - combiner_output.a()); 1007 return 255 - combiner_output.a();
950 1008
951 case Regs::BlendFactor::DestAlpha: 1009 case Regs::BlendFactor::DestAlpha:
952 return Math::Vec3<u8>(dest.a(), dest.a(), dest.a()); 1010 return dest.a();
953 1011
954 case Regs::BlendFactor::OneMinusDestAlpha: 1012 case Regs::BlendFactor::OneMinusDestAlpha:
955 return Math::Vec3<u8>(255 - dest.a(), 255 - dest.a(), 255 - dest.a()); 1013 return 255 - dest.a();
956 1014
957 case Regs::BlendFactor::ConstantColor: 1015 case Regs::BlendFactor::ConstantColor:
958 return Math::Vec3<u8>(output_merger.blend_const.r, output_merger.blend_const.g, output_merger.blend_const.b); 1016 return blend_const[channel];
959 1017
960 case Regs::BlendFactor::OneMinusConstantColor: 1018 case Regs::BlendFactor::OneMinusConstantColor:
961 return Math::Vec3<u8>(255 - output_merger.blend_const.r, 255 - output_merger.blend_const.g, 255 - output_merger.blend_const.b); 1019 return 255 - blend_const[channel];
962 1020
963 case Regs::BlendFactor::ConstantAlpha: 1021 case Regs::BlendFactor::ConstantAlpha:
964 return Math::Vec3<u8>(output_merger.blend_const.a, output_merger.blend_const.a, output_merger.blend_const.a); 1022 return blend_const.a();
965 1023
966 case Regs::BlendFactor::OneMinusConstantAlpha: 1024 case Regs::BlendFactor::OneMinusConstantAlpha:
967 return Math::Vec3<u8>(255 - output_merger.blend_const.a, 255 - output_merger.blend_const.a, 255 - output_merger.blend_const.a); 1025 return 255 - blend_const.a();
968 1026
969 default: 1027 case Regs::BlendFactor::SourceAlphaSaturate:
970 LOG_CRITICAL(HW_GPU, "Unknown color blend factor %x", factor); 1028 // Returns 1.0 for the alpha channel
971 UNIMPLEMENTED(); 1029 if (channel == 3)
972 break; 1030 return 255;
973 } 1031 return std::min(combiner_output.a(), static_cast<u8>(255 - dest.a()));
974
975 return {};
976 };
977
978 auto LookupFactorA = [&](Regs::BlendFactor factor) -> u8 {
979 switch (factor) {
980 case Regs::BlendFactor::Zero:
981 return 0;
982
983 case Regs::BlendFactor::One:
984 return 255;
985
986 case Regs::BlendFactor::SourceAlpha:
987 return combiner_output.a();
988
989 case Regs::BlendFactor::OneMinusSourceAlpha:
990 return 255 - combiner_output.a();
991
992 case Regs::BlendFactor::DestAlpha:
993 return dest.a();
994
995 case Regs::BlendFactor::OneMinusDestAlpha:
996 return 255 - dest.a();
997
998 case Regs::BlendFactor::ConstantAlpha:
999 return output_merger.blend_const.a;
1000
1001 case Regs::BlendFactor::OneMinusConstantAlpha:
1002 return 255 - output_merger.blend_const.a;
1003 1032
1004 default: 1033 default:
1005 LOG_CRITICAL(HW_GPU, "Unknown alpha blend factor %x", factor); 1034 LOG_CRITICAL(HW_GPU, "Unknown blend factor %x", factor);
1006 UNIMPLEMENTED(); 1035 UNIMPLEMENTED();
1007 break; 1036 break;
1008 } 1037 }
1009 1038
1010 return {}; 1039 return combiner_output[channel];
1011 }; 1040 };
1012 1041
1013 static auto EvaluateBlendEquation = [](const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor, 1042 static auto EvaluateBlendEquation = [](const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor,
@@ -1059,10 +1088,15 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
1059 MathUtil::Clamp(result.a(), 0, 255)); 1088 MathUtil::Clamp(result.a(), 0, 255));
1060 }; 1089 };
1061 1090
1062 auto srcfactor = Math::MakeVec(LookupFactorRGB(params.factor_source_rgb), 1091 auto srcfactor = Math::MakeVec(LookupFactor(0, params.factor_source_rgb),
1063 LookupFactorA(params.factor_source_a)); 1092 LookupFactor(1, params.factor_source_rgb),
1064 auto dstfactor = Math::MakeVec(LookupFactorRGB(params.factor_dest_rgb), 1093 LookupFactor(2, params.factor_source_rgb),
1065 LookupFactorA(params.factor_dest_a)); 1094 LookupFactor(3, params.factor_source_a));
1095
1096 auto dstfactor = Math::MakeVec(LookupFactor(0, params.factor_dest_rgb),
1097 LookupFactor(1, params.factor_dest_rgb),
1098 LookupFactor(2, params.factor_dest_rgb),
1099 LookupFactor(3, params.factor_dest_a));
1066 1100
1067 blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_rgb); 1101 blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_rgb);
1068 blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_a).a(); 1102 blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_a).a();
@@ -1133,7 +1167,8 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
1133 output_merger.alpha_enable ? blend_output.a() : dest.a() 1167 output_merger.alpha_enable ? blend_output.a() : dest.a()
1134 }; 1168 };
1135 1169
1136 DrawPixel(x >> 4, y >> 4, result); 1170 if (regs.framebuffer.allow_color_write != 0)
1171 DrawPixel(x >> 4, y >> 4, result);
1137 } 1172 }
1138 } 1173 }
1139} 1174}
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 008c5827b..bf7101665 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -6,6 +6,10 @@
6 6
7#include "common/common_types.h" 7#include "common/common_types.h"
8 8
9#include "core/hw/gpu.h"
10
11struct ScreenInfo;
12
9namespace Pica { 13namespace Pica {
10namespace Shader { 14namespace Shader {
11struct OutputVertex; 15struct OutputVertex;
@@ -18,12 +22,6 @@ class RasterizerInterface {
18public: 22public:
19 virtual ~RasterizerInterface() {} 23 virtual ~RasterizerInterface() {}
20 24
21 /// Initialize API-specific GPU objects
22 virtual void InitObjects() = 0;
23
24 /// Reset the rasterizer, such as flushing all caches and updating all state
25 virtual void Reset() = 0;
26
27 /// Queues the primitive formed by the given vertices for rendering 25 /// Queues the primitive formed by the given vertices for rendering
28 virtual void AddTriangle(const Pica::Shader::OutputVertex& v0, 26 virtual void AddTriangle(const Pica::Shader::OutputVertex& v0,
29 const Pica::Shader::OutputVertex& v1, 27 const Pica::Shader::OutputVertex& v1,
@@ -32,17 +30,26 @@ public:
32 /// Draw the current batch of triangles 30 /// Draw the current batch of triangles
33 virtual void DrawTriangles() = 0; 31 virtual void DrawTriangles() = 0;
34 32
35 /// Commit the rasterizer's framebuffer contents immediately to the current 3DS memory framebuffer
36 virtual void FlushFramebuffer() = 0;
37
38 /// Notify rasterizer that the specified PICA register has been changed 33 /// Notify rasterizer that the specified PICA register has been changed
39 virtual void NotifyPicaRegisterChanged(u32 id) = 0; 34 virtual void NotifyPicaRegisterChanged(u32 id) = 0;
40 35
41 /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory. 36 /// Notify rasterizer that all caches should be flushed to 3DS memory
37 virtual void FlushAll() = 0;
38
39 /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory
42 virtual void FlushRegion(PAddr addr, u32 size) = 0; 40 virtual void FlushRegion(PAddr addr, u32 size) = 0;
43 41
44 /// Notify rasterizer that any caches of the specified region should be discraded and reloaded from 3DS memory. 42 /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory and invalidated
45 virtual void InvalidateRegion(PAddr addr, u32 size) = 0; 43 virtual void FlushAndInvalidateRegion(PAddr addr, u32 size) = 0;
44
45 /// Attempt to use a faster method to perform a display transfer
46 virtual bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { return false; }
47
48 /// Attempt to use a faster method to fill a region
49 virtual bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { return false; }
50
51 /// Attempt to use a faster method to display the framebuffer to screen
52 virtual bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) { return false; }
46}; 53};
47 54
48} 55}
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index 101f84eb9..3f451e062 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -2,10 +2,9 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <atomic>
5#include <memory> 6#include <memory>
6 7
7#include "core/settings.h"
8
9#include "video_core/renderer_base.h" 8#include "video_core/renderer_base.h"
10#include "video_core/video_core.h" 9#include "video_core/video_core.h"
11#include "video_core/swrasterizer.h" 10#include "video_core/swrasterizer.h"
@@ -21,7 +20,5 @@ void RendererBase::RefreshRasterizerSetting() {
21 } else { 20 } else {
22 rasterizer = std::make_unique<VideoCore::SWRasterizer>(); 21 rasterizer = std::make_unique<VideoCore::SWRasterizer>();
23 } 22 }
24 rasterizer->InitObjects();
25 rasterizer->Reset();
26 } 23 }
27} 24}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 4fdf93a3e..bcd1ae78d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -2,28 +2,28 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <cstring>
6#include <memory> 5#include <memory>
6#include <string>
7#include <tuple>
8#include <utility>
7 9
8#include <glad/glad.h> 10#include <glad/glad.h>
9 11
12#include "common/assert.h"
10#include "common/color.h" 13#include "common/color.h"
11#include "common/file_util.h" 14#include "common/logging/log.h"
12#include "common/math_util.h" 15#include "common/math_util.h"
13#include "common/microprofile.h" 16#include "common/vector_math.h"
14#include "common/profiler.h"
15 17
16#include "core/memory.h"
17#include "core/settings.h"
18#include "core/hw/gpu.h" 18#include "core/hw/gpu.h"
19 19
20#include "video_core/pica.h" 20#include "video_core/pica.h"
21#include "video_core/pica_state.h" 21#include "video_core/pica_state.h"
22#include "video_core/utils.h"
23#include "video_core/renderer_opengl/gl_rasterizer.h" 22#include "video_core/renderer_opengl/gl_rasterizer.h"
24#include "video_core/renderer_opengl/gl_shader_gen.h" 23#include "video_core/renderer_opengl/gl_shader_gen.h"
25#include "video_core/renderer_opengl/gl_shader_util.h" 24#include "video_core/renderer_opengl/gl_shader_util.h"
26#include "video_core/renderer_opengl/pica_to_gl.h" 25#include "video_core/renderer_opengl/pica_to_gl.h"
26#include "video_core/renderer_opengl/renderer_opengl.h"
27 27
28static bool IsPassThroughTevStage(const Pica::Regs::TevStageConfig& stage) { 28static bool IsPassThroughTevStage(const Pica::Regs::TevStageConfig& stage) {
29 return (stage.color_op == Pica::Regs::TevStageConfig::Operation::Replace && 29 return (stage.color_op == Pica::Regs::TevStageConfig::Operation::Replace &&
@@ -36,10 +36,7 @@ static bool IsPassThroughTevStage(const Pica::Regs::TevStageConfig& stage) {
36 stage.GetAlphaMultiplier() == 1); 36 stage.GetAlphaMultiplier() == 1);
37} 37}
38 38
39RasterizerOpenGL::RasterizerOpenGL() : cached_fb_color_addr(0), cached_fb_depth_addr(0) { } 39RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
40RasterizerOpenGL::~RasterizerOpenGL() { }
41
42void RasterizerOpenGL::InitObjects() {
43 // Create sampler objects 40 // Create sampler objects
44 for (size_t i = 0; i < texture_samplers.size(); ++i) { 41 for (size_t i = 0; i < texture_samplers.size(); ++i) {
45 texture_samplers[i].Create(); 42 texture_samplers[i].Create();
@@ -61,6 +58,10 @@ void RasterizerOpenGL::InitObjects() {
61 58
62 uniform_block_data.dirty = true; 59 uniform_block_data.dirty = true;
63 60
61 for (unsigned index = 0; index < lighting_luts.size(); index++) {
62 uniform_block_data.lut_dirty[index] = true;
63 }
64
64 // Set vertex attributes 65 // Set vertex attributes
65 glVertexAttribPointer(GLShader::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position)); 66 glVertexAttribPointer(GLShader::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position));
66 glEnableVertexAttribArray(GLShader::ATTRIBUTE_POSITION); 67 glEnableVertexAttribArray(GLShader::ATTRIBUTE_POSITION);
@@ -75,88 +76,47 @@ void RasterizerOpenGL::InitObjects() {
75 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD1); 76 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD1);
76 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD2); 77 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD2);
77 78
79 glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD0_W, 1, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord0_w));
80 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD0_W);
81
78 glVertexAttribPointer(GLShader::ATTRIBUTE_NORMQUAT, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, normquat)); 82 glVertexAttribPointer(GLShader::ATTRIBUTE_NORMQUAT, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, normquat));
79 glEnableVertexAttribArray(GLShader::ATTRIBUTE_NORMQUAT); 83 glEnableVertexAttribArray(GLShader::ATTRIBUTE_NORMQUAT);
80 84
81 glVertexAttribPointer(GLShader::ATTRIBUTE_VIEW, 3, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, view)); 85 glVertexAttribPointer(GLShader::ATTRIBUTE_VIEW, 3, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, view));
82 glEnableVertexAttribArray(GLShader::ATTRIBUTE_VIEW); 86 glEnableVertexAttribArray(GLShader::ATTRIBUTE_VIEW);
83 87
84 SetShader(); 88 // Create render framebuffer
85
86 // Create textures for OGL framebuffer that will be rendered to, initially 1x1 to succeed in framebuffer creation
87 fb_color_texture.texture.Create();
88 ReconfigureColorTexture(fb_color_texture, Pica::Regs::ColorFormat::RGBA8, 1, 1);
89
90 state.texture_units[0].texture_2d = fb_color_texture.texture.handle;
91 state.Apply();
92
93 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
94 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
95 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
96 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
97 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
98
99 state.texture_units[0].texture_2d = 0;
100 state.Apply();
101
102 fb_depth_texture.texture.Create();
103 ReconfigureDepthTexture(fb_depth_texture, Pica::Regs::DepthFormat::D16, 1, 1);
104
105 state.texture_units[0].texture_2d = fb_depth_texture.texture.handle;
106 state.Apply();
107
108 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
109 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
110 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
111 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
112 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
113 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_FUNC, GL_LEQUAL);
114 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_MODE, GL_NONE);
115
116 state.texture_units[0].texture_2d = 0;
117 state.Apply();
118
119 // Configure OpenGL framebuffer
120 framebuffer.Create(); 89 framebuffer.Create();
121 90
122 state.draw.framebuffer = framebuffer.handle; 91 // Allocate and bind lighting lut textures
92 for (size_t i = 0; i < lighting_luts.size(); ++i) {
93 lighting_luts[i].Create();
94 state.lighting_luts[i].texture_1d = lighting_luts[i].handle;
95 }
123 state.Apply(); 96 state.Apply();
124 97
125 glActiveTexture(GL_TEXTURE0); 98 for (size_t i = 0; i < lighting_luts.size(); ++i) {
126 glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, fb_color_texture.texture.handle, 0); 99 glActiveTexture(static_cast<GLenum>(GL_TEXTURE3 + i));
127 glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, fb_depth_texture.texture.handle, 0);
128
129 for (size_t i = 0; i < lighting_lut.size(); ++i) {
130 lighting_lut[i].Create();
131 state.lighting_lut[i].texture_1d = lighting_lut[i].handle;
132
133 glActiveTexture(GL_TEXTURE3 + i);
134 glBindTexture(GL_TEXTURE_1D, state.lighting_lut[i].texture_1d);
135
136 glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr); 100 glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr);
137 glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 101 glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
138 glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); 102 glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
139 } 103 }
140 state.Apply();
141 104
142 GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER); 105 // Sync fixed function OpenGL state
143 ASSERT_MSG(status == GL_FRAMEBUFFER_COMPLETE,
144 "OpenGL rasterizer framebuffer setup failed, status %X", status);
145}
146
147void RasterizerOpenGL::Reset() {
148 SyncCullMode(); 106 SyncCullMode();
149 SyncDepthModifiers();
150 SyncBlendEnabled(); 107 SyncBlendEnabled();
151 SyncBlendFuncs(); 108 SyncBlendFuncs();
152 SyncBlendColor(); 109 SyncBlendColor();
153 SyncLogicOp(); 110 SyncLogicOp();
154 SyncStencilTest(); 111 SyncStencilTest();
155 SyncDepthTest(); 112 SyncDepthTest();
113 SyncColorWriteMask();
114 SyncStencilWriteMask();
115 SyncDepthWriteMask();
116}
156 117
157 SetShader(); 118RasterizerOpenGL::~RasterizerOpenGL() {
158 119
159 res_cache.InvalidateAll();
160} 120}
161 121
162/** 122/**
@@ -193,47 +153,98 @@ void RasterizerOpenGL::DrawTriangles() {
193 if (vertex_batch.empty()) 153 if (vertex_batch.empty())
194 return; 154 return;
195 155
196 SyncFramebuffer(); 156 const auto& regs = Pica::g_state.regs;
197 SyncDrawState(); 157
158 // Sync and bind the framebuffer surfaces
159 CachedSurface* color_surface;
160 CachedSurface* depth_surface;
161 MathUtil::Rectangle<int> rect;
162 std::tie(color_surface, depth_surface, rect) = res_cache.GetFramebufferSurfaces(regs.framebuffer);
163
164 state.draw.draw_framebuffer = framebuffer.handle;
165 state.Apply();
166
167 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, color_surface != nullptr ? color_surface->texture.handle : 0, 0);
168 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, depth_surface != nullptr ? depth_surface->texture.handle : 0, 0);
169 bool has_stencil = regs.framebuffer.depth_format == Pica::Regs::DepthFormat::D24S8;
170 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, (has_stencil && depth_surface != nullptr) ? depth_surface->texture.handle : 0, 0);
198 171
199 if (state.draw.shader_dirty) { 172 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
173 return;
174 }
175
176 // Sync the viewport
177 // These registers hold half-width and half-height, so must be multiplied by 2
178 GLsizei viewport_width = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_x).ToFloat32() * 2;
179 GLsizei viewport_height = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_y).ToFloat32() * 2;
180
181 glViewport((GLint)(rect.left + regs.viewport_corner.x * color_surface->res_scale_width),
182 (GLint)(rect.bottom + regs.viewport_corner.y * color_surface->res_scale_height),
183 (GLsizei)(viewport_width * color_surface->res_scale_width), (GLsizei)(viewport_height * color_surface->res_scale_height));
184
185 // Sync and bind the texture surfaces
186 const auto pica_textures = regs.GetTextures();
187 for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) {
188 const auto& texture = pica_textures[texture_index];
189
190 if (texture.enabled) {
191 texture_samplers[texture_index].SyncWithConfig(texture.config);
192 CachedSurface* surface = res_cache.GetTextureSurface(texture);
193 if (surface != nullptr) {
194 state.texture_units[texture_index].texture_2d = surface->texture.handle;
195 } else {
196 // Can occur when texture addr is null or its memory is unmapped/invalid
197 state.texture_units[texture_index].texture_2d = 0;
198 }
199 } else {
200 state.texture_units[texture_index].texture_2d = 0;
201 }
202 }
203
204 // Sync and bind the shader
205 if (shader_dirty) {
200 SetShader(); 206 SetShader();
201 state.draw.shader_dirty = false; 207 shader_dirty = false;
202 } 208 }
203 209
204 for (unsigned index = 0; index < lighting_lut.size(); index++) { 210 // Sync the lighting luts
211 for (unsigned index = 0; index < lighting_luts.size(); index++) {
205 if (uniform_block_data.lut_dirty[index]) { 212 if (uniform_block_data.lut_dirty[index]) {
206 SyncLightingLUT(index); 213 SyncLightingLUT(index);
207 uniform_block_data.lut_dirty[index] = false; 214 uniform_block_data.lut_dirty[index] = false;
208 } 215 }
209 } 216 }
210 217
218 // Sync the uniform data
211 if (uniform_block_data.dirty) { 219 if (uniform_block_data.dirty) {
212 glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, GL_STATIC_DRAW); 220 glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, GL_STATIC_DRAW);
213 uniform_block_data.dirty = false; 221 uniform_block_data.dirty = false;
214 } 222 }
215 223
224 state.Apply();
225
226 // Draw the vertex batch
216 glBufferData(GL_ARRAY_BUFFER, vertex_batch.size() * sizeof(HardwareVertex), vertex_batch.data(), GL_STREAM_DRAW); 227 glBufferData(GL_ARRAY_BUFFER, vertex_batch.size() * sizeof(HardwareVertex), vertex_batch.data(), GL_STREAM_DRAW);
217 glDrawArrays(GL_TRIANGLES, 0, (GLsizei)vertex_batch.size()); 228 glDrawArrays(GL_TRIANGLES, 0, (GLsizei)vertex_batch.size());
218 229
219 vertex_batch.clear(); 230 // Mark framebuffer surfaces as dirty
220 231 // TODO: Restrict invalidation area to the viewport
221 // Flush the resource cache at the current depth and color framebuffer addresses for render-to-texture 232 if (color_surface != nullptr) {
222 const auto& regs = Pica::g_state.regs; 233 color_surface->dirty = true;
223 234 res_cache.FlushRegion(color_surface->addr, color_surface->size, color_surface, true);
224 u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format) 235 }
225 * fb_color_texture.width * fb_color_texture.height; 236 if (depth_surface != nullptr) {
226 237 depth_surface->dirty = true;
227 u32 cached_fb_depth_size = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format) 238 res_cache.FlushRegion(depth_surface->addr, depth_surface->size, depth_surface, true);
228 * fb_depth_texture.width * fb_depth_texture.height; 239 }
229 240
230 res_cache.InvalidateInRange(cached_fb_color_addr, cached_fb_color_size, true); 241 vertex_batch.clear();
231 res_cache.InvalidateInRange(cached_fb_depth_addr, cached_fb_depth_size, true);
232}
233 242
234void RasterizerOpenGL::FlushFramebuffer() { 243 // Unbind textures for potential future use as framebuffer attachments
235 CommitColorBuffer(); 244 for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) {
236 CommitDepthBuffer(); 245 state.texture_units[texture_index].texture_2d = 0;
246 }
247 state.Apply();
237} 248}
238 249
239void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { 250void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
@@ -247,8 +258,15 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
247 258
248 // Depth modifiers 259 // Depth modifiers
249 case PICA_REG_INDEX(viewport_depth_range): 260 case PICA_REG_INDEX(viewport_depth_range):
250 case PICA_REG_INDEX(viewport_depth_far_plane): 261 SyncDepthScale();
251 SyncDepthModifiers(); 262 break;
263 case PICA_REG_INDEX(viewport_depth_near_plane):
264 SyncDepthOffset();
265 break;
266
267 // Depth buffering
268 case PICA_REG_INDEX(depthmap_enable):
269 shader_dirty = true;
252 break; 270 break;
253 271
254 // Blending 272 // Blending
@@ -265,18 +283,39 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
265 // Alpha test 283 // Alpha test
266 case PICA_REG_INDEX(output_merger.alpha_test): 284 case PICA_REG_INDEX(output_merger.alpha_test):
267 SyncAlphaTest(); 285 SyncAlphaTest();
268 state.draw.shader_dirty = true; 286 shader_dirty = true;
269 break; 287 break;
270 288
271 // Stencil test 289 // Sync GL stencil test + stencil write mask
290 // (Pica stencil test function register also contains a stencil write mask)
272 case PICA_REG_INDEX(output_merger.stencil_test.raw_func): 291 case PICA_REG_INDEX(output_merger.stencil_test.raw_func):
292 SyncStencilTest();
293 SyncStencilWriteMask();
294 break;
273 case PICA_REG_INDEX(output_merger.stencil_test.raw_op): 295 case PICA_REG_INDEX(output_merger.stencil_test.raw_op):
296 case PICA_REG_INDEX(framebuffer.depth_format):
274 SyncStencilTest(); 297 SyncStencilTest();
275 break; 298 break;
276 299
277 // Depth test 300 // Sync GL depth test + depth and color write mask
301 // (Pica depth test function register also contains a depth and color write mask)
278 case PICA_REG_INDEX(output_merger.depth_test_enable): 302 case PICA_REG_INDEX(output_merger.depth_test_enable):
279 SyncDepthTest(); 303 SyncDepthTest();
304 SyncDepthWriteMask();
305 SyncColorWriteMask();
306 break;
307
308 // Sync GL depth and stencil write mask
309 // (This is a dedicated combined depth / stencil write-enable register)
310 case PICA_REG_INDEX(framebuffer.allow_depth_stencil_write):
311 SyncDepthWriteMask();
312 SyncStencilWriteMask();
313 break;
314
315 // Sync GL color write mask
316 // (This is a dedicated color write-enable register)
317 case PICA_REG_INDEX(framebuffer.allow_color_write):
318 SyncColorWriteMask();
280 break; 319 break;
281 320
282 // Logic op 321 // Logic op
@@ -284,6 +323,11 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
284 SyncLogicOp(); 323 SyncLogicOp();
285 break; 324 break;
286 325
326 // Texture 0 type
327 case PICA_REG_INDEX(texture0.type):
328 shader_dirty = true;
329 break;
330
287 // TEV stages 331 // TEV stages
288 case PICA_REG_INDEX(tev_stage0.color_source1): 332 case PICA_REG_INDEX(tev_stage0.color_source1):
289 case PICA_REG_INDEX(tev_stage0.color_modifier1): 333 case PICA_REG_INDEX(tev_stage0.color_modifier1):
@@ -310,7 +354,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
310 case PICA_REG_INDEX(tev_stage5.color_op): 354 case PICA_REG_INDEX(tev_stage5.color_op):
311 case PICA_REG_INDEX(tev_stage5.color_scale): 355 case PICA_REG_INDEX(tev_stage5.color_scale):
312 case PICA_REG_INDEX(tev_combiner_buffer_input): 356 case PICA_REG_INDEX(tev_combiner_buffer_input):
313 state.draw.shader_dirty = true; 357 shader_dirty = true;
314 break; 358 break;
315 case PICA_REG_INDEX(tev_stage0.const_r): 359 case PICA_REG_INDEX(tev_stage0.const_r):
316 SyncTevConstColor(0, regs.tev_stage0); 360 SyncTevConstColor(0, regs.tev_stage0);
@@ -497,41 +541,257 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
497 } 541 }
498} 542}
499 543
544void RasterizerOpenGL::FlushAll() {
545 res_cache.FlushAll();
546}
547
500void RasterizerOpenGL::FlushRegion(PAddr addr, u32 size) { 548void RasterizerOpenGL::FlushRegion(PAddr addr, u32 size) {
501 const auto& regs = Pica::g_state.regs; 549 res_cache.FlushRegion(addr, size, nullptr, false);
550}
551
552void RasterizerOpenGL::FlushAndInvalidateRegion(PAddr addr, u32 size) {
553 res_cache.FlushRegion(addr, size, nullptr, true);
554}
502 555
503 u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format) 556bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) {
504 * fb_color_texture.width * fb_color_texture.height; 557 using PixelFormat = CachedSurface::PixelFormat;
558 using SurfaceType = CachedSurface::SurfaceType;
505 559
506 u32 cached_fb_depth_size = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format) 560 if (config.is_texture_copy) {
507 * fb_depth_texture.width * fb_depth_texture.height; 561 // TODO(tfarley): Try to hardware accelerate this
562 return false;
563 }
564
565 CachedSurface src_params;
566 src_params.addr = config.GetPhysicalInputAddress();
567 src_params.width = config.output_width;
568 src_params.height = config.output_height;
569 src_params.is_tiled = !config.input_linear;
570 src_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.input_format);
571
572 CachedSurface dst_params;
573 dst_params.addr = config.GetPhysicalOutputAddress();
574 dst_params.width = config.scaling != config.NoScale ? config.output_width / 2 : config.output_width.Value();
575 dst_params.height = config.scaling == config.ScaleXY ? config.output_height / 2 : config.output_height.Value();
576 dst_params.is_tiled = config.input_linear != config.dont_swizzle;
577 dst_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.output_format);
578
579 MathUtil::Rectangle<int> src_rect;
580 CachedSurface* src_surface = res_cache.GetSurfaceRect(src_params, false, true, src_rect);
581
582 if (src_surface == nullptr) {
583 return false;
584 }
508 585
509 // If source memory region overlaps 3DS framebuffers, commit them before the copy happens 586 // Require destination surface to have same resolution scale as source to preserve scaling
510 if (MathUtil::IntervalsIntersect(addr, size, cached_fb_color_addr, cached_fb_color_size)) 587 dst_params.res_scale_width = src_surface->res_scale_width;
511 CommitColorBuffer(); 588 dst_params.res_scale_height = src_surface->res_scale_height;
512 589
513 if (MathUtil::IntervalsIntersect(addr, size, cached_fb_depth_addr, cached_fb_depth_size)) 590 MathUtil::Rectangle<int> dst_rect;
514 CommitDepthBuffer(); 591 CachedSurface* dst_surface = res_cache.GetSurfaceRect(dst_params, true, false, dst_rect);
592
593 if (dst_surface == nullptr) {
594 return false;
595 }
596
597 // Don't accelerate if the src and dst surfaces are the same
598 if (src_surface == dst_surface) {
599 return false;
600 }
601
602 if (config.flip_vertically) {
603 std::swap(dst_rect.top, dst_rect.bottom);
604 }
605
606 if (!res_cache.TryBlitSurfaces(src_surface, src_rect, dst_surface, dst_rect)) {
607 return false;
608 }
609
610 u32 dst_size = dst_params.width * dst_params.height * CachedSurface::GetFormatBpp(dst_params.pixel_format) / 8;
611 dst_surface->dirty = true;
612 res_cache.FlushRegion(config.GetPhysicalOutputAddress(), dst_size, dst_surface, true);
613 return true;
515} 614}
516 615
517void RasterizerOpenGL::InvalidateRegion(PAddr addr, u32 size) { 616bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) {
518 const auto& regs = Pica::g_state.regs; 617 using PixelFormat = CachedSurface::PixelFormat;
618 using SurfaceType = CachedSurface::SurfaceType;
619
620 CachedSurface* dst_surface = res_cache.TryGetFillSurface(config);
621
622 if (dst_surface == nullptr) {
623 return false;
624 }
625
626 OpenGLState cur_state = OpenGLState::GetCurState();
627
628 SurfaceType dst_type = CachedSurface::GetFormatType(dst_surface->pixel_format);
629
630 GLuint old_fb = cur_state.draw.draw_framebuffer;
631 cur_state.draw.draw_framebuffer = framebuffer.handle;
632 // TODO: When scissor test is implemented, need to disable scissor test in cur_state here so Clear call isn't affected
633 cur_state.Apply();
634
635 if (dst_type == SurfaceType::Color || dst_type == SurfaceType::Texture) {
636 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_surface->texture.handle, 0);
637 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
638
639 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
640 return false;
641 }
642
643 GLfloat color_values[4] = {0.0f, 0.0f, 0.0f, 0.0f};
644
645 // TODO: Handle additional pixel format and fill value size combinations to accelerate more cases
646 // For instance, checking if fill value's bytes/bits repeat to allow filling I8/A8/I4/A4/...
647 // Currently only handles formats that are multiples of the fill value size
648
649 if (config.fill_24bit) {
650 switch (dst_surface->pixel_format) {
651 case PixelFormat::RGB8:
652 color_values[0] = config.value_24bit_r / 255.0f;
653 color_values[1] = config.value_24bit_g / 255.0f;
654 color_values[2] = config.value_24bit_b / 255.0f;
655 break;
656 default:
657 return false;
658 }
659 } else if (config.fill_32bit) {
660 u32 value = config.value_32bit;
661
662 switch (dst_surface->pixel_format) {
663 case PixelFormat::RGBA8:
664 color_values[0] = (value >> 24) / 255.0f;
665 color_values[1] = ((value >> 16) & 0xFF) / 255.0f;
666 color_values[2] = ((value >> 8) & 0xFF) / 255.0f;
667 color_values[3] = (value & 0xFF) / 255.0f;
668 break;
669 default:
670 return false;
671 }
672 } else {
673 u16 value_16bit = config.value_16bit.Value();
674 Math::Vec4<u8> color;
675
676 switch (dst_surface->pixel_format) {
677 case PixelFormat::RGBA8:
678 color_values[0] = (value_16bit >> 8) / 255.0f;
679 color_values[1] = (value_16bit & 0xFF) / 255.0f;
680 color_values[2] = color_values[0];
681 color_values[3] = color_values[1];
682 break;
683 case PixelFormat::RGB5A1:
684 color = Color::DecodeRGB5A1((const u8*)&value_16bit);
685 color_values[0] = color[0] / 31.0f;
686 color_values[1] = color[1] / 31.0f;
687 color_values[2] = color[2] / 31.0f;
688 color_values[3] = color[3];
689 break;
690 case PixelFormat::RGB565:
691 color = Color::DecodeRGB565((const u8*)&value_16bit);
692 color_values[0] = color[0] / 31.0f;
693 color_values[1] = color[1] / 63.0f;
694 color_values[2] = color[2] / 31.0f;
695 break;
696 case PixelFormat::RGBA4:
697 color = Color::DecodeRGBA4((const u8*)&value_16bit);
698 color_values[0] = color[0] / 15.0f;
699 color_values[1] = color[1] / 15.0f;
700 color_values[2] = color[2] / 15.0f;
701 color_values[3] = color[3] / 15.0f;
702 break;
703 case PixelFormat::IA8:
704 case PixelFormat::RG8:
705 color_values[0] = (value_16bit >> 8) / 255.0f;
706 color_values[1] = (value_16bit & 0xFF) / 255.0f;
707 break;
708 default:
709 return false;
710 }
711 }
712
713 cur_state.color_mask.red_enabled = true;
714 cur_state.color_mask.green_enabled = true;
715 cur_state.color_mask.blue_enabled = true;
716 cur_state.color_mask.alpha_enabled = true;
717 cur_state.Apply();
718 glClearBufferfv(GL_COLOR, 0, color_values);
719 } else if (dst_type == SurfaceType::Depth) {
720 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
721 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_surface->texture.handle, 0);
722 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
723
724 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
725 return false;
726 }
727
728 GLfloat value_float;
729 if (dst_surface->pixel_format == CachedSurface::PixelFormat::D16) {
730 value_float = config.value_32bit / 65535.0f; // 2^16 - 1
731 } else if (dst_surface->pixel_format == CachedSurface::PixelFormat::D24) {
732 value_float = config.value_32bit / 16777215.0f; // 2^24 - 1
733 }
734
735 cur_state.depth.write_mask = true;
736 cur_state.Apply();
737 glClearBufferfv(GL_DEPTH, 0, &value_float);
738 } else if (dst_type == SurfaceType::DepthStencil) {
739 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
740 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, dst_surface->texture.handle, 0);
519 741
520 u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format) 742 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
521 * fb_color_texture.width * fb_color_texture.height; 743 return false;
744 }
522 745
523 u32 cached_fb_depth_size = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format) 746 GLfloat value_float = (config.value_32bit & 0xFFFFFF) / 16777215.0f; // 2^24 - 1
524 * fb_depth_texture.width * fb_depth_texture.height; 747 GLint value_int = (config.value_32bit >> 24);
525 748
526 // If modified memory region overlaps 3DS framebuffers, reload their contents into OpenGL 749 cur_state.depth.write_mask = true;
527 if (MathUtil::IntervalsIntersect(addr, size, cached_fb_color_addr, cached_fb_color_size)) 750 cur_state.stencil.write_mask = true;
528 ReloadColorBuffer(); 751 cur_state.Apply();
752 glClearBufferfi(GL_DEPTH_STENCIL, 0, value_float, value_int);
753 }
529 754
530 if (MathUtil::IntervalsIntersect(addr, size, cached_fb_depth_addr, cached_fb_depth_size)) 755 cur_state.draw.draw_framebuffer = old_fb;
531 ReloadDepthBuffer(); 756 // TODO: Return scissor test to previous value when scissor test is implemented
757 cur_state.Apply();
532 758
533 // Notify cache of flush in case the region touches a cached resource 759 dst_surface->dirty = true;
534 res_cache.InvalidateInRange(addr, size); 760 res_cache.FlushRegion(dst_surface->addr, dst_surface->size, dst_surface, true);
761 return true;
762}
763
764bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) {
765 if (framebuffer_addr == 0) {
766 return false;
767 }
768
769 CachedSurface src_params;
770 src_params.addr = framebuffer_addr;
771 src_params.width = config.width;
772 src_params.height = config.height;
773 src_params.stride = pixel_stride;
774 src_params.is_tiled = false;
775 src_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.color_format);
776
777 MathUtil::Rectangle<int> src_rect;
778 CachedSurface* src_surface = res_cache.GetSurfaceRect(src_params, false, true, src_rect);
779
780 if (src_surface == nullptr) {
781 return false;
782 }
783
784 u32 scaled_width = src_surface->GetScaledWidth();
785 u32 scaled_height = src_surface->GetScaledHeight();
786
787 screen_info.display_texcoords = MathUtil::Rectangle<float>((float)src_rect.top / (float)scaled_height,
788 (float)src_rect.left / (float)scaled_width,
789 (float)src_rect.bottom / (float)scaled_height,
790 (float)src_rect.right / (float)scaled_width);
791
792 screen_info.display_texture = src_surface->texture.handle;
793
794 return true;
535} 795}
536 796
537void RasterizerOpenGL::SamplerInfo::Create() { 797void RasterizerOpenGL::SamplerInfo::Create() {
@@ -567,114 +827,13 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Pica::Regs::TextureConf
567 827
568 if (wrap_s == TextureConfig::ClampToBorder || wrap_t == TextureConfig::ClampToBorder) { 828 if (wrap_s == TextureConfig::ClampToBorder || wrap_t == TextureConfig::ClampToBorder) {
569 if (border_color != config.border_color.raw) { 829 if (border_color != config.border_color.raw) {
830 border_color = config.border_color.raw;
570 auto gl_color = PicaToGL::ColorRGBA8(border_color); 831 auto gl_color = PicaToGL::ColorRGBA8(border_color);
571 glSamplerParameterfv(s, GL_TEXTURE_BORDER_COLOR, gl_color.data()); 832 glSamplerParameterfv(s, GL_TEXTURE_BORDER_COLOR, gl_color.data());
572 } 833 }
573 } 834 }
574} 835}
575 836
576void RasterizerOpenGL::ReconfigureColorTexture(TextureInfo& texture, Pica::Regs::ColorFormat format, u32 width, u32 height) {
577 GLint internal_format;
578
579 texture.format = format;
580 texture.width = width;
581 texture.height = height;
582
583 switch (format) {
584 case Pica::Regs::ColorFormat::RGBA8:
585 internal_format = GL_RGBA;
586 texture.gl_format = GL_RGBA;
587 texture.gl_type = GL_UNSIGNED_INT_8_8_8_8;
588 break;
589
590 case Pica::Regs::ColorFormat::RGB8:
591 // This pixel format uses BGR since GL_UNSIGNED_BYTE specifies byte-order, unlike every
592 // specific OpenGL type used in this function using native-endian (that is, little-endian
593 // mostly everywhere) for words or half-words.
594 // TODO: check how those behave on big-endian processors.
595 internal_format = GL_RGB;
596 texture.gl_format = GL_BGR;
597 texture.gl_type = GL_UNSIGNED_BYTE;
598 break;
599
600 case Pica::Regs::ColorFormat::RGB5A1:
601 internal_format = GL_RGBA;
602 texture.gl_format = GL_RGBA;
603 texture.gl_type = GL_UNSIGNED_SHORT_5_5_5_1;
604 break;
605
606 case Pica::Regs::ColorFormat::RGB565:
607 internal_format = GL_RGB;
608 texture.gl_format = GL_RGB;
609 texture.gl_type = GL_UNSIGNED_SHORT_5_6_5;
610 break;
611
612 case Pica::Regs::ColorFormat::RGBA4:
613 internal_format = GL_RGBA;
614 texture.gl_format = GL_RGBA;
615 texture.gl_type = GL_UNSIGNED_SHORT_4_4_4_4;
616 break;
617
618 default:
619 LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer texture color format %x", format);
620 UNIMPLEMENTED();
621 break;
622 }
623
624 state.texture_units[0].texture_2d = texture.texture.handle;
625 state.Apply();
626
627 glActiveTexture(GL_TEXTURE0);
628 glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0,
629 texture.gl_format, texture.gl_type, nullptr);
630
631 state.texture_units[0].texture_2d = 0;
632 state.Apply();
633}
634
635void RasterizerOpenGL::ReconfigureDepthTexture(DepthTextureInfo& texture, Pica::Regs::DepthFormat format, u32 width, u32 height) {
636 GLint internal_format;
637
638 texture.format = format;
639 texture.width = width;
640 texture.height = height;
641
642 switch (format) {
643 case Pica::Regs::DepthFormat::D16:
644 internal_format = GL_DEPTH_COMPONENT16;
645 texture.gl_format = GL_DEPTH_COMPONENT;
646 texture.gl_type = GL_UNSIGNED_SHORT;
647 break;
648
649 case Pica::Regs::DepthFormat::D24:
650 internal_format = GL_DEPTH_COMPONENT24;
651 texture.gl_format = GL_DEPTH_COMPONENT;
652 texture.gl_type = GL_UNSIGNED_INT;
653 break;
654
655 case Pica::Regs::DepthFormat::D24S8:
656 internal_format = GL_DEPTH24_STENCIL8;
657 texture.gl_format = GL_DEPTH_STENCIL;
658 texture.gl_type = GL_UNSIGNED_INT_24_8;
659 break;
660
661 default:
662 LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer texture depth format %x", format);
663 UNIMPLEMENTED();
664 break;
665 }
666
667 state.texture_units[0].texture_2d = texture.texture.handle;
668 state.Apply();
669
670 glActiveTexture(GL_TEXTURE0);
671 glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0,
672 texture.gl_format, texture.gl_type, nullptr);
673
674 state.texture_units[0].texture_2d = 0;
675 state.Apply();
676}
677
678void RasterizerOpenGL::SetShader() { 837void RasterizerOpenGL::SetShader() {
679 PicaShaderConfig config = PicaShaderConfig::CurrentConfig(); 838 PicaShaderConfig config = PicaShaderConfig::CurrentConfig();
680 std::unique_ptr<PicaShader> shader = std::make_unique<PicaShader>(); 839 std::unique_ptr<PicaShader> shader = std::make_unique<PicaShader>();
@@ -722,6 +881,8 @@ void RasterizerOpenGL::SetShader() {
722 glUniformBlockBinding(current_shader->shader.handle, block_index, 0); 881 glUniformBlockBinding(current_shader->shader.handle, block_index, 0);
723 882
724 // Update uniforms 883 // Update uniforms
884 SyncDepthScale();
885 SyncDepthOffset();
725 SyncAlphaTest(); 886 SyncAlphaTest();
726 SyncCombinerColor(); 887 SyncCombinerColor();
727 auto& tev_stages = Pica::g_state.regs.GetTevStages(); 888 auto& tev_stages = Pica::g_state.regs.GetTevStages();
@@ -730,6 +891,8 @@ void RasterizerOpenGL::SetShader() {
730 891
731 SyncGlobalAmbient(); 892 SyncGlobalAmbient();
732 for (int light_index = 0; light_index < 8; light_index++) { 893 for (int light_index = 0; light_index < 8; light_index++) {
894 SyncLightSpecular0(light_index);
895 SyncLightSpecular1(light_index);
733 SyncLightDiffuse(light_index); 896 SyncLightDiffuse(light_index);
734 SyncLightAmbient(light_index); 897 SyncLightAmbient(light_index);
735 SyncLightPosition(light_index); 898 SyncLightPosition(light_index);
@@ -737,83 +900,6 @@ void RasterizerOpenGL::SetShader() {
737 } 900 }
738} 901}
739 902
740void RasterizerOpenGL::SyncFramebuffer() {
741 const auto& regs = Pica::g_state.regs;
742
743 PAddr new_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress();
744 Pica::Regs::ColorFormat new_fb_color_format = regs.framebuffer.color_format;
745
746 PAddr new_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress();
747 Pica::Regs::DepthFormat new_fb_depth_format = regs.framebuffer.depth_format;
748
749 bool fb_size_changed = fb_color_texture.width != static_cast<GLsizei>(regs.framebuffer.GetWidth()) ||
750 fb_color_texture.height != static_cast<GLsizei>(regs.framebuffer.GetHeight());
751
752 bool color_fb_prop_changed = fb_color_texture.format != new_fb_color_format ||
753 fb_size_changed;
754
755 bool depth_fb_prop_changed = fb_depth_texture.format != new_fb_depth_format ||
756 fb_size_changed;
757
758 bool color_fb_modified = cached_fb_color_addr != new_fb_color_addr ||
759 color_fb_prop_changed;
760
761 bool depth_fb_modified = cached_fb_depth_addr != new_fb_depth_addr ||
762 depth_fb_prop_changed;
763
764 // Commit if framebuffer modified in any way
765 if (color_fb_modified)
766 CommitColorBuffer();
767
768 if (depth_fb_modified)
769 CommitDepthBuffer();
770
771 // Reconfigure framebuffer textures if any property has changed
772 if (color_fb_prop_changed) {
773 ReconfigureColorTexture(fb_color_texture, new_fb_color_format,
774 regs.framebuffer.GetWidth(), regs.framebuffer.GetHeight());
775 }
776
777 if (depth_fb_prop_changed) {
778 ReconfigureDepthTexture(fb_depth_texture, new_fb_depth_format,
779 regs.framebuffer.GetWidth(), regs.framebuffer.GetHeight());
780
781 // Only attach depth buffer as stencil if it supports stencil
782 switch (new_fb_depth_format) {
783 case Pica::Regs::DepthFormat::D16:
784 case Pica::Regs::DepthFormat::D24:
785 glFramebufferTexture2D(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
786 break;
787
788 case Pica::Regs::DepthFormat::D24S8:
789 glFramebufferTexture2D(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, fb_depth_texture.texture.handle, 0);
790 break;
791
792 default:
793 LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer depth format %x", new_fb_depth_format);
794 UNIMPLEMENTED();
795 break;
796 }
797 }
798
799 // Load buffer data again if fb modified in any way
800 if (color_fb_modified) {
801 cached_fb_color_addr = new_fb_color_addr;
802
803 ReloadColorBuffer();
804 }
805
806 if (depth_fb_modified) {
807 cached_fb_depth_addr = new_fb_depth_addr;
808
809 ReloadDepthBuffer();
810 }
811
812 GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER);
813 ASSERT_MSG(status == GL_FRAMEBUFFER_COMPLETE,
814 "OpenGL rasterizer framebuffer setup failed, status %X", status);
815}
816
817void RasterizerOpenGL::SyncCullMode() { 903void RasterizerOpenGL::SyncCullMode() {
818 const auto& regs = Pica::g_state.regs; 904 const auto& regs = Pica::g_state.regs;
819 905
@@ -839,13 +925,20 @@ void RasterizerOpenGL::SyncCullMode() {
839 } 925 }
840} 926}
841 927
842void RasterizerOpenGL::SyncDepthModifiers() { 928void RasterizerOpenGL::SyncDepthScale() {
843 float depth_scale = -Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_range).ToFloat32(); 929 float depth_scale = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_range).ToFloat32();
844 float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_far_plane).ToFloat32() / 2.0f; 930 if (depth_scale != uniform_block_data.data.depth_scale) {
931 uniform_block_data.data.depth_scale = depth_scale;
932 uniform_block_data.dirty = true;
933 }
934}
845 935
846 // TODO: Implement scale modifier 936void RasterizerOpenGL::SyncDepthOffset() {
847 uniform_block_data.data.depth_offset = depth_offset; 937 float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_near_plane).ToFloat32();
848 uniform_block_data.dirty = true; 938 if (depth_offset != uniform_block_data.data.depth_offset) {
939 uniform_block_data.data.depth_offset = depth_offset;
940 uniform_block_data.dirty = true;
941 }
849} 942}
850 943
851void RasterizerOpenGL::SyncBlendEnabled() { 944void RasterizerOpenGL::SyncBlendEnabled() {
@@ -854,6 +947,8 @@ void RasterizerOpenGL::SyncBlendEnabled() {
854 947
855void RasterizerOpenGL::SyncBlendFuncs() { 948void RasterizerOpenGL::SyncBlendFuncs() {
856 const auto& regs = Pica::g_state.regs; 949 const auto& regs = Pica::g_state.regs;
950 state.blend.rgb_equation = PicaToGL::BlendEquation(regs.output_merger.alpha_blending.blend_equation_rgb);
951 state.blend.a_equation = PicaToGL::BlendEquation(regs.output_merger.alpha_blending.blend_equation_a);
857 state.blend.src_rgb_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_rgb); 952 state.blend.src_rgb_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_rgb);
858 state.blend.dst_rgb_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_dest_rgb); 953 state.blend.dst_rgb_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_dest_rgb);
859 state.blend.src_a_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_a); 954 state.blend.src_a_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_a);
@@ -880,13 +975,39 @@ void RasterizerOpenGL::SyncLogicOp() {
880 state.logic_op = PicaToGL::LogicOp(Pica::g_state.regs.output_merger.logic_op); 975 state.logic_op = PicaToGL::LogicOp(Pica::g_state.regs.output_merger.logic_op);
881} 976}
882 977
978void RasterizerOpenGL::SyncColorWriteMask() {
979 const auto& regs = Pica::g_state.regs;
980
981 auto IsColorWriteEnabled = [&](u32 value) {
982 return (regs.framebuffer.allow_color_write != 0 && value != 0) ? GL_TRUE : GL_FALSE;
983 };
984
985 state.color_mask.red_enabled = IsColorWriteEnabled(regs.output_merger.red_enable);
986 state.color_mask.green_enabled = IsColorWriteEnabled(regs.output_merger.green_enable);
987 state.color_mask.blue_enabled = IsColorWriteEnabled(regs.output_merger.blue_enable);
988 state.color_mask.alpha_enabled = IsColorWriteEnabled(regs.output_merger.alpha_enable);
989}
990
991void RasterizerOpenGL::SyncStencilWriteMask() {
992 const auto& regs = Pica::g_state.regs;
993 state.stencil.write_mask = (regs.framebuffer.allow_depth_stencil_write != 0)
994 ? static_cast<GLuint>(regs.output_merger.stencil_test.write_mask)
995 : 0;
996}
997
998void RasterizerOpenGL::SyncDepthWriteMask() {
999 const auto& regs = Pica::g_state.regs;
1000 state.depth.write_mask = (regs.framebuffer.allow_depth_stencil_write != 0 && regs.output_merger.depth_write_enable)
1001 ? GL_TRUE
1002 : GL_FALSE;
1003}
1004
883void RasterizerOpenGL::SyncStencilTest() { 1005void RasterizerOpenGL::SyncStencilTest() {
884 const auto& regs = Pica::g_state.regs; 1006 const auto& regs = Pica::g_state.regs;
885 state.stencil.test_enabled = regs.output_merger.stencil_test.enable && regs.framebuffer.depth_format == Pica::Regs::DepthFormat::D24S8; 1007 state.stencil.test_enabled = regs.output_merger.stencil_test.enable && regs.framebuffer.depth_format == Pica::Regs::DepthFormat::D24S8;
886 state.stencil.test_func = PicaToGL::CompareFunc(regs.output_merger.stencil_test.func); 1008 state.stencil.test_func = PicaToGL::CompareFunc(regs.output_merger.stencil_test.func);
887 state.stencil.test_ref = regs.output_merger.stencil_test.reference_value; 1009 state.stencil.test_ref = regs.output_merger.stencil_test.reference_value;
888 state.stencil.test_mask = regs.output_merger.stencil_test.input_mask; 1010 state.stencil.test_mask = regs.output_merger.stencil_test.input_mask;
889 state.stencil.write_mask = regs.output_merger.stencil_test.write_mask;
890 state.stencil.action_stencil_fail = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_stencil_fail); 1011 state.stencil.action_stencil_fail = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_stencil_fail);
891 state.stencil.action_depth_fail = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_depth_fail); 1012 state.stencil.action_depth_fail = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_depth_fail);
892 state.stencil.action_depth_pass = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_depth_pass); 1013 state.stencil.action_depth_pass = PicaToGL::StencilOp(regs.output_merger.stencil_test.action_depth_pass);
@@ -898,11 +1019,6 @@ void RasterizerOpenGL::SyncDepthTest() {
898 regs.output_merger.depth_write_enable == 1; 1019 regs.output_merger.depth_write_enable == 1;
899 state.depth.test_func = regs.output_merger.depth_test_enable == 1 ? 1020 state.depth.test_func = regs.output_merger.depth_test_enable == 1 ?
900 PicaToGL::CompareFunc(regs.output_merger.depth_test_func) : GL_ALWAYS; 1021 PicaToGL::CompareFunc(regs.output_merger.depth_test_func) : GL_ALWAYS;
901 state.color_mask.red_enabled = regs.output_merger.red_enable;
902 state.color_mask.green_enabled = regs.output_merger.green_enable;
903 state.color_mask.blue_enabled = regs.output_merger.blue_enable;
904 state.color_mask.alpha_enabled = regs.output_merger.alpha_enable;
905 state.depth.write_mask = regs.output_merger.depth_write_enable ? GL_TRUE : GL_FALSE;
906} 1022}
907 1023
908void RasterizerOpenGL::SyncCombinerColor() { 1024void RasterizerOpenGL::SyncCombinerColor() {
@@ -989,229 +1105,3 @@ void RasterizerOpenGL::SyncLightPosition(int light_index) {
989 uniform_block_data.dirty = true; 1105 uniform_block_data.dirty = true;
990 } 1106 }
991} 1107}
992
993void RasterizerOpenGL::SyncDrawState() {
994 const auto& regs = Pica::g_state.regs;
995
996 // Sync the viewport
997 GLsizei viewport_width = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_x).ToFloat32() * 2;
998 GLsizei viewport_height = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_y).ToFloat32() * 2;
999
1000 // OpenGL uses different y coordinates, so negate corner offset and flip origin
1001 // TODO: Ensure viewport_corner.x should not be negated or origin flipped
1002 // TODO: Use floating-point viewports for accuracy if supported
1003 glViewport((GLsizei)regs.viewport_corner.x,
1004 (GLsizei)regs.viewport_corner.y,
1005 viewport_width, viewport_height);
1006
1007 // Sync bound texture(s), upload if not cached
1008 const auto pica_textures = regs.GetTextures();
1009 for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) {
1010 const auto& texture = pica_textures[texture_index];
1011
1012 if (texture.enabled) {
1013 texture_samplers[texture_index].SyncWithConfig(texture.config);
1014 res_cache.LoadAndBindTexture(state, texture_index, texture);
1015 } else {
1016 state.texture_units[texture_index].texture_2d = 0;
1017 }
1018 }
1019
1020 state.draw.uniform_buffer = uniform_buffer.handle;
1021 state.Apply();
1022}
1023
1024MICROPROFILE_DEFINE(OpenGL_FramebufferReload, "OpenGL", "FB Reload", MP_RGB(70, 70, 200));
1025
1026void RasterizerOpenGL::ReloadColorBuffer() {
1027 u8* color_buffer = Memory::GetPhysicalPointer(cached_fb_color_addr);
1028
1029 if (color_buffer == nullptr)
1030 return;
1031
1032 MICROPROFILE_SCOPE(OpenGL_FramebufferReload);
1033
1034 u32 bytes_per_pixel = Pica::Regs::BytesPerColorPixel(fb_color_texture.format);
1035
1036 std::unique_ptr<u8[]> temp_fb_color_buffer(new u8[fb_color_texture.width * fb_color_texture.height * bytes_per_pixel]);
1037
1038 // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion is necessary.
1039 for (int y = 0; y < fb_color_texture.height; ++y) {
1040 for (int x = 0; x < fb_color_texture.width; ++x) {
1041 const u32 coarse_y = y & ~7;
1042 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_color_texture.width * bytes_per_pixel;
1043 u32 gl_pixel_index = (x + (fb_color_texture.height - 1 - y) * fb_color_texture.width) * bytes_per_pixel;
1044
1045 u8* pixel = color_buffer + dst_offset;
1046 memcpy(&temp_fb_color_buffer[gl_pixel_index], pixel, bytes_per_pixel);
1047 }
1048 }
1049
1050 state.texture_units[0].texture_2d = fb_color_texture.texture.handle;
1051 state.Apply();
1052
1053 glActiveTexture(GL_TEXTURE0);
1054 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, fb_color_texture.width, fb_color_texture.height,
1055 fb_color_texture.gl_format, fb_color_texture.gl_type, temp_fb_color_buffer.get());
1056
1057 state.texture_units[0].texture_2d = 0;
1058 state.Apply();
1059}
1060
1061void RasterizerOpenGL::ReloadDepthBuffer() {
1062 if (cached_fb_depth_addr == 0)
1063 return;
1064
1065 // TODO: Appears to work, but double-check endianness of depth values and order of depth-stencil
1066 u8* depth_buffer = Memory::GetPhysicalPointer(cached_fb_depth_addr);
1067
1068 if (depth_buffer == nullptr)
1069 return;
1070
1071 MICROPROFILE_SCOPE(OpenGL_FramebufferReload);
1072
1073 u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format);
1074
1075 // OpenGL needs 4 bpp alignment for D24
1076 u32 gl_bpp = bytes_per_pixel == 3 ? 4 : bytes_per_pixel;
1077
1078 std::unique_ptr<u8[]> temp_fb_depth_buffer(new u8[fb_depth_texture.width * fb_depth_texture.height * gl_bpp]);
1079
1080 u8* temp_fb_depth_data = bytes_per_pixel == 3 ? (temp_fb_depth_buffer.get() + 1) : temp_fb_depth_buffer.get();
1081
1082 if (fb_depth_texture.format == Pica::Regs::DepthFormat::D24S8) {
1083 for (int y = 0; y < fb_depth_texture.height; ++y) {
1084 for (int x = 0; x < fb_depth_texture.width; ++x) {
1085 const u32 coarse_y = y & ~7;
1086 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel;
1087 u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width);
1088
1089 u8* pixel = depth_buffer + dst_offset;
1090 u32 depth_stencil = *(u32*)pixel;
1091 ((u32*)temp_fb_depth_data)[gl_pixel_index] = (depth_stencil << 8) | (depth_stencil >> 24);
1092 }
1093 }
1094 } else {
1095 for (int y = 0; y < fb_depth_texture.height; ++y) {
1096 for (int x = 0; x < fb_depth_texture.width; ++x) {
1097 const u32 coarse_y = y & ~7;
1098 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel;
1099 u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width) * gl_bpp;
1100
1101 u8* pixel = depth_buffer + dst_offset;
1102 memcpy(&temp_fb_depth_data[gl_pixel_index], pixel, bytes_per_pixel);
1103 }
1104 }
1105 }
1106
1107 state.texture_units[0].texture_2d = fb_depth_texture.texture.handle;
1108 state.Apply();
1109
1110 glActiveTexture(GL_TEXTURE0);
1111 if (fb_depth_texture.format == Pica::Regs::DepthFormat::D24S8) {
1112 // TODO(Subv): There is a bug with Intel Windows drivers that makes glTexSubImage2D not change the stencil buffer.
1113 // The bug has been reported to Intel (https://communities.intel.com/message/324464)
1114 glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH24_STENCIL8, fb_depth_texture.width, fb_depth_texture.height, 0,
1115 GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, temp_fb_depth_buffer.get());
1116 } else {
1117 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, fb_depth_texture.width, fb_depth_texture.height,
1118 fb_depth_texture.gl_format, fb_depth_texture.gl_type, temp_fb_depth_buffer.get());
1119 }
1120
1121 state.texture_units[0].texture_2d = 0;
1122 state.Apply();
1123}
1124
1125Common::Profiling::TimingCategory buffer_commit_category("Framebuffer Commit");
1126MICROPROFILE_DEFINE(OpenGL_FramebufferCommit, "OpenGL", "FB Commit", MP_RGB(70, 70, 200));
1127
1128void RasterizerOpenGL::CommitColorBuffer() {
1129 if (cached_fb_color_addr != 0) {
1130 u8* color_buffer = Memory::GetPhysicalPointer(cached_fb_color_addr);
1131
1132 if (color_buffer != nullptr) {
1133 Common::Profiling::ScopeTimer timer(buffer_commit_category);
1134 MICROPROFILE_SCOPE(OpenGL_FramebufferCommit);
1135
1136 u32 bytes_per_pixel = Pica::Regs::BytesPerColorPixel(fb_color_texture.format);
1137
1138 std::unique_ptr<u8[]> temp_gl_color_buffer(new u8[fb_color_texture.width * fb_color_texture.height * bytes_per_pixel]);
1139
1140 state.texture_units[0].texture_2d = fb_color_texture.texture.handle;
1141 state.Apply();
1142
1143 glActiveTexture(GL_TEXTURE0);
1144 glGetTexImage(GL_TEXTURE_2D, 0, fb_color_texture.gl_format, fb_color_texture.gl_type, temp_gl_color_buffer.get());
1145
1146 state.texture_units[0].texture_2d = 0;
1147 state.Apply();
1148
1149 // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion is necessary.
1150 for (int y = 0; y < fb_color_texture.height; ++y) {
1151 for (int x = 0; x < fb_color_texture.width; ++x) {
1152 const u32 coarse_y = y & ~7;
1153 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_color_texture.width * bytes_per_pixel;
1154 u32 gl_pixel_index = x * bytes_per_pixel + (fb_color_texture.height - 1 - y) * fb_color_texture.width * bytes_per_pixel;
1155
1156 u8* pixel = color_buffer + dst_offset;
1157 memcpy(pixel, &temp_gl_color_buffer[gl_pixel_index], bytes_per_pixel);
1158 }
1159 }
1160 }
1161 }
1162}
1163
1164void RasterizerOpenGL::CommitDepthBuffer() {
1165 if (cached_fb_depth_addr != 0) {
1166 // TODO: Output seems correct visually, but doesn't quite match sw renderer output. One of them is wrong.
1167 u8* depth_buffer = Memory::GetPhysicalPointer(cached_fb_depth_addr);
1168
1169 if (depth_buffer != nullptr) {
1170 Common::Profiling::ScopeTimer timer(buffer_commit_category);
1171 MICROPROFILE_SCOPE(OpenGL_FramebufferCommit);
1172
1173 u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format);
1174
1175 // OpenGL needs 4 bpp alignment for D24
1176 u32 gl_bpp = bytes_per_pixel == 3 ? 4 : bytes_per_pixel;
1177
1178 std::unique_ptr<u8[]> temp_gl_depth_buffer(new u8[fb_depth_texture.width * fb_depth_texture.height * gl_bpp]);
1179
1180 state.texture_units[0].texture_2d = fb_depth_texture.texture.handle;
1181 state.Apply();
1182
1183 glActiveTexture(GL_TEXTURE0);
1184 glGetTexImage(GL_TEXTURE_2D, 0, fb_depth_texture.gl_format, fb_depth_texture.gl_type, temp_gl_depth_buffer.get());
1185
1186 state.texture_units[0].texture_2d = 0;
1187 state.Apply();
1188
1189 u8* temp_gl_depth_data = bytes_per_pixel == 3 ? (temp_gl_depth_buffer.get() + 1) : temp_gl_depth_buffer.get();
1190
1191 if (fb_depth_texture.format == Pica::Regs::DepthFormat::D24S8) {
1192 for (int y = 0; y < fb_depth_texture.height; ++y) {
1193 for (int x = 0; x < fb_depth_texture.width; ++x) {
1194 const u32 coarse_y = y & ~7;
1195 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel;
1196 u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width);
1197
1198 u8* pixel = depth_buffer + dst_offset;
1199 u32 depth_stencil = ((u32*)temp_gl_depth_data)[gl_pixel_index];
1200 *(u32*)pixel = (depth_stencil >> 8) | (depth_stencil << 24);
1201 }
1202 }
1203 } else {
1204 for (int y = 0; y < fb_depth_texture.height; ++y) {
1205 for (int x = 0; x < fb_depth_texture.width; ++x) {
1206 const u32 coarse_y = y & ~7;
1207 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel;
1208 u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width) * gl_bpp;
1209
1210 u8* pixel = depth_buffer + dst_offset;
1211 memcpy(pixel, &temp_gl_depth_data[gl_pixel_index], bytes_per_pixel);
1212 }
1213 }
1214 }
1215 }
1216 }
1217}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index fc85aa3ff..d70369400 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -4,22 +4,33 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
7#include <cstddef> 8#include <cstddef>
8#include <cstring> 9#include <cstring>
9#include <memory> 10#include <memory>
10#include <vector> 11#include <vector>
11#include <unordered_map> 12#include <unordered_map>
12 13
14#include <glad/glad.h>
15
16#include "common/bit_field.h"
13#include "common/common_types.h" 17#include "common/common_types.h"
14#include "common/hash.h" 18#include "common/hash.h"
19#include "common/vector_math.h"
20
21#include "core/hw/gpu.h"
15 22
16#include "video_core/pica.h" 23#include "video_core/pica.h"
17#include "video_core/pica_state.h" 24#include "video_core/pica_state.h"
25#include "video_core/pica_types.h"
18#include "video_core/rasterizer_interface.h" 26#include "video_core/rasterizer_interface.h"
19#include "video_core/renderer_opengl/gl_rasterizer_cache.h" 27#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
28#include "video_core/renderer_opengl/gl_resource_manager.h"
20#include "video_core/renderer_opengl/gl_state.h" 29#include "video_core/renderer_opengl/gl_state.h"
21#include "video_core/renderer_opengl/pica_to_gl.h" 30#include "video_core/renderer_opengl/pica_to_gl.h"
22#include "video_core/shader/shader_interpreter.h" 31#include "video_core/shader/shader.h"
32
33struct ScreenInfo;
23 34
24/** 35/**
25 * This struct contains all state used to generate the GLSL shader program that emulates the current 36 * This struct contains all state used to generate the GLSL shader program that emulates the current
@@ -28,158 +39,185 @@
28 * directly accessing Pica registers. This should reduce the risk of bugs in shader generation where 39 * directly accessing Pica registers. This should reduce the risk of bugs in shader generation where
29 * Pica state is not being captured in the shader cache key, thereby resulting in (what should be) 40 * Pica state is not being captured in the shader cache key, thereby resulting in (what should be)
30 * two separate shaders sharing the same key. 41 * two separate shaders sharing the same key.
42 *
43 * We use a union because "implicitly-defined copy/move constructor for a union X copies the object representation of X."
44 * and "implicitly-defined copy assignment operator for a union X copies the object representation (3.9) of X."
45 * = Bytewise copy instead of memberwise copy.
46 * This is important because the padding bytes are included in the hash and comparison between objects.
31 */ 47 */
32struct PicaShaderConfig { 48union PicaShaderConfig {
49
33 /// Construct a PicaShaderConfig with the current Pica register configuration. 50 /// Construct a PicaShaderConfig with the current Pica register configuration.
34 static PicaShaderConfig CurrentConfig() { 51 static PicaShaderConfig CurrentConfig() {
35 PicaShaderConfig res; 52 PicaShaderConfig res;
53
54 auto& state = res.state;
55 std::memset(&state, 0, sizeof(PicaShaderConfig::State));
56
36 const auto& regs = Pica::g_state.regs; 57 const auto& regs = Pica::g_state.regs;
37 58
38 res.alpha_test_func = regs.output_merger.alpha_test.enable ? 59 state.depthmap_enable = regs.depthmap_enable;
60
61 state.alpha_test_func = regs.output_merger.alpha_test.enable ?
39 regs.output_merger.alpha_test.func.Value() : Pica::Regs::CompareFunc::Always; 62 regs.output_merger.alpha_test.func.Value() : Pica::Regs::CompareFunc::Always;
40 63
41 // Copy relevant TevStageConfig fields only. We're doing this manually (instead of calling 64 state.texture0_type = regs.texture0.type;
42 // the GetTevStages() function) because BitField explicitly disables copies. 65
43 66 // Copy relevant tev stages fields.
44 res.tev_stages[0].sources_raw = regs.tev_stage0.sources_raw; 67 // We don't sync const_color here because of the high variance, it is a
45 res.tev_stages[1].sources_raw = regs.tev_stage1.sources_raw; 68 // shader uniform instead.
46 res.tev_stages[2].sources_raw = regs.tev_stage2.sources_raw; 69 const auto& tev_stages = regs.GetTevStages();
47 res.tev_stages[3].sources_raw = regs.tev_stage3.sources_raw; 70 DEBUG_ASSERT(state.tev_stages.size() == tev_stages.size());
48 res.tev_stages[4].sources_raw = regs.tev_stage4.sources_raw; 71 for (size_t i = 0; i < tev_stages.size(); i++) {
49 res.tev_stages[5].sources_raw = regs.tev_stage5.sources_raw; 72 const auto& tev_stage = tev_stages[i];
50 73 state.tev_stages[i].sources_raw = tev_stage.sources_raw;
51 res.tev_stages[0].modifiers_raw = regs.tev_stage0.modifiers_raw; 74 state.tev_stages[i].modifiers_raw = tev_stage.modifiers_raw;
52 res.tev_stages[1].modifiers_raw = regs.tev_stage1.modifiers_raw; 75 state.tev_stages[i].ops_raw = tev_stage.ops_raw;
53 res.tev_stages[2].modifiers_raw = regs.tev_stage2.modifiers_raw; 76 state.tev_stages[i].scales_raw = tev_stage.scales_raw;
54 res.tev_stages[3].modifiers_raw = regs.tev_stage3.modifiers_raw; 77 }
55 res.tev_stages[4].modifiers_raw = regs.tev_stage4.modifiers_raw; 78
56 res.tev_stages[5].modifiers_raw = regs.tev_stage5.modifiers_raw; 79 state.combiner_buffer_input =
57
58 res.tev_stages[0].ops_raw = regs.tev_stage0.ops_raw;
59 res.tev_stages[1].ops_raw = regs.tev_stage1.ops_raw;
60 res.tev_stages[2].ops_raw = regs.tev_stage2.ops_raw;
61 res.tev_stages[3].ops_raw = regs.tev_stage3.ops_raw;
62 res.tev_stages[4].ops_raw = regs.tev_stage4.ops_raw;
63 res.tev_stages[5].ops_raw = regs.tev_stage5.ops_raw;
64
65 res.tev_stages[0].scales_raw = regs.tev_stage0.scales_raw;
66 res.tev_stages[1].scales_raw = regs.tev_stage1.scales_raw;
67 res.tev_stages[2].scales_raw = regs.tev_stage2.scales_raw;
68 res.tev_stages[3].scales_raw = regs.tev_stage3.scales_raw;
69 res.tev_stages[4].scales_raw = regs.tev_stage4.scales_raw;
70 res.tev_stages[5].scales_raw = regs.tev_stage5.scales_raw;
71
72 res.combiner_buffer_input =
73 regs.tev_combiner_buffer_input.update_mask_rgb.Value() | 80 regs.tev_combiner_buffer_input.update_mask_rgb.Value() |
74 regs.tev_combiner_buffer_input.update_mask_a.Value() << 4; 81 regs.tev_combiner_buffer_input.update_mask_a.Value() << 4;
75 82
76 // Fragment lighting 83 // Fragment lighting
77 84
78 res.lighting.enable = !regs.lighting.disable; 85 state.lighting.enable = !regs.lighting.disable;
79 res.lighting.src_num = regs.lighting.num_lights + 1; 86 state.lighting.src_num = regs.lighting.num_lights + 1;
80 87
81 for (unsigned light_index = 0; light_index < res.lighting.src_num; ++light_index) { 88 for (unsigned light_index = 0; light_index < state.lighting.src_num; ++light_index) {
82 unsigned num = regs.lighting.light_enable.GetNum(light_index); 89 unsigned num = regs.lighting.light_enable.GetNum(light_index);
83 const auto& light = regs.lighting.light[num]; 90 const auto& light = regs.lighting.light[num];
84 res.lighting.light[light_index].num = num; 91 state.lighting.light[light_index].num = num;
85 res.lighting.light[light_index].directional = light.directional != 0; 92 state.lighting.light[light_index].directional = light.directional != 0;
86 res.lighting.light[light_index].two_sided_diffuse = light.two_sided_diffuse != 0; 93 state.lighting.light[light_index].two_sided_diffuse = light.two_sided_diffuse != 0;
87 res.lighting.light[light_index].dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num); 94 state.lighting.light[light_index].dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num);
88 res.lighting.light[light_index].dist_atten_bias = Pica::float20::FromRaw(light.dist_atten_bias).ToFloat32(); 95 state.lighting.light[light_index].dist_atten_bias = Pica::float20::FromRaw(light.dist_atten_bias).ToFloat32();
89 res.lighting.light[light_index].dist_atten_scale = Pica::float20::FromRaw(light.dist_atten_scale).ToFloat32(); 96 state.lighting.light[light_index].dist_atten_scale = Pica::float20::FromRaw(light.dist_atten_scale).ToFloat32();
90 } 97 }
91 98
92 res.lighting.lut_d0.enable = regs.lighting.disable_lut_d0 == 0; 99 state.lighting.lut_d0.enable = regs.lighting.disable_lut_d0 == 0;
93 res.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0; 100 state.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0;
94 res.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value(); 101 state.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value();
95 res.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0); 102 state.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0);
96 103
97 res.lighting.lut_d1.enable = regs.lighting.disable_lut_d1 == 0; 104 state.lighting.lut_d1.enable = regs.lighting.disable_lut_d1 == 0;
98 res.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0; 105 state.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0;
99 res.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value(); 106 state.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value();
100 res.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1); 107 state.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1);
101 108
102 res.lighting.lut_fr.enable = regs.lighting.disable_lut_fr == 0; 109 state.lighting.lut_fr.enable = regs.lighting.disable_lut_fr == 0;
103 res.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0; 110 state.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0;
104 res.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value(); 111 state.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value();
105 res.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr); 112 state.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr);
106 113
107 res.lighting.lut_rr.enable = regs.lighting.disable_lut_rr == 0; 114 state.lighting.lut_rr.enable = regs.lighting.disable_lut_rr == 0;
108 res.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0; 115 state.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0;
109 res.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value(); 116 state.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value();
110 res.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr); 117 state.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr);
111 118
112 res.lighting.lut_rg.enable = regs.lighting.disable_lut_rg == 0; 119 state.lighting.lut_rg.enable = regs.lighting.disable_lut_rg == 0;
113 res.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0; 120 state.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0;
114 res.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value(); 121 state.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value();
115 res.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg); 122 state.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg);
116 123
117 res.lighting.lut_rb.enable = regs.lighting.disable_lut_rb == 0; 124 state.lighting.lut_rb.enable = regs.lighting.disable_lut_rb == 0;
118 res.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0; 125 state.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0;
119 res.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value(); 126 state.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value();
120 res.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb); 127 state.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb);
121 128
122 res.lighting.config = regs.lighting.config; 129 state.lighting.config = regs.lighting.config;
123 res.lighting.fresnel_selector = regs.lighting.fresnel_selector; 130 state.lighting.fresnel_selector = regs.lighting.fresnel_selector;
124 res.lighting.bump_mode = regs.lighting.bump_mode; 131 state.lighting.bump_mode = regs.lighting.bump_mode;
125 res.lighting.bump_selector = regs.lighting.bump_selector; 132 state.lighting.bump_selector = regs.lighting.bump_selector;
126 res.lighting.bump_renorm = regs.lighting.disable_bump_renorm == 0; 133 state.lighting.bump_renorm = regs.lighting.disable_bump_renorm == 0;
127 res.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0; 134 state.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0;
128 135
129 return res; 136 return res;
130 } 137 }
131 138
132 bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const { 139 bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
133 return (stage_index < 4) && (combiner_buffer_input & (1 << stage_index)); 140 return (stage_index < 4) && (state.combiner_buffer_input & (1 << stage_index));
134 } 141 }
135 142
136 bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const { 143 bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const {
137 return (stage_index < 4) && ((combiner_buffer_input >> 4) & (1 << stage_index)); 144 return (stage_index < 4) && ((state.combiner_buffer_input >> 4) & (1 << stage_index));
138 } 145 }
139 146
140 bool operator ==(const PicaShaderConfig& o) const { 147 bool operator ==(const PicaShaderConfig& o) const {
141 return std::memcmp(this, &o, sizeof(PicaShaderConfig)) == 0; 148 return std::memcmp(&state, &o.state, sizeof(PicaShaderConfig::State)) == 0;
142 }; 149 };
143 150
144 Pica::Regs::CompareFunc alpha_test_func = Pica::Regs::CompareFunc::Never; 151 // NOTE: MSVC15 (Update 2) doesn't think `delete`'d constructors and operators are TC.
145 std::array<Pica::Regs::TevStageConfig, 6> tev_stages = {}; 152 // This makes BitField not TC when used in a union or struct so we have to resort
146 u8 combiner_buffer_input = 0; 153 // to this ugly hack.
154 // Once that bug is fixed we can use Pica::Regs::TevStageConfig here.
155 // Doesn't include const_color because we don't sync it, see comment in CurrentConfig()
156 struct TevStageConfigRaw {
157 u32 sources_raw;
158 u32 modifiers_raw;
159 u32 ops_raw;
160 u32 scales_raw;
161 explicit operator Pica::Regs::TevStageConfig() const noexcept {
162 Pica::Regs::TevStageConfig stage;
163 stage.sources_raw = sources_raw;
164 stage.modifiers_raw = modifiers_raw;
165 stage.ops_raw = ops_raw;
166 stage.const_color = 0;
167 stage.scales_raw = scales_raw;
168 return stage;
169 }
170 };
147 171
148 struct { 172 struct State {
149 struct { 173
150 unsigned num = 0; 174 Pica::Regs::CompareFunc alpha_test_func;
151 bool directional = false; 175 Pica::Regs::TextureConfig::TextureType texture0_type;
152 bool two_sided_diffuse = false; 176 std::array<TevStageConfigRaw, 6> tev_stages;
153 bool dist_atten_enable = false; 177 u8 combiner_buffer_input;
154 GLfloat dist_atten_scale = 0.0f; 178
155 GLfloat dist_atten_bias = 0.0f; 179 Pica::Regs::DepthBuffering depthmap_enable;
156 } light[8];
157
158 bool enable = false;
159 unsigned src_num = 0;
160 Pica::Regs::LightingBumpMode bump_mode = Pica::Regs::LightingBumpMode::None;
161 unsigned bump_selector = 0;
162 bool bump_renorm = false;
163 bool clamp_highlights = false;
164
165 Pica::Regs::LightingConfig config = Pica::Regs::LightingConfig::Config0;
166 Pica::Regs::LightingFresnelSelector fresnel_selector = Pica::Regs::LightingFresnelSelector::None;
167 180
168 struct { 181 struct {
169 bool enable = false; 182 struct {
170 bool abs_input = false; 183 unsigned num;
171 Pica::Regs::LightingLutInput type = Pica::Regs::LightingLutInput::NH; 184 bool directional;
172 float scale = 1.0f; 185 bool two_sided_diffuse;
173 } lut_d0, lut_d1, lut_fr, lut_rr, lut_rg, lut_rb; 186 bool dist_atten_enable;
174 } lighting; 187 GLfloat dist_atten_scale;
188 GLfloat dist_atten_bias;
189 } light[8];
190
191 bool enable;
192 unsigned src_num;
193 Pica::Regs::LightingBumpMode bump_mode;
194 unsigned bump_selector;
195 bool bump_renorm;
196 bool clamp_highlights;
197
198 Pica::Regs::LightingConfig config;
199 Pica::Regs::LightingFresnelSelector fresnel_selector;
200
201 struct {
202 bool enable;
203 bool abs_input;
204 Pica::Regs::LightingLutInput type;
205 float scale;
206 } lut_d0, lut_d1, lut_fr, lut_rr, lut_rg, lut_rb;
207 } lighting;
208
209 } state;
175}; 210};
211#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER)
212static_assert(std::is_trivially_copyable<PicaShaderConfig::State>::value, "PicaShaderConfig::State must be trivially copyable");
213#endif
176 214
177namespace std { 215namespace std {
178 216
179template <> 217template <>
180struct hash<PicaShaderConfig> { 218struct hash<PicaShaderConfig> {
181 size_t operator()(const PicaShaderConfig& k) const { 219 size_t operator()(const PicaShaderConfig& k) const {
182 return Common::ComputeHash64(&k, sizeof(PicaShaderConfig)); 220 return Common::ComputeHash64(&k.state, sizeof(PicaShaderConfig::State));
183 } 221 }
184}; 222};
185 223
@@ -191,16 +229,17 @@ public:
191 RasterizerOpenGL(); 229 RasterizerOpenGL();
192 ~RasterizerOpenGL() override; 230 ~RasterizerOpenGL() override;
193 231
194 void InitObjects() override;
195 void Reset() override;
196 void AddTriangle(const Pica::Shader::OutputVertex& v0, 232 void AddTriangle(const Pica::Shader::OutputVertex& v0,
197 const Pica::Shader::OutputVertex& v1, 233 const Pica::Shader::OutputVertex& v1,
198 const Pica::Shader::OutputVertex& v2) override; 234 const Pica::Shader::OutputVertex& v2) override;
199 void DrawTriangles() override; 235 void DrawTriangles() override;
200 void FlushFramebuffer() override;
201 void NotifyPicaRegisterChanged(u32 id) override; 236 void NotifyPicaRegisterChanged(u32 id) override;
237 void FlushAll() override;
202 void FlushRegion(PAddr addr, u32 size) override; 238 void FlushRegion(PAddr addr, u32 size) override;
203 void InvalidateRegion(PAddr addr, u32 size) override; 239 void FlushAndInvalidateRegion(PAddr addr, u32 size) override;
240 bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override;
241 bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override;
242 bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) override;
204 243
205 /// OpenGL shader generated for a given Pica register state 244 /// OpenGL shader generated for a given Pica register state
206 struct PicaShader { 245 struct PicaShader {
@@ -210,26 +249,6 @@ public:
210 249
211private: 250private:
212 251
213 /// Structure used for storing information about color textures
214 struct TextureInfo {
215 OGLTexture texture;
216 GLsizei width;
217 GLsizei height;
218 Pica::Regs::ColorFormat format;
219 GLenum gl_format;
220 GLenum gl_type;
221 };
222
223 /// Structure used for storing information about depth textures
224 struct DepthTextureInfo {
225 OGLTexture texture;
226 GLsizei width;
227 GLsizei height;
228 Pica::Regs::DepthFormat format;
229 GLenum gl_format;
230 GLenum gl_type;
231 };
232
233 struct SamplerInfo { 252 struct SamplerInfo {
234 using TextureConfig = Pica::Regs::TextureConfig; 253 using TextureConfig = Pica::Regs::TextureConfig;
235 254
@@ -265,6 +284,7 @@ private:
265 tex_coord1[1] = v.tc1.y.ToFloat32(); 284 tex_coord1[1] = v.tc1.y.ToFloat32();
266 tex_coord2[0] = v.tc2.x.ToFloat32(); 285 tex_coord2[0] = v.tc2.x.ToFloat32();
267 tex_coord2[1] = v.tc2.y.ToFloat32(); 286 tex_coord2[1] = v.tc2.y.ToFloat32();
287 tex_coord0_w = v.tc0_w.ToFloat32();
268 normquat[0] = v.quat.x.ToFloat32(); 288 normquat[0] = v.quat.x.ToFloat32();
269 normquat[1] = v.quat.y.ToFloat32(); 289 normquat[1] = v.quat.y.ToFloat32();
270 normquat[2] = v.quat.z.ToFloat32(); 290 normquat[2] = v.quat.z.ToFloat32();
@@ -285,6 +305,7 @@ private:
285 GLfloat tex_coord0[2]; 305 GLfloat tex_coord0[2];
286 GLfloat tex_coord1[2]; 306 GLfloat tex_coord1[2];
287 GLfloat tex_coord2[2]; 307 GLfloat tex_coord2[2];
308 GLfloat tex_coord0_w;
288 GLfloat normquat[4]; 309 GLfloat normquat[4];
289 GLfloat view[3]; 310 GLfloat view[3];
290 }; 311 };
@@ -303,6 +324,7 @@ private:
303 GLvec4 const_color[6]; 324 GLvec4 const_color[6];
304 GLvec4 tev_combiner_buffer_color; 325 GLvec4 tev_combiner_buffer_color;
305 GLint alphatest_ref; 326 GLint alphatest_ref;
327 GLfloat depth_scale;
306 GLfloat depth_offset; 328 GLfloat depth_offset;
307 alignas(16) GLvec3 lighting_global_ambient; 329 alignas(16) GLvec3 lighting_global_ambient;
308 LightSrc light_src[8]; 330 LightSrc light_src[8];
@@ -311,23 +333,17 @@ private:
311 static_assert(sizeof(UniformData) == 0x310, "The size of the UniformData structure has changed, update the structure in the shader"); 333 static_assert(sizeof(UniformData) == 0x310, "The size of the UniformData structure has changed, update the structure in the shader");
312 static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec"); 334 static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec");
313 335
314 /// Reconfigure the OpenGL color texture to use the given format and dimensions
315 void ReconfigureColorTexture(TextureInfo& texture, Pica::Regs::ColorFormat format, u32 width, u32 height);
316
317 /// Reconfigure the OpenGL depth texture to use the given format and dimensions
318 void ReconfigureDepthTexture(DepthTextureInfo& texture, Pica::Regs::DepthFormat format, u32 width, u32 height);
319
320 /// Sets the OpenGL shader in accordance with the current PICA register state 336 /// Sets the OpenGL shader in accordance with the current PICA register state
321 void SetShader(); 337 void SetShader();
322 338
323 /// Syncs the state and contents of the OpenGL framebuffer to match the current PICA framebuffer
324 void SyncFramebuffer();
325
326 /// Syncs the cull mode to match the PICA register 339 /// Syncs the cull mode to match the PICA register
327 void SyncCullMode(); 340 void SyncCullMode();
328 341
329 /// Syncs the depth scale and offset to match the PICA registers 342 /// Syncs the depth scale to match the PICA register
330 void SyncDepthModifiers(); 343 void SyncDepthScale();
344
345 /// Syncs the depth offset to match the PICA register
346 void SyncDepthOffset();
331 347
332 /// Syncs the blend enabled status to match the PICA register 348 /// Syncs the blend enabled status to match the PICA register
333 void SyncBlendEnabled(); 349 void SyncBlendEnabled();
@@ -344,90 +360,70 @@ private:
344 /// Syncs the logic op states to match the PICA register 360 /// Syncs the logic op states to match the PICA register
345 void SyncLogicOp(); 361 void SyncLogicOp();
346 362
363 /// Syncs the color write mask to match the PICA register state
364 void SyncColorWriteMask();
365
366 /// Syncs the stencil write mask to match the PICA register state
367 void SyncStencilWriteMask();
368
369 /// Syncs the depth write mask to match the PICA register state
370 void SyncDepthWriteMask();
371
347 /// Syncs the stencil test states to match the PICA register 372 /// Syncs the stencil test states to match the PICA register
348 void SyncStencilTest(); 373 void SyncStencilTest();
349 374
350 /// Syncs the depth test states to match the PICA register 375 /// Syncs the depth test states to match the PICA register
351 void SyncDepthTest(); 376 void SyncDepthTest();
352 377
353 /// Syncs the TEV constant color to match the PICA register
354 void SyncTevConstColor(int tev_index, const Pica::Regs::TevStageConfig& tev_stage);
355
356 /// Syncs the TEV combiner color buffer to match the PICA register 378 /// Syncs the TEV combiner color buffer to match the PICA register
357 void SyncCombinerColor(); 379 void SyncCombinerColor();
358 380
381 /// Syncs the TEV constant color to match the PICA register
382 void SyncTevConstColor(int tev_index, const Pica::Regs::TevStageConfig& tev_stage);
383
359 /// Syncs the lighting global ambient color to match the PICA register 384 /// Syncs the lighting global ambient color to match the PICA register
360 void SyncGlobalAmbient(); 385 void SyncGlobalAmbient();
361 386
362 /// Syncs the lighting lookup tables 387 /// Syncs the lighting lookup tables
363 void SyncLightingLUT(unsigned index); 388 void SyncLightingLUT(unsigned index);
364 389
365 /// Syncs the specified light's diffuse color to match the PICA register
366 void SyncLightDiffuse(int light_index);
367
368 /// Syncs the specified light's ambient color to match the PICA register
369 void SyncLightAmbient(int light_index);
370
371 /// Syncs the specified light's position to match the PICA register
372 void SyncLightPosition(int light_index);
373
374 /// Syncs the specified light's specular 0 color to match the PICA register 390 /// Syncs the specified light's specular 0 color to match the PICA register
375 void SyncLightSpecular0(int light_index); 391 void SyncLightSpecular0(int light_index);
376 392
377 /// Syncs the specified light's specular 1 color to match the PICA register 393 /// Syncs the specified light's specular 1 color to match the PICA register
378 void SyncLightSpecular1(int light_index); 394 void SyncLightSpecular1(int light_index);
379 395
380 /// Syncs the remaining OpenGL drawing state to match the current PICA state 396 /// Syncs the specified light's diffuse color to match the PICA register
381 void SyncDrawState(); 397 void SyncLightDiffuse(int light_index);
382
383 /// Copies the 3DS color framebuffer into the OpenGL color framebuffer texture
384 void ReloadColorBuffer();
385 398
386 /// Copies the 3DS depth framebuffer into the OpenGL depth framebuffer texture 399 /// Syncs the specified light's ambient color to match the PICA register
387 void ReloadDepthBuffer(); 400 void SyncLightAmbient(int light_index);
388 401
389 /** 402 /// Syncs the specified light's position to match the PICA register
390 * Save the current OpenGL color framebuffer to the current PICA framebuffer in 3DS memory 403 void SyncLightPosition(int light_index);
391 * Loads the OpenGL framebuffer textures into temporary buffers
392 * Then copies into the 3DS framebuffer using proper Morton order
393 */
394 void CommitColorBuffer();
395 404
396 /** 405 OpenGLState state;
397 * Save the current OpenGL depth framebuffer to the current PICA framebuffer in 3DS memory
398 * Loads the OpenGL framebuffer textures into temporary buffers
399 * Then copies into the 3DS framebuffer using proper Morton order
400 */
401 void CommitDepthBuffer();
402 406
403 RasterizerCacheOpenGL res_cache; 407 RasterizerCacheOpenGL res_cache;
404 408
405 std::vector<HardwareVertex> vertex_batch; 409 std::vector<HardwareVertex> vertex_batch;
406 410
407 OpenGLState state;
408
409 PAddr cached_fb_color_addr;
410 PAddr cached_fb_depth_addr;
411
412 // Hardware rasterizer
413 std::array<SamplerInfo, 3> texture_samplers;
414 TextureInfo fb_color_texture;
415 DepthTextureInfo fb_depth_texture;
416
417 std::unordered_map<PicaShaderConfig, std::unique_ptr<PicaShader>> shader_cache; 411 std::unordered_map<PicaShaderConfig, std::unique_ptr<PicaShader>> shader_cache;
418 const PicaShader* current_shader = nullptr; 412 const PicaShader* current_shader = nullptr;
413 bool shader_dirty;
419 414
420 struct { 415 struct {
421 UniformData data; 416 UniformData data;
422 bool lut_dirty[6]; 417 bool lut_dirty[6];
423 bool dirty; 418 bool dirty;
424 } uniform_block_data; 419 } uniform_block_data = {};
425 420
421 std::array<SamplerInfo, 3> texture_samplers;
426 OGLVertexArray vertex_array; 422 OGLVertexArray vertex_array;
427 OGLBuffer vertex_buffer; 423 OGLBuffer vertex_buffer;
428 OGLBuffer uniform_buffer; 424 OGLBuffer uniform_buffer;
429 OGLFramebuffer framebuffer; 425 OGLFramebuffer framebuffer;
430 426
431 std::array<OGLTexture, 6> lighting_lut; 427 std::array<OGLTexture, 6> lighting_luts;
432 std::array<std::array<GLvec4, 256>, 6> lighting_lut_data; 428 std::array<std::array<GLvec4, 256>, 6> lighting_lut_data{};
433}; 429};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 1323c12e4..7efd0038a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -2,9 +2,19 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <memory> 5#include <algorithm>
6#include <atomic>
7#include <cstring>
8#include <iterator>
9#include <unordered_set>
10#include <utility>
11#include <vector>
6 12
7#include "common/hash.h" 13#include <glad/glad.h>
14
15#include "common/bit_field.h"
16#include "common/emu_window.h"
17#include "common/logging/log.h"
8#include "common/math_util.h" 18#include "common/math_util.h"
9#include "common/microprofile.h" 19#include "common/microprofile.h"
10#include "common/vector_math.h" 20#include "common/vector_math.h"
@@ -12,71 +22,693 @@
12#include "core/memory.h" 22#include "core/memory.h"
13 23
14#include "video_core/debug_utils/debug_utils.h" 24#include "video_core/debug_utils/debug_utils.h"
25#include "video_core/pica_state.h"
15#include "video_core/renderer_opengl/gl_rasterizer_cache.h" 26#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
16#include "video_core/renderer_opengl/pica_to_gl.h" 27#include "video_core/renderer_opengl/gl_state.h"
28#include "video_core/utils.h"
29#include "video_core/video_core.h"
30
31struct FormatTuple {
32 GLint internal_format;
33 GLenum format;
34 GLenum type;
35};
36
37static const std::array<FormatTuple, 5> fb_format_tuples = {{
38 { GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8 }, // RGBA8
39 { GL_RGB8, GL_BGR, GL_UNSIGNED_BYTE }, // RGB8
40 { GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1 }, // RGB5A1
41 { GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5 }, // RGB565
42 { GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4 }, // RGBA4
43}};
44
45static const std::array<FormatTuple, 4> depth_format_tuples = {{
46 { GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT }, // D16
47 {},
48 { GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT }, // D24
49 { GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8 }, // D24S8
50}};
51
52RasterizerCacheOpenGL::RasterizerCacheOpenGL() {
53 transfer_framebuffers[0].Create();
54 transfer_framebuffers[1].Create();
55}
17 56
18RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { 57RasterizerCacheOpenGL::~RasterizerCacheOpenGL() {
19 InvalidateAll(); 58 FlushAll();
59}
60
61static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width, u32 height, u32 bytes_per_pixel, u32 gl_bytes_per_pixel, u8* morton_data, u8* gl_data, bool morton_to_gl) {
62 using PixelFormat = CachedSurface::PixelFormat;
63
64 u8* data_ptrs[2];
65 u32 depth_stencil_shifts[2] = {24, 8};
66
67 if (morton_to_gl) {
68 std::swap(depth_stencil_shifts[0], depth_stencil_shifts[1]);
69 }
70
71 if (pixel_format == PixelFormat::D24S8) {
72 for (unsigned y = 0; y < height; ++y) {
73 for (unsigned x = 0; x < width; ++x) {
74 const u32 coarse_y = y & ~7;
75 u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel;
76 u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel;
77
78 data_ptrs[morton_to_gl] = morton_data + morton_offset;
79 data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index];
80
81 // Swap depth and stencil value ordering since 3DS does not match OpenGL
82 u32 depth_stencil;
83 memcpy(&depth_stencil, data_ptrs[1], sizeof(u32));
84 depth_stencil = (depth_stencil << depth_stencil_shifts[0]) | (depth_stencil >> depth_stencil_shifts[1]);
85
86 memcpy(data_ptrs[0], &depth_stencil, sizeof(u32));
87 }
88 }
89 } else {
90 for (unsigned y = 0; y < height; ++y) {
91 for (unsigned x = 0; x < width; ++x) {
92 const u32 coarse_y = y & ~7;
93 u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel;
94 u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel;
95
96 data_ptrs[morton_to_gl] = morton_data + morton_offset;
97 data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index];
98
99 memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
100 }
101 }
102 }
103}
104
105bool RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, CachedSurface::SurfaceType type, const MathUtil::Rectangle<int>& src_rect, const MathUtil::Rectangle<int>& dst_rect) {
106 using SurfaceType = CachedSurface::SurfaceType;
107
108 OpenGLState cur_state = OpenGLState::GetCurState();
109
110 // Make sure textures aren't bound to texture units, since going to bind them to framebuffer components
111 OpenGLState::ResetTexture(src_tex);
112 OpenGLState::ResetTexture(dst_tex);
113
114 // Keep track of previous framebuffer bindings
115 GLuint old_fbs[2] = { cur_state.draw.read_framebuffer, cur_state.draw.draw_framebuffer };
116 cur_state.draw.read_framebuffer = transfer_framebuffers[0].handle;
117 cur_state.draw.draw_framebuffer = transfer_framebuffers[1].handle;
118 cur_state.Apply();
119
120 u32 buffers = 0;
121
122 if (type == SurfaceType::Color || type == SurfaceType::Texture) {
123 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex, 0);
124 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
125
126 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex, 0);
127 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
128
129 buffers = GL_COLOR_BUFFER_BIT;
130 } else if (type == SurfaceType::Depth) {
131 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
132 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0);
133 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
134
135 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
136 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0);
137 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
138
139 buffers = GL_DEPTH_BUFFER_BIT;
140 } else if (type == SurfaceType::DepthStencil) {
141 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
142 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0);
143
144 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
145 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0);
146
147 buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
148 }
149
150 if (OpenGLState::CheckFBStatus(GL_READ_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
151 return false;
152 }
153
154 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
155 return false;
156 }
157
158 glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
159 dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom,
160 buffers, buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST);
161
162 // Restore previous framebuffer bindings
163 cur_state.draw.read_framebuffer = old_fbs[0];
164 cur_state.draw.draw_framebuffer = old_fbs[1];
165 cur_state.Apply();
166
167 return true;
168}
169
170bool RasterizerCacheOpenGL::TryBlitSurfaces(CachedSurface* src_surface, const MathUtil::Rectangle<int>& src_rect, CachedSurface* dst_surface, const MathUtil::Rectangle<int>& dst_rect) {
171 using SurfaceType = CachedSurface::SurfaceType;
172
173 if (!CachedSurface::CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format)) {
174 return false;
175 }
176
177 return BlitTextures(src_surface->texture.handle, dst_surface->texture.handle, CachedSurface::GetFormatType(src_surface->pixel_format), src_rect, dst_rect);
178}
179
180static void AllocateSurfaceTexture(GLuint texture, CachedSurface::PixelFormat pixel_format, u32 width, u32 height) {
181 // Allocate an uninitialized texture of appropriate size and format for the surface
182 using SurfaceType = CachedSurface::SurfaceType;
183
184 OpenGLState cur_state = OpenGLState::GetCurState();
185
186 // Keep track of previous texture bindings
187 GLuint old_tex = cur_state.texture_units[0].texture_2d;
188 cur_state.texture_units[0].texture_2d = texture;
189 cur_state.Apply();
190 glActiveTexture(GL_TEXTURE0);
191
192 SurfaceType type = CachedSurface::GetFormatType(pixel_format);
193
194 FormatTuple tuple;
195 if (type == SurfaceType::Color) {
196 ASSERT((size_t)pixel_format < fb_format_tuples.size());
197 tuple = fb_format_tuples[(unsigned int)pixel_format];
198 } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) {
199 size_t tuple_idx = (size_t)pixel_format - 14;
200 ASSERT(tuple_idx < depth_format_tuples.size());
201 tuple = depth_format_tuples[tuple_idx];
202 } else {
203 tuple = { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE };
204 }
205
206 glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, width, height, 0,
207 tuple.format, tuple.type, nullptr);
208
209 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
210 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
211 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
212 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
213
214 // Restore previous texture bindings
215 cur_state.texture_units[0].texture_2d = old_tex;
216 cur_state.Apply();
20} 217}
21 218
22MICROPROFILE_DEFINE(OpenGL_TextureUpload, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192)); 219MICROPROFILE_DEFINE(OpenGL_SurfaceUpload, "OpenGL", "Surface Upload", MP_RGB(128, 64, 192));
220CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bool match_res_scale, bool load_if_create) {
221 using PixelFormat = CachedSurface::PixelFormat;
222 using SurfaceType = CachedSurface::SurfaceType;
223
224 if (params.addr == 0) {
225 return nullptr;
226 }
227
228 u32 params_size = params.width * params.height * CachedSurface::GetFormatBpp(params.pixel_format) / 8;
229
230 // Check for an exact match in existing surfaces
231 CachedSurface* best_exact_surface = nullptr;
232 float exact_surface_goodness = -1.f;
233
234 auto surface_interval = boost::icl::interval<PAddr>::right_open(params.addr, params.addr + params_size);
235 auto range = surface_cache.equal_range(surface_interval);
236 for (auto it = range.first; it != range.second; ++it) {
237 for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) {
238 CachedSurface* surface = it2->get();
239
240 // Check if the request matches the surface exactly
241 if (params.addr == surface->addr &&
242 params.width == surface->width && params.height == surface->height &&
243 params.pixel_format == surface->pixel_format)
244 {
245 // Make sure optional param-matching criteria are fulfilled
246 bool tiling_match = (params.is_tiled == surface->is_tiled);
247 bool res_scale_match = (params.res_scale_width == surface->res_scale_width && params.res_scale_height == surface->res_scale_height);
248 if (!match_res_scale || res_scale_match) {
249 // Prioritize same-tiling and highest resolution surfaces
250 float match_goodness = (float)tiling_match + surface->res_scale_width * surface->res_scale_height;
251 if (match_goodness > exact_surface_goodness || surface->dirty) {
252 exact_surface_goodness = match_goodness;
253 best_exact_surface = surface;
254 }
255 }
256 }
257 }
258 }
259
260 // Return the best exact surface if found
261 if (best_exact_surface != nullptr) {
262 return best_exact_surface;
263 }
264
265 // No matching surfaces found, so create a new one
266 u8* texture_src_data = Memory::GetPhysicalPointer(params.addr);
267 if (texture_src_data == nullptr) {
268 return nullptr;
269 }
270
271 MICROPROFILE_SCOPE(OpenGL_SurfaceUpload);
272
273 std::shared_ptr<CachedSurface> new_surface = std::make_shared<CachedSurface>();
23 274
24void RasterizerCacheOpenGL::LoadAndBindTexture(OpenGLState &state, unsigned texture_unit, const Pica::DebugUtils::TextureInfo& info) { 275 new_surface->addr = params.addr;
25 const auto cached_texture = texture_cache.find(info.physical_address); 276 new_surface->size = params_size;
26 277
27 if (cached_texture != texture_cache.end()) { 278 new_surface->texture.Create();
28 state.texture_units[texture_unit].texture_2d = cached_texture->second->texture.handle; 279 new_surface->width = params.width;
29 state.Apply(); 280 new_surface->height = params.height;
281 new_surface->stride = params.stride;
282 new_surface->res_scale_width = params.res_scale_width;
283 new_surface->res_scale_height = params.res_scale_height;
284
285 new_surface->is_tiled = params.is_tiled;
286 new_surface->pixel_format = params.pixel_format;
287 new_surface->dirty = false;
288
289 if (!load_if_create) {
290 // Don't load any data; just allocate the surface's texture
291 AllocateSurfaceTexture(new_surface->texture.handle, new_surface->pixel_format, new_surface->GetScaledWidth(), new_surface->GetScaledHeight());
30 } else { 292 } else {
31 MICROPROFILE_SCOPE(OpenGL_TextureUpload); 293 // TODO: Consider attempting subrect match in existing surfaces and direct blit here instead of memory upload below if that's a common scenario in some game
294
295 Memory::RasterizerFlushRegion(params.addr, params_size);
296
297 // Load data from memory to the new surface
298 OpenGLState cur_state = OpenGLState::GetCurState();
299
300 GLuint old_tex = cur_state.texture_units[0].texture_2d;
301 cur_state.texture_units[0].texture_2d = new_surface->texture.handle;
302 cur_state.Apply();
303 glActiveTexture(GL_TEXTURE0);
304
305 glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)new_surface->stride);
306 if (!new_surface->is_tiled) {
307 // TODO: Ensure this will always be a color format, not a depth or other format
308 ASSERT((size_t)new_surface->pixel_format < fb_format_tuples.size());
309 const FormatTuple& tuple = fb_format_tuples[(unsigned int)params.pixel_format];
310
311 glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0,
312 tuple.format, tuple.type, texture_src_data);
313 } else {
314 SurfaceType type = CachedSurface::GetFormatType(new_surface->pixel_format);
315 if (type != SurfaceType::Depth && type != SurfaceType::DepthStencil) {
316 FormatTuple tuple;
317 if ((size_t)params.pixel_format < fb_format_tuples.size()) {
318 tuple = fb_format_tuples[(unsigned int)params.pixel_format];
319 } else {
320 // Texture
321 tuple = { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE };
322 }
323
324 std::vector<Math::Vec4<u8>> tex_buffer(params.width * params.height);
32 325
33 std::unique_ptr<CachedTexture> new_texture = std::make_unique<CachedTexture>(); 326 Pica::DebugUtils::TextureInfo tex_info;
327 tex_info.width = params.width;
328 tex_info.height = params.height;
329 tex_info.stride = params.width * CachedSurface::GetFormatBpp(params.pixel_format) / 8;
330 tex_info.format = (Pica::Regs::TextureFormat)params.pixel_format;
331 tex_info.physical_address = params.addr;
34 332
35 new_texture->texture.Create(); 333 for (unsigned y = 0; y < params.height; ++y) {
36 state.texture_units[texture_unit].texture_2d = new_texture->texture.handle; 334 for (unsigned x = 0; x < params.width; ++x) {
37 state.Apply(); 335 tex_buffer[x + params.width * y] = Pica::DebugUtils::LookupTexture(texture_src_data, x, params.height - 1 - y, tex_info);
38 glActiveTexture(GL_TEXTURE0 + texture_unit); 336 }
337 }
39 338
40 u8* texture_src_data = Memory::GetPhysicalPointer(info.physical_address); 339 glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0, GL_RGBA, GL_UNSIGNED_BYTE, tex_buffer.data());
340 } else {
341 // Depth/Stencil formats need special treatment since they aren't sampleable using LookupTexture and can't use RGBA format
342 size_t tuple_idx = (size_t)params.pixel_format - 14;
343 ASSERT(tuple_idx < depth_format_tuples.size());
344 const FormatTuple& tuple = depth_format_tuples[tuple_idx];
41 345
42 new_texture->width = info.width; 346 u32 bytes_per_pixel = CachedSurface::GetFormatBpp(params.pixel_format) / 8;
43 new_texture->height = info.height;
44 new_texture->size = info.stride * info.height;
45 new_texture->addr = info.physical_address;
46 new_texture->hash = Common::ComputeHash64(texture_src_data, new_texture->size);
47 347
48 std::unique_ptr<Math::Vec4<u8>[]> temp_texture_buffer_rgba(new Math::Vec4<u8>[info.width * info.height]); 348 // OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type
349 bool use_4bpp = (params.pixel_format == PixelFormat::D24);
49 350
50 for (int y = 0; y < info.height; ++y) { 351 u32 gl_bytes_per_pixel = use_4bpp ? 4 : bytes_per_pixel;
51 for (int x = 0; x < info.width; ++x) { 352
52 temp_texture_buffer_rgba[x + info.width * y] = Pica::DebugUtils::LookupTexture(texture_src_data, x, info.height - 1 - y, info); 353 std::vector<u8> temp_fb_depth_buffer(params.width * params.height * gl_bytes_per_pixel);
354
355 u8* temp_fb_depth_buffer_ptr = use_4bpp ? temp_fb_depth_buffer.data() + 1 : temp_fb_depth_buffer.data();
356
357 MortonCopyPixels(params.pixel_format, params.width, params.height, bytes_per_pixel, gl_bytes_per_pixel, texture_src_data, temp_fb_depth_buffer_ptr, true);
358
359 glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0,
360 tuple.format, tuple.type, temp_fb_depth_buffer.data());
53 } 361 }
54 } 362 }
363 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
364
365 // If not 1x scale, blit 1x texture to a new scaled texture and replace texture in surface
366 if (new_surface->res_scale_width != 1.f || new_surface->res_scale_height != 1.f) {
367 OGLTexture scaled_texture;
368 scaled_texture.Create();
369
370 AllocateSurfaceTexture(scaled_texture.handle, new_surface->pixel_format, new_surface->GetScaledWidth(), new_surface->GetScaledHeight());
371 BlitTextures(new_surface->texture.handle, scaled_texture.handle, CachedSurface::GetFormatType(new_surface->pixel_format),
372 MathUtil::Rectangle<int>(0, 0, new_surface->width, new_surface->height),
373 MathUtil::Rectangle<int>(0, 0, new_surface->GetScaledWidth(), new_surface->GetScaledHeight()));
374
375 new_surface->texture.Release();
376 new_surface->texture.handle = scaled_texture.handle;
377 scaled_texture.handle = 0;
378 cur_state.texture_units[0].texture_2d = new_surface->texture.handle;
379 cur_state.Apply();
380 }
55 381
56 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, info.width, info.height, 0, GL_RGBA, GL_UNSIGNED_BYTE, temp_texture_buffer_rgba.get()); 382 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
383 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
384 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
385 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
57 386
58 texture_cache.emplace(info.physical_address, std::move(new_texture)); 387 cur_state.texture_units[0].texture_2d = old_tex;
388 cur_state.Apply();
59 } 389 }
390
391 Memory::RasterizerMarkRegionCached(new_surface->addr, new_surface->size, 1);
392 surface_cache.add(std::make_pair(boost::icl::interval<PAddr>::right_open(new_surface->addr, new_surface->addr + new_surface->size), std::set<std::shared_ptr<CachedSurface>>({ new_surface })));
393 return new_surface.get();
60} 394}
61 395
62void RasterizerCacheOpenGL::InvalidateInRange(PAddr addr, u32 size, bool ignore_hash) { 396CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params, bool match_res_scale, bool load_if_create, MathUtil::Rectangle<int>& out_rect) {
63 // TODO: Optimize by also inserting upper bound (addr + size) of each texture into the same map and also narrow using lower_bound 397 if (params.addr == 0) {
64 auto cache_upper_bound = texture_cache.upper_bound(addr + size); 398 return nullptr;
399 }
400
401 u32 total_pixels = params.width * params.height;
402 u32 params_size = total_pixels * CachedSurface::GetFormatBpp(params.pixel_format) / 8;
65 403
66 for (auto it = texture_cache.begin(); it != cache_upper_bound;) { 404 // Attempt to find encompassing surfaces
67 const auto& info = *it->second; 405 CachedSurface* best_subrect_surface = nullptr;
406 float subrect_surface_goodness = -1.f;
68 407
69 // Flush the texture only if the memory region intersects and a change is detected 408 auto surface_interval = boost::icl::interval<PAddr>::right_open(params.addr, params.addr + params_size);
70 if (MathUtil::IntervalsIntersect(addr, size, info.addr, info.size) && 409 auto cache_upper_bound = surface_cache.upper_bound(surface_interval);
71 (ignore_hash || info.hash != Common::ComputeHash64(Memory::GetPhysicalPointer(info.addr), info.size))) { 410 for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) {
411 for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) {
412 CachedSurface* surface = it2->get();
72 413
73 it = texture_cache.erase(it); 414 // Check if the request is contained in the surface
415 if (params.addr >= surface->addr &&
416 params.addr + params_size - 1 <= surface->addr + surface->size - 1 &&
417 params.pixel_format == surface->pixel_format)
418 {
419 // Make sure optional param-matching criteria are fulfilled
420 bool tiling_match = (params.is_tiled == surface->is_tiled);
421 bool res_scale_match = (params.res_scale_width == surface->res_scale_width && params.res_scale_height == surface->res_scale_height);
422 if (!match_res_scale || res_scale_match) {
423 // Prioritize same-tiling and highest resolution surfaces
424 float match_goodness = (float)tiling_match + surface->res_scale_width * surface->res_scale_height;
425 if (match_goodness > subrect_surface_goodness || surface->dirty) {
426 subrect_surface_goodness = match_goodness;
427 best_subrect_surface = surface;
428 }
429 }
430 }
431 }
432 }
433
434 // Return the best subrect surface if found
435 if (best_subrect_surface != nullptr) {
436 unsigned int bytes_per_pixel = (CachedSurface::GetFormatBpp(best_subrect_surface->pixel_format) / 8);
437
438 int x0, y0;
439
440 if (!params.is_tiled) {
441 u32 begin_pixel_index = (params.addr - best_subrect_surface->addr) / bytes_per_pixel;
442 x0 = begin_pixel_index % best_subrect_surface->width;
443 y0 = begin_pixel_index / best_subrect_surface->width;
444
445 out_rect = MathUtil::Rectangle<int>(x0, y0, x0 + params.width, y0 + params.height);
446 } else {
447 u32 bytes_per_tile = 8 * 8 * bytes_per_pixel;
448 u32 tiles_per_row = best_subrect_surface->width / 8;
449
450 u32 begin_tile_index = (params.addr - best_subrect_surface->addr) / bytes_per_tile;
451 x0 = begin_tile_index % tiles_per_row * 8;
452 y0 = begin_tile_index / tiles_per_row * 8;
453
454 // Tiled surfaces are flipped vertically in the rasterizer vs. 3DS memory.
455 out_rect = MathUtil::Rectangle<int>(x0, best_subrect_surface->height - y0, x0 + params.width, best_subrect_surface->height - (y0 + params.height));
456 }
457
458 out_rect.left = (int)(out_rect.left * best_subrect_surface->res_scale_width);
459 out_rect.right = (int)(out_rect.right * best_subrect_surface->res_scale_width);
460 out_rect.top = (int)(out_rect.top * best_subrect_surface->res_scale_height);
461 out_rect.bottom = (int)(out_rect.bottom * best_subrect_surface->res_scale_height);
462
463 return best_subrect_surface;
464 }
465
466 // No subrect found - create and return a new surface
467 if (!params.is_tiled) {
468 out_rect = MathUtil::Rectangle<int>(0, 0, (int)(params.width * params.res_scale_width), (int)(params.height * params.res_scale_height));
469 } else {
470 out_rect = MathUtil::Rectangle<int>(0, (int)(params.height * params.res_scale_height), (int)(params.width * params.res_scale_width), 0);
471 }
472
473 return GetSurface(params, match_res_scale, load_if_create);
474}
475
476CachedSurface* RasterizerCacheOpenGL::GetTextureSurface(const Pica::Regs::FullTextureConfig& config) {
477 Pica::DebugUtils::TextureInfo info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config.config, config.format);
478
479 CachedSurface params;
480 params.addr = info.physical_address;
481 params.width = info.width;
482 params.height = info.height;
483 params.is_tiled = true;
484 params.pixel_format = CachedSurface::PixelFormatFromTextureFormat(info.format);
485 return GetSurface(params, false, true);
486}
487
488std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> RasterizerCacheOpenGL::GetFramebufferSurfaces(const Pica::Regs::FramebufferConfig& config) {
489 const auto& regs = Pica::g_state.regs;
490
491 // Make sur that framebuffers don't overlap if both color and depth are being used
492 u32 fb_area = config.GetWidth() * config.GetHeight();
493 bool framebuffers_overlap = config.GetColorBufferPhysicalAddress() != 0 &&
494 config.GetDepthBufferPhysicalAddress() != 0 &&
495 MathUtil::IntervalsIntersect(config.GetColorBufferPhysicalAddress(), fb_area * GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(config.color_format.Value())),
496 config.GetDepthBufferPhysicalAddress(), fb_area * Pica::Regs::BytesPerDepthPixel(config.depth_format));
497 bool using_color_fb = config.GetColorBufferPhysicalAddress() != 0;
498 bool using_depth_fb = config.GetDepthBufferPhysicalAddress() != 0 && (regs.output_merger.depth_test_enable || regs.output_merger.depth_write_enable || !framebuffers_overlap);
499
500 if (framebuffers_overlap && using_color_fb && using_depth_fb) {
501 LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; overlapping framebuffers not supported!");
502 using_depth_fb = false;
503 }
504
505 // get color and depth surfaces
506 CachedSurface color_params;
507 CachedSurface depth_params;
508 color_params.width = depth_params.width = config.GetWidth();
509 color_params.height = depth_params.height = config.GetHeight();
510 color_params.is_tiled = depth_params.is_tiled = true;
511 if (VideoCore::g_scaled_resolution_enabled) {
512 auto layout = VideoCore::g_emu_window->GetFramebufferLayout();
513
514 // Assume same scaling factor for top and bottom screens
515 color_params.res_scale_width = depth_params.res_scale_width = (float)layout.top_screen.GetWidth() / VideoCore::kScreenTopWidth;
516 color_params.res_scale_height = depth_params.res_scale_height = (float)layout.top_screen.GetHeight() / VideoCore::kScreenTopHeight;
517 }
518
519 color_params.addr = config.GetColorBufferPhysicalAddress();
520 color_params.pixel_format = CachedSurface::PixelFormatFromColorFormat(config.color_format);
521
522 depth_params.addr = config.GetDepthBufferPhysicalAddress();
523 depth_params.pixel_format = CachedSurface::PixelFormatFromDepthFormat(config.depth_format);
524
525 MathUtil::Rectangle<int> color_rect;
526 CachedSurface* color_surface = using_color_fb ? GetSurfaceRect(color_params, true, true, color_rect) : nullptr;
527
528 MathUtil::Rectangle<int> depth_rect;
529 CachedSurface* depth_surface = using_depth_fb ? GetSurfaceRect(depth_params, true, true, depth_rect) : nullptr;
530
531 // Sanity check to make sure found surfaces aren't the same
532 if (using_depth_fb && using_color_fb && color_surface == depth_surface) {
533 LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer surfaces overlap; overlapping surfaces not supported!");
534 using_depth_fb = false;
535 depth_surface = nullptr;
536 }
537
538 MathUtil::Rectangle<int> rect;
539
540 if (color_surface != nullptr && depth_surface != nullptr && (depth_rect.left != color_rect.left || depth_rect.top != color_rect.top)) {
541 // Can't specify separate color and depth viewport offsets in OpenGL, so re-zero both if they don't match
542 if (color_rect.left != 0 || color_rect.top != 0) {
543 color_surface = GetSurface(color_params, true, true);
544 }
545
546 if (depth_rect.left != 0 || depth_rect.top != 0) {
547 depth_surface = GetSurface(depth_params, true, true);
548 }
549
550 if (!color_surface->is_tiled) {
551 rect = MathUtil::Rectangle<int>(0, 0, (int)(color_params.width * color_params.res_scale_width), (int)(color_params.height * color_params.res_scale_height));
74 } else { 552 } else {
75 ++it; 553 rect = MathUtil::Rectangle<int>(0, (int)(color_params.height * color_params.res_scale_height), (int)(color_params.width * color_params.res_scale_width), 0);
76 } 554 }
555 } else if (color_surface != nullptr) {
556 rect = color_rect;
557 } else if (depth_surface != nullptr) {
558 rect = depth_rect;
559 } else {
560 rect = MathUtil::Rectangle<int>(0, 0, 0, 0);
77 } 561 }
562
563 return std::make_tuple(color_surface, depth_surface, rect);
78} 564}
79 565
80void RasterizerCacheOpenGL::InvalidateAll() { 566CachedSurface* RasterizerCacheOpenGL::TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config) {
81 texture_cache.clear(); 567 auto surface_interval = boost::icl::interval<PAddr>::right_open(config.GetStartAddress(), config.GetEndAddress());
568 auto range = surface_cache.equal_range(surface_interval);
569 for (auto it = range.first; it != range.second; ++it) {
570 for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) {
571 int bits_per_value = 0;
572 if (config.fill_24bit) {
573 bits_per_value = 24;
574 } else if (config.fill_32bit) {
575 bits_per_value = 32;
576 } else {
577 bits_per_value = 16;
578 }
579
580 CachedSurface* surface = it2->get();
581
582 if (surface->addr == config.GetStartAddress() &&
583 CachedSurface::GetFormatBpp(surface->pixel_format) == bits_per_value &&
584 (surface->width * surface->height * CachedSurface::GetFormatBpp(surface->pixel_format) / 8) == (config.GetEndAddress() - config.GetStartAddress()))
585 {
586 return surface;
587 }
588 }
589 }
590
591 return nullptr;
592}
593
594MICROPROFILE_DEFINE(OpenGL_SurfaceDownload, "OpenGL", "Surface Download", MP_RGB(128, 192, 64));
595void RasterizerCacheOpenGL::FlushSurface(CachedSurface* surface) {
596 using PixelFormat = CachedSurface::PixelFormat;
597 using SurfaceType = CachedSurface::SurfaceType;
598
599 if (!surface->dirty) {
600 return;
601 }
602
603 MICROPROFILE_SCOPE(OpenGL_SurfaceDownload);
604
605 u8* dst_buffer = Memory::GetPhysicalPointer(surface->addr);
606 if (dst_buffer == nullptr) {
607 return;
608 }
609
610 OpenGLState cur_state = OpenGLState::GetCurState();
611 GLuint old_tex = cur_state.texture_units[0].texture_2d;
612
613 OGLTexture unscaled_tex;
614 GLuint texture_to_flush = surface->texture.handle;
615
616 // If not 1x scale, blit scaled texture to a new 1x texture and use that to flush
617 if (surface->res_scale_width != 1.f || surface->res_scale_height != 1.f) {
618 unscaled_tex.Create();
619
620 AllocateSurfaceTexture(unscaled_tex.handle, surface->pixel_format, surface->width, surface->height);
621 BlitTextures(surface->texture.handle, unscaled_tex.handle, CachedSurface::GetFormatType(surface->pixel_format),
622 MathUtil::Rectangle<int>(0, 0, surface->GetScaledWidth(), surface->GetScaledHeight()),
623 MathUtil::Rectangle<int>(0, 0, surface->width, surface->height));
624
625 texture_to_flush = unscaled_tex.handle;
626 }
627
628 cur_state.texture_units[0].texture_2d = texture_to_flush;
629 cur_state.Apply();
630 glActiveTexture(GL_TEXTURE0);
631
632 glPixelStorei(GL_PACK_ROW_LENGTH, (GLint)surface->stride);
633 if (!surface->is_tiled) {
634 // TODO: Ensure this will always be a color format, not a depth or other format
635 ASSERT((size_t)surface->pixel_format < fb_format_tuples.size());
636 const FormatTuple& tuple = fb_format_tuples[(unsigned int)surface->pixel_format];
637
638 glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, dst_buffer);
639 } else {
640 SurfaceType type = CachedSurface::GetFormatType(surface->pixel_format);
641 if (type != SurfaceType::Depth && type != SurfaceType::DepthStencil) {
642 ASSERT((size_t)surface->pixel_format < fb_format_tuples.size());
643 const FormatTuple& tuple = fb_format_tuples[(unsigned int)surface->pixel_format];
644
645 u32 bytes_per_pixel = CachedSurface::GetFormatBpp(surface->pixel_format) / 8;
646
647 std::vector<u8> temp_gl_buffer(surface->width * surface->height * bytes_per_pixel);
648
649 glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data());
650
651 // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion is necessary.
652 MortonCopyPixels(surface->pixel_format, surface->width, surface->height, bytes_per_pixel, bytes_per_pixel, dst_buffer, temp_gl_buffer.data(), false);
653 } else {
654 // Depth/Stencil formats need special treatment since they aren't sampleable using LookupTexture and can't use RGBA format
655 size_t tuple_idx = (size_t)surface->pixel_format - 14;
656 ASSERT(tuple_idx < depth_format_tuples.size());
657 const FormatTuple& tuple = depth_format_tuples[tuple_idx];
658
659 u32 bytes_per_pixel = CachedSurface::GetFormatBpp(surface->pixel_format) / 8;
660
661 // OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type
662 bool use_4bpp = (surface->pixel_format == PixelFormat::D24);
663
664 u32 gl_bytes_per_pixel = use_4bpp ? 4 : bytes_per_pixel;
665
666 std::vector<u8> temp_gl_buffer(surface->width * surface->height * gl_bytes_per_pixel);
667
668 glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data());
669
670 u8* temp_gl_buffer_ptr = use_4bpp ? temp_gl_buffer.data() + 1 : temp_gl_buffer.data();
671
672 MortonCopyPixels(surface->pixel_format, surface->width, surface->height, bytes_per_pixel, gl_bytes_per_pixel, dst_buffer, temp_gl_buffer_ptr, false);
673 }
674 }
675 glPixelStorei(GL_PACK_ROW_LENGTH, 0);
676
677 surface->dirty = false;
678
679 cur_state.texture_units[0].texture_2d = old_tex;
680 cur_state.Apply();
681}
682
683void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, bool invalidate) {
684 if (size == 0) {
685 return;
686 }
687
688 // Gather up unique surfaces that touch the region
689 std::unordered_set<std::shared_ptr<CachedSurface>> touching_surfaces;
690
691 auto surface_interval = boost::icl::interval<PAddr>::right_open(addr, addr + size);
692 auto cache_upper_bound = surface_cache.upper_bound(surface_interval);
693 for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) {
694 std::copy_if(it->second.begin(), it->second.end(), std::inserter(touching_surfaces, touching_surfaces.end()),
695 [skip_surface](std::shared_ptr<CachedSurface> surface) { return (surface.get() != skip_surface); });
696 }
697
698 // Flush and invalidate surfaces
699 for (auto surface : touching_surfaces) {
700 FlushSurface(surface.get());
701 if (invalidate) {
702 Memory::RasterizerMarkRegionCached(surface->addr, surface->size, -1);
703 surface_cache.subtract(std::make_pair(boost::icl::interval<PAddr>::right_open(surface->addr, surface->addr + surface->size), std::set<std::shared_ptr<CachedSurface>>({ surface })));
704 }
705 }
706}
707
708void RasterizerCacheOpenGL::FlushAll() {
709 for (auto& surfaces : surface_cache) {
710 for (auto& surface : surfaces.second) {
711 FlushSurface(surface.get());
712 }
713 }
82} 714}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index b69651427..225596415 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -4,40 +4,219 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <map> 7#include <array>
8#include <memory> 8#include <memory>
9#include <set>
10#include <tuple>
11
12#include <boost/icl/interval_map.hpp>
13#include <glad/glad.h>
14
15#include "common/assert.h"
16#include "common/common_funcs.h"
17#include "common/common_types.h"
18
19#include "core/hw/gpu.h"
9 20
10#include "video_core/pica.h" 21#include "video_core/pica.h"
11#include "video_core/debug_utils/debug_utils.h"
12#include "video_core/renderer_opengl/gl_resource_manager.h" 22#include "video_core/renderer_opengl/gl_resource_manager.h"
13#include "video_core/renderer_opengl/gl_state.h" 23
24namespace MathUtil {
25template <class T> struct Rectangle;
26}
27
28struct CachedSurface;
29
30using SurfaceCache = boost::icl::interval_map<PAddr, std::set<std::shared_ptr<CachedSurface>>>;
31
32struct CachedSurface {
33 enum class PixelFormat {
34 // First 5 formats are shared between textures and color buffers
35 RGBA8 = 0,
36 RGB8 = 1,
37 RGB5A1 = 2,
38 RGB565 = 3,
39 RGBA4 = 4,
40
41 // Texture-only formats
42 IA8 = 5,
43 RG8 = 6,
44 I8 = 7,
45 A8 = 8,
46 IA4 = 9,
47 I4 = 10,
48 A4 = 11,
49 ETC1 = 12,
50 ETC1A4 = 13,
51
52 // Depth buffer-only formats
53 D16 = 14,
54 // gap
55 D24 = 16,
56 D24S8 = 17,
57
58 Invalid = 255,
59 };
60
61 enum class SurfaceType {
62 Color = 0,
63 Texture = 1,
64 Depth = 2,
65 DepthStencil = 3,
66 Invalid = 4,
67 };
68
69 static unsigned int GetFormatBpp(CachedSurface::PixelFormat format) {
70 static const std::array<unsigned int, 18> bpp_table = {
71 32, // RGBA8
72 24, // RGB8
73 16, // RGB5A1
74 16, // RGB565
75 16, // RGBA4
76 16, // IA8
77 16, // RG8
78 8, // I8
79 8, // A8
80 8, // IA4
81 4, // I4
82 4, // A4
83 4, // ETC1
84 8, // ETC1A4
85 16, // D16
86 0,
87 24, // D24
88 32, // D24S8
89 };
90
91 ASSERT((unsigned int)format < ARRAY_SIZE(bpp_table));
92 return bpp_table[(unsigned int)format];
93 }
94
95 static PixelFormat PixelFormatFromTextureFormat(Pica::Regs::TextureFormat format) {
96 return ((unsigned int)format < 14) ? (PixelFormat)format : PixelFormat::Invalid;
97 }
98
99 static PixelFormat PixelFormatFromColorFormat(Pica::Regs::ColorFormat format) {
100 return ((unsigned int)format < 5) ? (PixelFormat)format : PixelFormat::Invalid;
101 }
102
103 static PixelFormat PixelFormatFromDepthFormat(Pica::Regs::DepthFormat format) {
104 return ((unsigned int)format < 4) ? (PixelFormat)((unsigned int)format + 14) : PixelFormat::Invalid;
105 }
106
107 static PixelFormat PixelFormatFromGPUPixelFormat(GPU::Regs::PixelFormat format) {
108 switch (format) {
109 // RGB565 and RGB5A1 are switched in PixelFormat compared to ColorFormat
110 case GPU::Regs::PixelFormat::RGB565:
111 return PixelFormat::RGB565;
112 case GPU::Regs::PixelFormat::RGB5A1:
113 return PixelFormat::RGB5A1;
114 default:
115 return ((unsigned int)format < 5) ? (PixelFormat)format : PixelFormat::Invalid;
116 }
117 }
118
119 static bool CheckFormatsBlittable(PixelFormat pixel_format_a, PixelFormat pixel_format_b) {
120 SurfaceType a_type = GetFormatType(pixel_format_a);
121 SurfaceType b_type = GetFormatType(pixel_format_b);
122
123 if ((a_type == SurfaceType::Color || a_type == SurfaceType::Texture) && (b_type == SurfaceType::Color || b_type == SurfaceType::Texture)) {
124 return true;
125 }
126
127 if (a_type == SurfaceType::Depth && b_type == SurfaceType::Depth) {
128 return true;
129 }
130
131 if (a_type == SurfaceType::DepthStencil && b_type == SurfaceType::DepthStencil) {
132 return true;
133 }
134
135 return false;
136 }
137
138 static SurfaceType GetFormatType(PixelFormat pixel_format) {
139 if ((unsigned int)pixel_format < 5) {
140 return SurfaceType::Color;
141 }
142
143 if ((unsigned int)pixel_format < 14) {
144 return SurfaceType::Texture;
145 }
146
147 if (pixel_format == PixelFormat::D16 || pixel_format == PixelFormat::D24) {
148 return SurfaceType::Depth;
149 }
150
151 if (pixel_format == PixelFormat::D24S8) {
152 return SurfaceType::DepthStencil;
153 }
154
155 return SurfaceType::Invalid;
156 }
157
158 u32 GetScaledWidth() const {
159 return (u32)(width * res_scale_width);
160 }
161
162 u32 GetScaledHeight() const {
163 return (u32)(height * res_scale_height);
164 }
165
166 PAddr addr;
167 u32 size;
168
169 PAddr min_valid;
170 PAddr max_valid;
171
172 OGLTexture texture;
173 u32 width;
174 u32 height;
175 u32 stride = 0;
176 float res_scale_width = 1.f;
177 float res_scale_height = 1.f;
178
179 bool is_tiled;
180 PixelFormat pixel_format;
181 bool dirty;
182};
14 183
15class RasterizerCacheOpenGL : NonCopyable { 184class RasterizerCacheOpenGL : NonCopyable {
16public: 185public:
186 RasterizerCacheOpenGL();
17 ~RasterizerCacheOpenGL(); 187 ~RasterizerCacheOpenGL();
18 188
189 /// Blits one texture to another
190 bool BlitTextures(GLuint src_tex, GLuint dst_tex, CachedSurface::SurfaceType type, const MathUtil::Rectangle<int>& src_rect, const MathUtil::Rectangle<int>& dst_rect);
191
192 /// Attempt to blit one surface's texture to another
193 bool TryBlitSurfaces(CachedSurface* src_surface, const MathUtil::Rectangle<int>& src_rect, CachedSurface* dst_surface, const MathUtil::Rectangle<int>& dst_rect);
194
19 /// Loads a texture from 3DS memory to OpenGL and caches it (if not already cached) 195 /// Loads a texture from 3DS memory to OpenGL and caches it (if not already cached)
20 void LoadAndBindTexture(OpenGLState &state, unsigned texture_unit, const Pica::DebugUtils::TextureInfo& info); 196 CachedSurface* GetSurface(const CachedSurface& params, bool match_res_scale, bool load_if_create);
21 197
22 void LoadAndBindTexture(OpenGLState &state, unsigned texture_unit, const Pica::Regs::FullTextureConfig& config) { 198 /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from 3DS memory to OpenGL and caches it (if not already cached)
23 LoadAndBindTexture(state, texture_unit, Pica::DebugUtils::TextureInfo::FromPicaRegister(config.config, config.format)); 199 CachedSurface* GetSurfaceRect(const CachedSurface& params, bool match_res_scale, bool load_if_create, MathUtil::Rectangle<int>& out_rect);
24 }
25 200
26 /// Invalidate any cached resource intersecting the specified region. 201 /// Gets a surface based on the texture configuration
27 void InvalidateInRange(PAddr addr, u32 size, bool ignore_hash = false); 202 CachedSurface* GetTextureSurface(const Pica::Regs::FullTextureConfig& config);
28 203
29 /// Invalidate all cached OpenGL resources tracked by this cache manager 204 /// Gets the color and depth surfaces and rect (resolution scaled) based on the framebuffer configuration
30 void InvalidateAll(); 205 std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> GetFramebufferSurfaces(const Pica::Regs::FramebufferConfig& config);
31 206
32private: 207 /// Attempt to get a surface that exactly matches the fill region and format
33 struct CachedTexture { 208 CachedSurface* TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config);
34 OGLTexture texture; 209
35 GLuint width; 210 /// Write the surface back to memory
36 GLuint height; 211 void FlushSurface(CachedSurface* surface);
37 u32 size;
38 u64 hash;
39 PAddr addr;
40 };
41 212
42 std::map<PAddr, std::unique_ptr<CachedTexture>> texture_cache; 213 /// Write any cached resources overlapping the region back to memory (if dirty) and optionally invalidate them in the cache
214 void FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, bool invalidate);
215
216 /// Flush all cached resources tracked by this cache manager
217 void FlushAll();
218
219private:
220 SurfaceCache surface_cache;
221 OGLFramebuffer transfer_framebuffers[2];
43}; 222};
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index ee4b54ab9..71d60e69c 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -2,9 +2,17 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <array>
6#include <cstddef>
7
8#include "common/assert.h"
9#include "common/bit_field.h"
10#include "common/logging/log.h"
11
5#include "video_core/pica.h" 12#include "video_core/pica.h"
6#include "video_core/renderer_opengl/gl_rasterizer.h" 13#include "video_core/renderer_opengl/gl_rasterizer.h"
7#include "video_core/renderer_opengl/gl_shader_gen.h" 14#include "video_core/renderer_opengl/gl_shader_gen.h"
15#include "video_core/renderer_opengl/gl_shader_util.h"
8 16
9using Pica::Regs; 17using Pica::Regs;
10using TevStageConfig = Regs::TevStageConfig; 18using TevStageConfig = Regs::TevStageConfig;
@@ -24,8 +32,9 @@ static bool IsPassThroughTevStage(const TevStageConfig& stage) {
24} 32}
25 33
26/// Writes the specified TEV stage source component(s) 34/// Writes the specified TEV stage source component(s)
27static void AppendSource(std::string& out, TevStageConfig::Source source, 35static void AppendSource(std::string& out, const PicaShaderConfig& config, TevStageConfig::Source source,
28 const std::string& index_name) { 36 const std::string& index_name) {
37 const auto& state = config.state;
29 using Source = TevStageConfig::Source; 38 using Source = TevStageConfig::Source;
30 switch (source) { 39 switch (source) {
31 case Source::PrimaryColor: 40 case Source::PrimaryColor:
@@ -38,7 +47,20 @@ static void AppendSource(std::string& out, TevStageConfig::Source source,
38 out += "secondary_fragment_color"; 47 out += "secondary_fragment_color";
39 break; 48 break;
40 case Source::Texture0: 49 case Source::Texture0:
41 out += "texture(tex[0], texcoord[0])"; 50 // Only unit 0 respects the texturing type (according to 3DBrew)
51 switch(state.texture0_type) {
52 case Pica::Regs::TextureConfig::Texture2D:
53 out += "texture(tex[0], texcoord[0])";
54 break;
55 case Pica::Regs::TextureConfig::Projection2D:
56 out += "textureProj(tex[0], vec3(texcoord[0], texcoord0_w))";
57 break;
58 default:
59 out += "texture(tex[0], texcoord[0])";
60 LOG_CRITICAL(HW_GPU, "Unhandled texture type %x", static_cast<int>(state.texture0_type));
61 UNIMPLEMENTED();
62 break;
63 }
42 break; 64 break;
43 case Source::Texture1: 65 case Source::Texture1:
44 out += "texture(tex[1], texcoord[1])"; 66 out += "texture(tex[1], texcoord[1])";
@@ -63,53 +85,53 @@ static void AppendSource(std::string& out, TevStageConfig::Source source,
63} 85}
64 86
65/// Writes the color components to use for the specified TEV stage color modifier 87/// Writes the color components to use for the specified TEV stage color modifier
66static void AppendColorModifier(std::string& out, TevStageConfig::ColorModifier modifier, 88static void AppendColorModifier(std::string& out, const PicaShaderConfig& config, TevStageConfig::ColorModifier modifier,
67 TevStageConfig::Source source, const std::string& index_name) { 89 TevStageConfig::Source source, const std::string& index_name) {
68 using ColorModifier = TevStageConfig::ColorModifier; 90 using ColorModifier = TevStageConfig::ColorModifier;
69 switch (modifier) { 91 switch (modifier) {
70 case ColorModifier::SourceColor: 92 case ColorModifier::SourceColor:
71 AppendSource(out, source, index_name); 93 AppendSource(out, config, source, index_name);
72 out += ".rgb"; 94 out += ".rgb";
73 break; 95 break;
74 case ColorModifier::OneMinusSourceColor: 96 case ColorModifier::OneMinusSourceColor:
75 out += "vec3(1.0) - "; 97 out += "vec3(1.0) - ";
76 AppendSource(out, source, index_name); 98 AppendSource(out, config, source, index_name);
77 out += ".rgb"; 99 out += ".rgb";
78 break; 100 break;
79 case ColorModifier::SourceAlpha: 101 case ColorModifier::SourceAlpha:
80 AppendSource(out, source, index_name); 102 AppendSource(out, config, source, index_name);
81 out += ".aaa"; 103 out += ".aaa";
82 break; 104 break;
83 case ColorModifier::OneMinusSourceAlpha: 105 case ColorModifier::OneMinusSourceAlpha:
84 out += "vec3(1.0) - "; 106 out += "vec3(1.0) - ";
85 AppendSource(out, source, index_name); 107 AppendSource(out, config, source, index_name);
86 out += ".aaa"; 108 out += ".aaa";
87 break; 109 break;
88 case ColorModifier::SourceRed: 110 case ColorModifier::SourceRed:
89 AppendSource(out, source, index_name); 111 AppendSource(out, config, source, index_name);
90 out += ".rrr"; 112 out += ".rrr";
91 break; 113 break;
92 case ColorModifier::OneMinusSourceRed: 114 case ColorModifier::OneMinusSourceRed:
93 out += "vec3(1.0) - "; 115 out += "vec3(1.0) - ";
94 AppendSource(out, source, index_name); 116 AppendSource(out, config, source, index_name);
95 out += ".rrr"; 117 out += ".rrr";
96 break; 118 break;
97 case ColorModifier::SourceGreen: 119 case ColorModifier::SourceGreen:
98 AppendSource(out, source, index_name); 120 AppendSource(out, config, source, index_name);
99 out += ".ggg"; 121 out += ".ggg";
100 break; 122 break;
101 case ColorModifier::OneMinusSourceGreen: 123 case ColorModifier::OneMinusSourceGreen:
102 out += "vec3(1.0) - "; 124 out += "vec3(1.0) - ";
103 AppendSource(out, source, index_name); 125 AppendSource(out, config, source, index_name);
104 out += ".ggg"; 126 out += ".ggg";
105 break; 127 break;
106 case ColorModifier::SourceBlue: 128 case ColorModifier::SourceBlue:
107 AppendSource(out, source, index_name); 129 AppendSource(out, config, source, index_name);
108 out += ".bbb"; 130 out += ".bbb";
109 break; 131 break;
110 case ColorModifier::OneMinusSourceBlue: 132 case ColorModifier::OneMinusSourceBlue:
111 out += "vec3(1.0) - "; 133 out += "vec3(1.0) - ";
112 AppendSource(out, source, index_name); 134 AppendSource(out, config, source, index_name);
113 out += ".bbb"; 135 out += ".bbb";
114 break; 136 break;
115 default: 137 default:
@@ -120,44 +142,44 @@ static void AppendColorModifier(std::string& out, TevStageConfig::ColorModifier
120} 142}
121 143
122/// Writes the alpha component to use for the specified TEV stage alpha modifier 144/// Writes the alpha component to use for the specified TEV stage alpha modifier
123static void AppendAlphaModifier(std::string& out, TevStageConfig::AlphaModifier modifier, 145static void AppendAlphaModifier(std::string& out, const PicaShaderConfig& config, TevStageConfig::AlphaModifier modifier,
124 TevStageConfig::Source source, const std::string& index_name) { 146 TevStageConfig::Source source, const std::string& index_name) {
125 using AlphaModifier = TevStageConfig::AlphaModifier; 147 using AlphaModifier = TevStageConfig::AlphaModifier;
126 switch (modifier) { 148 switch (modifier) {
127 case AlphaModifier::SourceAlpha: 149 case AlphaModifier::SourceAlpha:
128 AppendSource(out, source, index_name); 150 AppendSource(out, config, source, index_name);
129 out += ".a"; 151 out += ".a";
130 break; 152 break;
131 case AlphaModifier::OneMinusSourceAlpha: 153 case AlphaModifier::OneMinusSourceAlpha:
132 out += "1.0 - "; 154 out += "1.0 - ";
133 AppendSource(out, source, index_name); 155 AppendSource(out, config, source, index_name);
134 out += ".a"; 156 out += ".a";
135 break; 157 break;
136 case AlphaModifier::SourceRed: 158 case AlphaModifier::SourceRed:
137 AppendSource(out, source, index_name); 159 AppendSource(out, config, source, index_name);
138 out += ".r"; 160 out += ".r";
139 break; 161 break;
140 case AlphaModifier::OneMinusSourceRed: 162 case AlphaModifier::OneMinusSourceRed:
141 out += "1.0 - "; 163 out += "1.0 - ";
142 AppendSource(out, source, index_name); 164 AppendSource(out, config, source, index_name);
143 out += ".r"; 165 out += ".r";
144 break; 166 break;
145 case AlphaModifier::SourceGreen: 167 case AlphaModifier::SourceGreen:
146 AppendSource(out, source, index_name); 168 AppendSource(out, config, source, index_name);
147 out += ".g"; 169 out += ".g";
148 break; 170 break;
149 case AlphaModifier::OneMinusSourceGreen: 171 case AlphaModifier::OneMinusSourceGreen:
150 out += "1.0 - "; 172 out += "1.0 - ";
151 AppendSource(out, source, index_name); 173 AppendSource(out, config, source, index_name);
152 out += ".g"; 174 out += ".g";
153 break; 175 break;
154 case AlphaModifier::SourceBlue: 176 case AlphaModifier::SourceBlue:
155 AppendSource(out, source, index_name); 177 AppendSource(out, config, source, index_name);
156 out += ".b"; 178 out += ".b";
157 break; 179 break;
158 case AlphaModifier::OneMinusSourceBlue: 180 case AlphaModifier::OneMinusSourceBlue:
159 out += "1.0 - "; 181 out += "1.0 - ";
160 AppendSource(out, source, index_name); 182 AppendSource(out, config, source, index_name);
161 out += ".b"; 183 out += ".b";
162 break; 184 break;
163 default: 185 default:
@@ -198,6 +220,9 @@ static void AppendColorCombiner(std::string& out, TevStageConfig::Operation oper
198 case Operation::AddThenMultiply: 220 case Operation::AddThenMultiply:
199 out += "min(" + variable_name + "[0] + " + variable_name + "[1], vec3(1.0)) * " + variable_name + "[2]"; 221 out += "min(" + variable_name + "[0] + " + variable_name + "[1], vec3(1.0)) * " + variable_name + "[2]";
200 break; 222 break;
223 case Operation::Dot3_RGB:
224 out += "vec3(dot(" + variable_name + "[0] - vec3(0.5), " + variable_name + "[1] - vec3(0.5)) * 4.0)";
225 break;
201 default: 226 default:
202 out += "vec3(0.0)"; 227 out += "vec3(0.0)";
203 LOG_CRITICAL(Render_OpenGL, "Unknown color combiner operation: %u", operation); 228 LOG_CRITICAL(Render_OpenGL, "Unknown color combiner operation: %u", operation);
@@ -276,16 +301,16 @@ static void AppendAlphaTestCondition(std::string& out, Regs::CompareFunc func) {
276 301
277/// Writes the code to emulate the specified TEV stage 302/// Writes the code to emulate the specified TEV stage
278static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsigned index) { 303static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsigned index) {
279 auto& stage = config.tev_stages[index]; 304 const auto stage = static_cast<const Pica::Regs::TevStageConfig>(config.state.tev_stages[index]);
280 if (!IsPassThroughTevStage(stage)) { 305 if (!IsPassThroughTevStage(stage)) {
281 std::string index_name = std::to_string(index); 306 std::string index_name = std::to_string(index);
282 307
283 out += "vec3 color_results_" + index_name + "[3] = vec3[3]("; 308 out += "vec3 color_results_" + index_name + "[3] = vec3[3](";
284 AppendColorModifier(out, stage.color_modifier1, stage.color_source1, index_name); 309 AppendColorModifier(out, config, stage.color_modifier1, stage.color_source1, index_name);
285 out += ", "; 310 out += ", ";
286 AppendColorModifier(out, stage.color_modifier2, stage.color_source2, index_name); 311 AppendColorModifier(out, config, stage.color_modifier2, stage.color_source2, index_name);
287 out += ", "; 312 out += ", ";
288 AppendColorModifier(out, stage.color_modifier3, stage.color_source3, index_name); 313 AppendColorModifier(out, config, stage.color_modifier3, stage.color_source3, index_name);
289 out += ");\n"; 314 out += ");\n";
290 315
291 out += "vec3 color_output_" + index_name + " = "; 316 out += "vec3 color_output_" + index_name + " = ";
@@ -293,11 +318,11 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi
293 out += ";\n"; 318 out += ";\n";
294 319
295 out += "float alpha_results_" + index_name + "[3] = float[3]("; 320 out += "float alpha_results_" + index_name + "[3] = float[3](";
296 AppendAlphaModifier(out, stage.alpha_modifier1, stage.alpha_source1, index_name); 321 AppendAlphaModifier(out, config, stage.alpha_modifier1, stage.alpha_source1, index_name);
297 out += ", "; 322 out += ", ";
298 AppendAlphaModifier(out, stage.alpha_modifier2, stage.alpha_source2, index_name); 323 AppendAlphaModifier(out, config, stage.alpha_modifier2, stage.alpha_source2, index_name);
299 out += ", "; 324 out += ", ";
300 AppendAlphaModifier(out, stage.alpha_modifier3, stage.alpha_source3, index_name); 325 AppendAlphaModifier(out, config, stage.alpha_modifier3, stage.alpha_source3, index_name);
301 out += ");\n"; 326 out += ");\n";
302 327
303 out += "float alpha_output_" + index_name + " = "; 328 out += "float alpha_output_" + index_name + " = ";
@@ -320,6 +345,8 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi
320 345
321/// Writes the code to emulate fragment lighting 346/// Writes the code to emulate fragment lighting
322static void WriteLighting(std::string& out, const PicaShaderConfig& config) { 347static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
348 const auto& lighting = config.state.lighting;
349
323 // Define lighting globals 350 // Define lighting globals
324 out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" 351 out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n"
325 "vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" 352 "vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);\n"
@@ -327,17 +354,17 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
327 "vec3 refl_value = vec3(0.0);\n"; 354 "vec3 refl_value = vec3(0.0);\n";
328 355
329 // Compute fragment normals 356 // Compute fragment normals
330 if (config.lighting.bump_mode == Pica::Regs::LightingBumpMode::NormalMap) { 357 if (lighting.bump_mode == Pica::Regs::LightingBumpMode::NormalMap) {
331 // Bump mapping is enabled using a normal map, read perturbation vector from the selected texture 358 // Bump mapping is enabled using a normal map, read perturbation vector from the selected texture
332 std::string bump_selector = std::to_string(config.lighting.bump_selector); 359 std::string bump_selector = std::to_string(lighting.bump_selector);
333 out += "vec3 surface_normal = 2.0 * texture(tex[" + bump_selector + "], texcoord[" + bump_selector + "]).rgb - 1.0;\n"; 360 out += "vec3 surface_normal = 2.0 * texture(tex[" + bump_selector + "], texcoord[" + bump_selector + "]).rgb - 1.0;\n";
334 361
335 // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher precision result 362 // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher precision result
336 if (config.lighting.bump_renorm) { 363 if (lighting.bump_renorm) {
337 std::string val = "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))"; 364 std::string val = "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))";
338 out += "surface_normal.z = sqrt(max(" + val + ", 0.0));\n"; 365 out += "surface_normal.z = sqrt(max(" + val + ", 0.0));\n";
339 } 366 }
340 } else if (config.lighting.bump_mode == Pica::Regs::LightingBumpMode::TangentMap) { 367 } else if (lighting.bump_mode == Pica::Regs::LightingBumpMode::TangentMap) {
341 // Bump mapping is enabled using a tangent map 368 // Bump mapping is enabled using a tangent map
342 LOG_CRITICAL(HW_GPU, "unimplemented bump mapping mode (tangent mapping)"); 369 LOG_CRITICAL(HW_GPU, "unimplemented bump mapping mode (tangent mapping)");
343 UNIMPLEMENTED(); 370 UNIMPLEMENTED();
@@ -350,7 +377,7 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
350 out += "vec3 normal = normalize(quaternion_rotate(normquat, surface_normal));\n"; 377 out += "vec3 normal = normalize(quaternion_rotate(normquat, surface_normal));\n";
351 378
352 // Gets the index into the specified lookup table for specular lighting 379 // Gets the index into the specified lookup table for specular lighting
353 auto GetLutIndex = [config](unsigned light_num, Regs::LightingLutInput input, bool abs) { 380 auto GetLutIndex = [&lighting](unsigned light_num, Regs::LightingLutInput input, bool abs) {
354 const std::string half_angle = "normalize(normalize(view) + light_vector)"; 381 const std::string half_angle = "normalize(normalize(view) + light_vector)";
355 std::string index; 382 std::string index;
356 switch (input) { 383 switch (input) {
@@ -378,7 +405,7 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
378 405
379 if (abs) { 406 if (abs) {
380 // LUT index is in the range of (0.0, 1.0) 407 // LUT index is in the range of (0.0, 1.0)
381 index = config.lighting.light[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)"; 408 index = lighting.light[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)";
382 return "(FLOAT_255 * clamp(" + index + ", 0.0, 1.0))"; 409 return "(FLOAT_255 * clamp(" + index + ", 0.0, 1.0))";
383 } else { 410 } else {
384 // LUT index is in the range of (-1.0, 1.0) 411 // LUT index is in the range of (-1.0, 1.0)
@@ -396,8 +423,8 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
396 }; 423 };
397 424
398 // Write the code to emulate each enabled light 425 // Write the code to emulate each enabled light
399 for (unsigned light_index = 0; light_index < config.lighting.src_num; ++light_index) { 426 for (unsigned light_index = 0; light_index < lighting.src_num; ++light_index) {
400 const auto& light_config = config.lighting.light[light_index]; 427 const auto& light_config = lighting.light[light_index];
401 std::string light_src = "light_src[" + std::to_string(light_config.num) + "]"; 428 std::string light_src = "light_src[" + std::to_string(light_config.num) + "]";
402 429
403 // Compute light vector (directional or positional) 430 // Compute light vector (directional or positional)
@@ -421,39 +448,39 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
421 } 448 }
422 449
423 // If enabled, clamp specular component if lighting result is negative 450 // If enabled, clamp specular component if lighting result is negative
424 std::string clamp_highlights = config.lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0"; 451 std::string clamp_highlights = lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0";
425 452
426 // Specular 0 component 453 // Specular 0 component
427 std::string d0_lut_value = "1.0"; 454 std::string d0_lut_value = "1.0";
428 if (config.lighting.lut_d0.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Distribution0)) { 455 if (lighting.lut_d0.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Distribution0)) {
429 // Lookup specular "distribution 0" LUT value 456 // Lookup specular "distribution 0" LUT value
430 std::string index = GetLutIndex(light_config.num, config.lighting.lut_d0.type, config.lighting.lut_d0.abs_input); 457 std::string index = GetLutIndex(light_config.num, lighting.lut_d0.type, lighting.lut_d0.abs_input);
431 d0_lut_value = "(" + std::to_string(config.lighting.lut_d0.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution0, index) + ")"; 458 d0_lut_value = "(" + std::to_string(lighting.lut_d0.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution0, index) + ")";
432 } 459 }
433 std::string specular_0 = "(" + d0_lut_value + " * " + light_src + ".specular_0)"; 460 std::string specular_0 = "(" + d0_lut_value + " * " + light_src + ".specular_0)";
434 461
435 // If enabled, lookup ReflectRed value, otherwise, 1.0 is used 462 // If enabled, lookup ReflectRed value, otherwise, 1.0 is used
436 if (config.lighting.lut_rr.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectRed)) { 463 if (lighting.lut_rr.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectRed)) {
437 std::string index = GetLutIndex(light_config.num, config.lighting.lut_rr.type, config.lighting.lut_rr.abs_input); 464 std::string index = GetLutIndex(light_config.num, lighting.lut_rr.type, lighting.lut_rr.abs_input);
438 std::string value = "(" + std::to_string(config.lighting.lut_rr.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectRed, index) + ")"; 465 std::string value = "(" + std::to_string(lighting.lut_rr.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectRed, index) + ")";
439 out += "refl_value.r = " + value + ";\n"; 466 out += "refl_value.r = " + value + ";\n";
440 } else { 467 } else {
441 out += "refl_value.r = 1.0;\n"; 468 out += "refl_value.r = 1.0;\n";
442 } 469 }
443 470
444 // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used 471 // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used
445 if (config.lighting.lut_rg.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectGreen)) { 472 if (lighting.lut_rg.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectGreen)) {
446 std::string index = GetLutIndex(light_config.num, config.lighting.lut_rg.type, config.lighting.lut_rg.abs_input); 473 std::string index = GetLutIndex(light_config.num, lighting.lut_rg.type, lighting.lut_rg.abs_input);
447 std::string value = "(" + std::to_string(config.lighting.lut_rg.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectGreen, index) + ")"; 474 std::string value = "(" + std::to_string(lighting.lut_rg.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectGreen, index) + ")";
448 out += "refl_value.g = " + value + ";\n"; 475 out += "refl_value.g = " + value + ";\n";
449 } else { 476 } else {
450 out += "refl_value.g = refl_value.r;\n"; 477 out += "refl_value.g = refl_value.r;\n";
451 } 478 }
452 479
453 // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used 480 // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used
454 if (config.lighting.lut_rb.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectBlue)) { 481 if (lighting.lut_rb.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectBlue)) {
455 std::string index = GetLutIndex(light_config.num, config.lighting.lut_rb.type, config.lighting.lut_rb.abs_input); 482 std::string index = GetLutIndex(light_config.num, lighting.lut_rb.type, lighting.lut_rb.abs_input);
456 std::string value = "(" + std::to_string(config.lighting.lut_rb.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectBlue, index) + ")"; 483 std::string value = "(" + std::to_string(lighting.lut_rb.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectBlue, index) + ")";
457 out += "refl_value.b = " + value + ";\n"; 484 out += "refl_value.b = " + value + ";\n";
458 } else { 485 } else {
459 out += "refl_value.b = refl_value.r;\n"; 486 out += "refl_value.b = refl_value.r;\n";
@@ -461,27 +488,27 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
461 488
462 // Specular 1 component 489 // Specular 1 component
463 std::string d1_lut_value = "1.0"; 490 std::string d1_lut_value = "1.0";
464 if (config.lighting.lut_d1.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Distribution1)) { 491 if (lighting.lut_d1.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Distribution1)) {
465 // Lookup specular "distribution 1" LUT value 492 // Lookup specular "distribution 1" LUT value
466 std::string index = GetLutIndex(light_config.num, config.lighting.lut_d1.type, config.lighting.lut_d1.abs_input); 493 std::string index = GetLutIndex(light_config.num, lighting.lut_d1.type, lighting.lut_d1.abs_input);
467 d1_lut_value = "(" + std::to_string(config.lighting.lut_d1.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution1, index) + ")"; 494 d1_lut_value = "(" + std::to_string(lighting.lut_d1.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution1, index) + ")";
468 } 495 }
469 std::string specular_1 = "(" + d1_lut_value + " * refl_value * " + light_src + ".specular_1)"; 496 std::string specular_1 = "(" + d1_lut_value + " * refl_value * " + light_src + ".specular_1)";
470 497
471 // Fresnel 498 // Fresnel
472 if (config.lighting.lut_fr.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Fresnel)) { 499 if (lighting.lut_fr.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Fresnel)) {
473 // Lookup fresnel LUT value 500 // Lookup fresnel LUT value
474 std::string index = GetLutIndex(light_config.num, config.lighting.lut_fr.type, config.lighting.lut_fr.abs_input); 501 std::string index = GetLutIndex(light_config.num, lighting.lut_fr.type, lighting.lut_fr.abs_input);
475 std::string value = "(" + std::to_string(config.lighting.lut_fr.scale) + " * " + GetLutValue(Regs::LightingSampler::Fresnel, index) + ")"; 502 std::string value = "(" + std::to_string(lighting.lut_fr.scale) + " * " + GetLutValue(Regs::LightingSampler::Fresnel, index) + ")";
476 503
477 // Enabled for difffuse lighting alpha component 504 // Enabled for difffuse lighting alpha component
478 if (config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::PrimaryAlpha || 505 if (lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::PrimaryAlpha ||
479 config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both) 506 lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both)
480 out += "diffuse_sum.a *= " + value + ";\n"; 507 out += "diffuse_sum.a *= " + value + ";\n";
481 508
482 // Enabled for the specular lighting alpha component 509 // Enabled for the specular lighting alpha component
483 if (config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::SecondaryAlpha || 510 if (lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::SecondaryAlpha ||
484 config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both) 511 lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both)
485 out += "specular_sum.a *= " + value + ";\n"; 512 out += "specular_sum.a *= " + value + ";\n";
486 } 513 }
487 514
@@ -499,6 +526,8 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
499} 526}
500 527
501std::string GenerateFragmentShader(const PicaShaderConfig& config) { 528std::string GenerateFragmentShader(const PicaShaderConfig& config) {
529 const auto& state = config.state;
530
502 std::string out = R"( 531 std::string out = R"(
503#version 330 core 532#version 330 core
504#define NUM_TEV_STAGES 6 533#define NUM_TEV_STAGES 6
@@ -508,6 +537,7 @@ std::string GenerateFragmentShader(const PicaShaderConfig& config) {
508 537
509in vec4 primary_color; 538in vec4 primary_color;
510in vec2 texcoord[3]; 539in vec2 texcoord[3];
540in float texcoord0_w;
511in vec4 normquat; 541in vec4 normquat;
512in vec3 view; 542in vec3 view;
513 543
@@ -525,6 +555,7 @@ layout (std140) uniform shader_data {
525 vec4 const_color[NUM_TEV_STAGES]; 555 vec4 const_color[NUM_TEV_STAGES];
526 vec4 tev_combiner_buffer_color; 556 vec4 tev_combiner_buffer_color;
527 int alphatest_ref; 557 int alphatest_ref;
558 float depth_scale;
528 float depth_offset; 559 float depth_offset;
529 vec3 lighting_global_ambient; 560 vec3 lighting_global_ambient;
530 LightSrc light_src[NUM_LIGHTS]; 561 LightSrc light_src[NUM_LIGHTS];
@@ -544,29 +575,37 @@ vec4 secondary_fragment_color = vec4(0.0);
544)"; 575)";
545 576
546 // Do not do any sort of processing if it's obvious we're not going to pass the alpha test 577 // Do not do any sort of processing if it's obvious we're not going to pass the alpha test
547 if (config.alpha_test_func == Regs::CompareFunc::Never) { 578 if (state.alpha_test_func == Regs::CompareFunc::Never) {
548 out += "discard; }"; 579 out += "discard; }";
549 return out; 580 return out;
550 } 581 }
551 582
552 if (config.lighting.enable) 583 if (state.lighting.enable)
553 WriteLighting(out, config); 584 WriteLighting(out, config);
554 585
555 out += "vec4 combiner_buffer = vec4(0.0);\n"; 586 out += "vec4 combiner_buffer = vec4(0.0);\n";
556 out += "vec4 next_combiner_buffer = tev_combiner_buffer_color;\n"; 587 out += "vec4 next_combiner_buffer = tev_combiner_buffer_color;\n";
557 out += "vec4 last_tex_env_out = vec4(0.0);\n"; 588 out += "vec4 last_tex_env_out = vec4(0.0);\n";
558 589
559 for (size_t index = 0; index < config.tev_stages.size(); ++index) 590 for (size_t index = 0; index < state.tev_stages.size(); ++index)
560 WriteTevStage(out, config, (unsigned)index); 591 WriteTevStage(out, config, (unsigned)index);
561 592
562 if (config.alpha_test_func != Regs::CompareFunc::Always) { 593 if (state.alpha_test_func != Regs::CompareFunc::Always) {
563 out += "if ("; 594 out += "if (";
564 AppendAlphaTestCondition(out, config.alpha_test_func); 595 AppendAlphaTestCondition(out, state.alpha_test_func);
565 out += ") discard;\n"; 596 out += ") discard;\n";
566 } 597 }
567 598
568 out += "color = last_tex_env_out;\n"; 599 out += "color = last_tex_env_out;\n";
569 out += "gl_FragDepth = gl_FragCoord.z + depth_offset;\n}"; 600
601 out += "float z_over_w = 1.0 - gl_FragCoord.z * 2.0;\n";
602 out += "float depth = z_over_w * depth_scale + depth_offset;\n";
603 if (state.depthmap_enable == Pica::Regs::DepthBuffering::WBuffering) {
604 out += "depth /= gl_FragCoord.w;\n";
605 }
606 out += "gl_FragDepth = depth;\n";
607
608 out += "}";
570 609
571 return out; 610 return out;
572} 611}
@@ -574,17 +613,19 @@ vec4 secondary_fragment_color = vec4(0.0);
574std::string GenerateVertexShader() { 613std::string GenerateVertexShader() {
575 std::string out = "#version 330 core\n"; 614 std::string out = "#version 330 core\n";
576 615
577 out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION) + ") in vec4 vert_position;\n"; 616 out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION) + ") in vec4 vert_position;\n";
578 out += "layout(location = " + std::to_string((int)ATTRIBUTE_COLOR) + ") in vec4 vert_color;\n"; 617 out += "layout(location = " + std::to_string((int)ATTRIBUTE_COLOR) + ") in vec4 vert_color;\n";
579 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0) + ") in vec2 vert_texcoord0;\n"; 618 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0) + ") in vec2 vert_texcoord0;\n";
580 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD1) + ") in vec2 vert_texcoord1;\n"; 619 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD1) + ") in vec2 vert_texcoord1;\n";
581 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD2) + ") in vec2 vert_texcoord2;\n"; 620 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD2) + ") in vec2 vert_texcoord2;\n";
582 out += "layout(location = " + std::to_string((int)ATTRIBUTE_NORMQUAT) + ") in vec4 vert_normquat;\n"; 621 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0_W) + ") in float vert_texcoord0_w;\n";
583 out += "layout(location = " + std::to_string((int)ATTRIBUTE_VIEW) + ") in vec3 vert_view;\n"; 622 out += "layout(location = " + std::to_string((int)ATTRIBUTE_NORMQUAT) + ") in vec4 vert_normquat;\n";
623 out += "layout(location = " + std::to_string((int)ATTRIBUTE_VIEW) + ") in vec3 vert_view;\n";
584 624
585 out += R"( 625 out += R"(
586out vec4 primary_color; 626out vec4 primary_color;
587out vec2 texcoord[3]; 627out vec2 texcoord[3];
628out float texcoord0_w;
588out vec4 normquat; 629out vec4 normquat;
589out vec3 view; 630out vec3 view;
590 631
@@ -593,6 +634,7 @@ void main() {
593 texcoord[0] = vert_texcoord0; 634 texcoord[0] = vert_texcoord0;
594 texcoord[1] = vert_texcoord1; 635 texcoord[1] = vert_texcoord1;
595 texcoord[2] = vert_texcoord2; 636 texcoord[2] = vert_texcoord2;
637 texcoord0_w = vert_texcoord0_w;
596 normquat = vert_normquat; 638 normquat = vert_normquat;
597 view = vert_view; 639 view = vert_view;
598 gl_Position = vec4(vert_position.x, vert_position.y, -vert_position.z, vert_position.w); 640 gl_Position = vec4(vert_position.x, vert_position.y, -vert_position.z, vert_position.w);
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index 0ca9d2879..bef3249cf 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -6,7 +6,7 @@
6 6
7#include <string> 7#include <string>
8 8
9#include "video_core/renderer_opengl/gl_rasterizer.h" 9union PicaShaderConfig;
10 10
11namespace GLShader { 11namespace GLShader {
12 12
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp
index e3f7a5868..dded3db46 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_util.cpp
@@ -2,9 +2,10 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
6#include <vector> 5#include <vector>
7 6
7#include <glad/glad.h>
8
8#include "common/logging/log.h" 9#include "common/logging/log.h"
9#include "video_core/renderer_opengl/gl_shader_util.h" 10#include "video_core/renderer_opengl/gl_shader_util.h"
10 11
diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h
index 097242f6f..f59912f79 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.h
+++ b/src/video_core/renderer_opengl/gl_shader_util.h
@@ -14,6 +14,7 @@ enum Attributes {
14 ATTRIBUTE_TEXCOORD0, 14 ATTRIBUTE_TEXCOORD0,
15 ATTRIBUTE_TEXCOORD1, 15 ATTRIBUTE_TEXCOORD1,
16 ATTRIBUTE_TEXCOORD2, 16 ATTRIBUTE_TEXCOORD2,
17 ATTRIBUTE_TEXCOORD0_W,
17 ATTRIBUTE_NORMQUAT, 18 ATTRIBUTE_NORMQUAT,
18 ATTRIBUTE_VIEW, 19 ATTRIBUTE_VIEW,
19}; 20};
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 08e4d0b54..fa141fc9a 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -2,7 +2,11 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "video_core/pica.h" 5#include <glad/glad.h>
6
7#include "common/common_funcs.h"
8#include "common/logging/log.h"
9
6#include "video_core/renderer_opengl/gl_state.h" 10#include "video_core/renderer_opengl/gl_state.h"
7 11
8OpenGLState OpenGLState::cur_state; 12OpenGLState OpenGLState::cur_state;
@@ -32,6 +36,8 @@ OpenGLState::OpenGLState() {
32 stencil.action_stencil_fail = GL_KEEP; 36 stencil.action_stencil_fail = GL_KEEP;
33 37
34 blend.enabled = false; 38 blend.enabled = false;
39 blend.rgb_equation = GL_FUNC_ADD;
40 blend.a_equation = GL_FUNC_ADD;
35 blend.src_rgb_func = GL_ONE; 41 blend.src_rgb_func = GL_ONE;
36 blend.dst_rgb_func = GL_ZERO; 42 blend.dst_rgb_func = GL_ZERO;
37 blend.src_a_func = GL_ONE; 43 blend.src_a_func = GL_ONE;
@@ -48,17 +54,19 @@ OpenGLState::OpenGLState() {
48 texture_unit.sampler = 0; 54 texture_unit.sampler = 0;
49 } 55 }
50 56
51 for (auto& lut : lighting_lut) { 57 for (auto& lut : lighting_luts) {
52 lut.texture_1d = 0; 58 lut.texture_1d = 0;
53 } 59 }
54 60
55 draw.framebuffer = 0; 61 draw.read_framebuffer = 0;
62 draw.draw_framebuffer = 0;
56 draw.vertex_array = 0; 63 draw.vertex_array = 0;
57 draw.vertex_buffer = 0; 64 draw.vertex_buffer = 0;
65 draw.uniform_buffer = 0;
58 draw.shader_program = 0; 66 draw.shader_program = 0;
59} 67}
60 68
61void OpenGLState::Apply() { 69void OpenGLState::Apply() const {
62 // Culling 70 // Culling
63 if (cull.enabled != cur_state.cull.enabled) { 71 if (cull.enabled != cur_state.cull.enabled) {
64 if (cull.enabled) { 72 if (cull.enabled) {
@@ -159,6 +167,11 @@ void OpenGLState::Apply() {
159 blend.src_a_func, blend.dst_a_func); 167 blend.src_a_func, blend.dst_a_func);
160 } 168 }
161 169
170 if (blend.rgb_equation != cur_state.blend.rgb_equation ||
171 blend.a_equation != cur_state.blend.a_equation) {
172 glBlendEquationSeparate(blend.rgb_equation, blend.a_equation);
173 }
174
162 if (logic_op != cur_state.logic_op) { 175 if (logic_op != cur_state.logic_op) {
163 glLogicOp(logic_op); 176 glLogicOp(logic_op);
164 } 177 }
@@ -175,16 +188,19 @@ void OpenGLState::Apply() {
175 } 188 }
176 189
177 // Lighting LUTs 190 // Lighting LUTs
178 for (unsigned i = 0; i < ARRAY_SIZE(lighting_lut); ++i) { 191 for (unsigned i = 0; i < ARRAY_SIZE(lighting_luts); ++i) {
179 if (lighting_lut[i].texture_1d != cur_state.lighting_lut[i].texture_1d) { 192 if (lighting_luts[i].texture_1d != cur_state.lighting_luts[i].texture_1d) {
180 glActiveTexture(GL_TEXTURE3 + i); 193 glActiveTexture(GL_TEXTURE3 + i);
181 glBindTexture(GL_TEXTURE_1D, lighting_lut[i].texture_1d); 194 glBindTexture(GL_TEXTURE_1D, lighting_luts[i].texture_1d);
182 } 195 }
183 } 196 }
184 197
185 // Framebuffer 198 // Framebuffer
186 if (draw.framebuffer != cur_state.draw.framebuffer) { 199 if (draw.read_framebuffer != cur_state.draw.read_framebuffer) {
187 glBindFramebuffer(GL_FRAMEBUFFER, draw.framebuffer); 200 glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
201 }
202 if (draw.draw_framebuffer != cur_state.draw.draw_framebuffer) {
203 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer);
188 } 204 }
189 205
190 // Vertex array 206 // Vertex array
@@ -210,45 +226,58 @@ void OpenGLState::Apply() {
210 cur_state = *this; 226 cur_state = *this;
211} 227}
212 228
213void OpenGLState::ResetTexture(GLuint id) { 229GLenum OpenGLState::CheckFBStatus(GLenum target) {
230 GLenum fb_status = glCheckFramebufferStatus(target);
231 if (fb_status != GL_FRAMEBUFFER_COMPLETE) {
232 const char* fb_description = (target == GL_READ_FRAMEBUFFER ? "READ" : (target == GL_DRAW_FRAMEBUFFER ? "DRAW" : "UNK"));
233 LOG_CRITICAL(Render_OpenGL, "OpenGL %s framebuffer check failed, status %X", fb_description, fb_status);
234 }
235
236 return fb_status;
237}
238
239void OpenGLState::ResetTexture(GLuint handle) {
214 for (auto& unit : cur_state.texture_units) { 240 for (auto& unit : cur_state.texture_units) {
215 if (unit.texture_2d == id) { 241 if (unit.texture_2d == handle) {
216 unit.texture_2d = 0; 242 unit.texture_2d = 0;
217 } 243 }
218 } 244 }
219} 245}
220 246
221void OpenGLState::ResetSampler(GLuint id) { 247void OpenGLState::ResetSampler(GLuint handle) {
222 for (auto& unit : cur_state.texture_units) { 248 for (auto& unit : cur_state.texture_units) {
223 if (unit.sampler == id) { 249 if (unit.sampler == handle) {
224 unit.sampler = 0; 250 unit.sampler = 0;
225 } 251 }
226 } 252 }
227} 253}
228 254
229void OpenGLState::ResetProgram(GLuint id) { 255void OpenGLState::ResetProgram(GLuint handle) {
230 if (cur_state.draw.shader_program == id) { 256 if (cur_state.draw.shader_program == handle) {
231 cur_state.draw.shader_program = 0; 257 cur_state.draw.shader_program = 0;
232 } 258 }
233} 259}
234 260
235void OpenGLState::ResetBuffer(GLuint id) { 261void OpenGLState::ResetBuffer(GLuint handle) {
236 if (cur_state.draw.vertex_buffer == id) { 262 if (cur_state.draw.vertex_buffer == handle) {
237 cur_state.draw.vertex_buffer = 0; 263 cur_state.draw.vertex_buffer = 0;
238 } 264 }
239 if (cur_state.draw.uniform_buffer == id) { 265 if (cur_state.draw.uniform_buffer == handle) {
240 cur_state.draw.uniform_buffer = 0; 266 cur_state.draw.uniform_buffer = 0;
241 } 267 }
242} 268}
243 269
244void OpenGLState::ResetVertexArray(GLuint id) { 270void OpenGLState::ResetVertexArray(GLuint handle) {
245 if (cur_state.draw.vertex_array == id) { 271 if (cur_state.draw.vertex_array == handle) {
246 cur_state.draw.vertex_array = 0; 272 cur_state.draw.vertex_array = 0;
247 } 273 }
248} 274}
249 275
250void OpenGLState::ResetFramebuffer(GLuint id) { 276void OpenGLState::ResetFramebuffer(GLuint handle) {
251 if (cur_state.draw.framebuffer == id) { 277 if (cur_state.draw.read_framebuffer == handle) {
252 cur_state.draw.framebuffer = 0; 278 cur_state.draw.read_framebuffer = 0;
279 }
280 if (cur_state.draw.draw_framebuffer == handle) {
281 cur_state.draw.draw_framebuffer = 0;
253 } 282 }
254} 283}
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index e848058d7..228727054 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -40,6 +40,8 @@ public:
40 40
41 struct { 41 struct {
42 bool enabled; // GL_BLEND 42 bool enabled; // GL_BLEND
43 GLenum rgb_equation; // GL_BLEND_EQUATION_RGB
44 GLenum a_equation; // GL_BLEND_EQUATION_ALPHA
43 GLenum src_rgb_func; // GL_BLEND_SRC_RGB 45 GLenum src_rgb_func; // GL_BLEND_SRC_RGB
44 GLenum dst_rgb_func; // GL_BLEND_DST_RGB 46 GLenum dst_rgb_func; // GL_BLEND_DST_RGB
45 GLenum src_a_func; // GL_BLEND_SRC_ALPHA 47 GLenum src_a_func; // GL_BLEND_SRC_ALPHA
@@ -63,15 +65,15 @@ public:
63 65
64 struct { 66 struct {
65 GLuint texture_1d; // GL_TEXTURE_BINDING_1D 67 GLuint texture_1d; // GL_TEXTURE_BINDING_1D
66 } lighting_lut[6]; 68 } lighting_luts[6];
67 69
68 struct { 70 struct {
69 GLuint framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING 71 GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING
72 GLuint draw_framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING
70 GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING 73 GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING
71 GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING 74 GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING
72 GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING 75 GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING
73 GLuint shader_program; // GL_CURRENT_PROGRAM 76 GLuint shader_program; // GL_CURRENT_PROGRAM
74 bool shader_dirty;
75 } draw; 77 } draw;
76 78
77 OpenGLState(); 79 OpenGLState();
@@ -82,14 +84,18 @@ public:
82 } 84 }
83 85
84 /// Apply this state as the current OpenGL state 86 /// Apply this state as the current OpenGL state
85 void Apply(); 87 void Apply() const;
86 88
87 static void ResetTexture(GLuint id); 89 /// Check the status of the current OpenGL read or draw framebuffer configuration
88 static void ResetSampler(GLuint id); 90 static GLenum CheckFBStatus(GLenum target);
89 static void ResetProgram(GLuint id); 91
90 static void ResetBuffer(GLuint id); 92 /// Resets and unbinds any references to the given resource in the current OpenGL state
91 static void ResetVertexArray(GLuint id); 93 static void ResetTexture(GLuint handle);
92 static void ResetFramebuffer(GLuint id); 94 static void ResetSampler(GLuint handle);
95 static void ResetProgram(GLuint handle);
96 static void ResetBuffer(GLuint handle);
97 static void ResetVertexArray(GLuint handle);
98 static void ResetFramebuffer(GLuint handle);
93 99
94private: 100private:
95 static OpenGLState cur_state; 101 static OpenGLState cur_state;
diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h
index fd3617d77..6dc2758c5 100644
--- a/src/video_core/renderer_opengl/pica_to_gl.h
+++ b/src/video_core/renderer_opengl/pica_to_gl.h
@@ -4,9 +4,16 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
8#include <cstddef>
9
7#include <glad/glad.h> 10#include <glad/glad.h>
8 11
12#include "common/assert.h"
13#include "common/bit_field.h"
14#include "common/common_funcs.h"
9#include "common/common_types.h" 15#include "common/common_types.h"
16#include "common/logging/log.h"
10 17
11#include "video_core/pica.h" 18#include "video_core/pica.h"
12 19
@@ -71,6 +78,26 @@ inline GLenum WrapMode(Pica::Regs::TextureConfig::WrapMode mode) {
71 return gl_mode; 78 return gl_mode;
72} 79}
73 80
81inline GLenum BlendEquation(Pica::Regs::BlendEquation equation) {
82 static const GLenum blend_equation_table[] = {
83 GL_FUNC_ADD, // BlendEquation::Add
84 GL_FUNC_SUBTRACT, // BlendEquation::Subtract
85 GL_FUNC_REVERSE_SUBTRACT, // BlendEquation::ReverseSubtract
86 GL_MIN, // BlendEquation::Min
87 GL_MAX, // BlendEquation::Max
88 };
89
90 // Range check table for input
91 if (static_cast<size_t>(equation) >= ARRAY_SIZE(blend_equation_table)) {
92 LOG_CRITICAL(Render_OpenGL, "Unknown blend equation %d", equation);
93 UNREACHABLE();
94
95 return GL_FUNC_ADD;
96 }
97
98 return blend_equation_table[(unsigned)equation];
99}
100
74inline GLenum BlendFunc(Pica::Regs::BlendFactor factor) { 101inline GLenum BlendFunc(Pica::Regs::BlendFactor factor) {
75 static const GLenum blend_func_table[] = { 102 static const GLenum blend_func_table[] = {
76 GL_ZERO, // BlendFactor::Zero 103 GL_ZERO, // BlendFactor::Zero
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 11c4d0daf..8f424a435 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -5,23 +5,28 @@
5#include <algorithm> 5#include <algorithm>
6#include <cstddef> 6#include <cstddef>
7#include <cstdlib> 7#include <cstdlib>
8#include <memory>
9
10#include <glad/glad.h>
8 11
9#include "common/assert.h" 12#include "common/assert.h"
13#include "common/bit_field.h"
10#include "common/emu_window.h" 14#include "common/emu_window.h"
11#include "common/logging/log.h" 15#include "common/logging/log.h"
12#include "common/profiler_reporting.h" 16#include "common/profiler_reporting.h"
17#include "common/synchronized_wrapper.h"
13 18
14#include "core/memory.h"
15#include "core/settings.h"
16#include "core/hw/gpu.h" 19#include "core/hw/gpu.h"
17#include "core/hw/hw.h" 20#include "core/hw/hw.h"
18#include "core/hw/lcd.h" 21#include "core/hw/lcd.h"
22#include "core/memory.h"
23#include "core/settings.h"
24#include "core/tracer/recorder.h"
19 25
20#include "video_core/video_core.h"
21#include "video_core/debug_utils/debug_utils.h" 26#include "video_core/debug_utils/debug_utils.h"
22#include "video_core/renderer_opengl/gl_rasterizer.h" 27#include "video_core/rasterizer_interface.h"
23#include "video_core/renderer_opengl/gl_shader_util.h"
24#include "video_core/renderer_opengl/renderer_opengl.h" 28#include "video_core/renderer_opengl/renderer_opengl.h"
29#include "video_core/video_core.h"
25 30
26static const char vertex_shader[] = R"( 31static const char vertex_shader[] = R"(
27#version 150 core 32#version 150 core
@@ -107,7 +112,7 @@ void RendererOpenGL::SwapBuffers() {
107 OpenGLState prev_state = OpenGLState::GetCurState(); 112 OpenGLState prev_state = OpenGLState::GetCurState();
108 state.Apply(); 113 state.Apply();
109 114
110 for(int i : {0, 1}) { 115 for (int i : {0, 1}) {
111 const auto& framebuffer = GPU::g_regs.framebuffer_config[i]; 116 const auto& framebuffer = GPU::g_regs.framebuffer_config[i];
112 117
113 // Main LCD (0): 0x1ED02204, Sub LCD (1): 0x1ED02A04 118 // Main LCD (0): 0x1ED02204, Sub LCD (1): 0x1ED02A04
@@ -117,25 +122,25 @@ void RendererOpenGL::SwapBuffers() {
117 LCD::Read(color_fill.raw, lcd_color_addr); 122 LCD::Read(color_fill.raw, lcd_color_addr);
118 123
119 if (color_fill.is_enabled) { 124 if (color_fill.is_enabled) {
120 LoadColorToActiveGLTexture(color_fill.color_r, color_fill.color_g, color_fill.color_b, textures[i]); 125 LoadColorToActiveGLTexture(color_fill.color_r, color_fill.color_g, color_fill.color_b, screen_infos[i].texture);
121 126
122 // Resize the texture in case the framebuffer size has changed 127 // Resize the texture in case the framebuffer size has changed
123 textures[i].width = 1; 128 screen_infos[i].texture.width = 1;
124 textures[i].height = 1; 129 screen_infos[i].texture.height = 1;
125 } else { 130 } else {
126 if (textures[i].width != (GLsizei)framebuffer.width || 131 if (screen_infos[i].texture.width != (GLsizei)framebuffer.width ||
127 textures[i].height != (GLsizei)framebuffer.height || 132 screen_infos[i].texture.height != (GLsizei)framebuffer.height ||
128 textures[i].format != framebuffer.color_format) { 133 screen_infos[i].texture.format != framebuffer.color_format) {
129 // Reallocate texture if the framebuffer size has changed. 134 // Reallocate texture if the framebuffer size has changed.
130 // This is expected to not happen very often and hence should not be a 135 // This is expected to not happen very often and hence should not be a
131 // performance problem. 136 // performance problem.
132 ConfigureFramebufferTexture(textures[i], framebuffer); 137 ConfigureFramebufferTexture(screen_infos[i].texture, framebuffer);
133 } 138 }
134 LoadFBToActiveGLTexture(framebuffer, textures[i]); 139 LoadFBToScreenInfo(framebuffer, screen_infos[i]);
135 140
136 // Resize the texture in case the framebuffer size has changed 141 // Resize the texture in case the framebuffer size has changed
137 textures[i].width = framebuffer.width; 142 screen_infos[i].texture.width = framebuffer.width;
138 textures[i].height = framebuffer.height; 143 screen_infos[i].texture.height = framebuffer.height;
139 } 144 }
140 } 145 }
141 146
@@ -166,8 +171,8 @@ void RendererOpenGL::SwapBuffers() {
166/** 171/**
167 * Loads framebuffer from emulated memory into the active OpenGL texture. 172 * Loads framebuffer from emulated memory into the active OpenGL texture.
168 */ 173 */
169void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig& framebuffer, 174void RendererOpenGL::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer,
170 const TextureInfo& texture) { 175 ScreenInfo& screen_info) {
171 176
172 const PAddr framebuffer_addr = framebuffer.active_fb == 0 ? 177 const PAddr framebuffer_addr = framebuffer.active_fb == 0 ?
173 framebuffer.address_left1 : framebuffer.address_left2; 178 framebuffer.address_left1 : framebuffer.address_left2;
@@ -177,8 +182,6 @@ void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig&
177 framebuffer_addr, (int)framebuffer.width, 182 framebuffer_addr, (int)framebuffer.width,
178 (int)framebuffer.height, (int)framebuffer.format); 183 (int)framebuffer.height, (int)framebuffer.format);
179 184
180 const u8* framebuffer_data = Memory::GetPhysicalPointer(framebuffer_addr);
181
182 int bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format); 185 int bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format);
183 size_t pixel_stride = framebuffer.stride / bpp; 186 size_t pixel_stride = framebuffer.stride / bpp;
184 187
@@ -189,24 +192,34 @@ void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig&
189 // only allows rows to have a memory alignement of 4. 192 // only allows rows to have a memory alignement of 4.
190 ASSERT(pixel_stride % 4 == 0); 193 ASSERT(pixel_stride % 4 == 0);
191 194
192 state.texture_units[0].texture_2d = texture.handle; 195 if (!Rasterizer()->AccelerateDisplay(framebuffer, framebuffer_addr, static_cast<u32>(pixel_stride), screen_info)) {
193 state.Apply(); 196 // Reset the screen info's display texture to its own permanent texture
197 screen_info.display_texture = screen_info.texture.resource.handle;
198 screen_info.display_texcoords = MathUtil::Rectangle<float>(0.f, 0.f, 1.f, 1.f);
194 199
195 glActiveTexture(GL_TEXTURE0); 200 Memory::RasterizerFlushRegion(framebuffer_addr, framebuffer.stride * framebuffer.height);
196 glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)pixel_stride);
197 201
198 // Update existing texture 202 const u8* framebuffer_data = Memory::GetPhysicalPointer(framebuffer_addr);
199 // TODO: Test what happens on hardware when you change the framebuffer dimensions so that they
200 // differ from the LCD resolution.
201 // TODO: Applications could theoretically crash Citra here by specifying too large
202 // framebuffer sizes. We should make sure that this cannot happen.
203 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height,
204 texture.gl_format, texture.gl_type, framebuffer_data);
205 203
206 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); 204 state.texture_units[0].texture_2d = screen_info.texture.resource.handle;
205 state.Apply();
207 206
208 state.texture_units[0].texture_2d = 0; 207 glActiveTexture(GL_TEXTURE0);
209 state.Apply(); 208 glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)pixel_stride);
209
210 // Update existing texture
211 // TODO: Test what happens on hardware when you change the framebuffer dimensions so that they
212 // differ from the LCD resolution.
213 // TODO: Applications could theoretically crash Citra here by specifying too large
214 // framebuffer sizes. We should make sure that this cannot happen.
215 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height,
216 screen_info.texture.gl_format, screen_info.texture.gl_type, framebuffer_data);
217
218 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
219
220 state.texture_units[0].texture_2d = 0;
221 state.Apply();
222 }
210} 223}
211 224
212/** 225/**
@@ -216,7 +229,7 @@ void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig&
216 */ 229 */
217void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, 230void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b,
218 const TextureInfo& texture) { 231 const TextureInfo& texture) {
219 state.texture_units[0].texture_2d = texture.handle; 232 state.texture_units[0].texture_2d = texture.resource.handle;
220 state.Apply(); 233 state.Apply();
221 234
222 glActiveTexture(GL_TEXTURE0); 235 glActiveTexture(GL_TEXTURE0);
@@ -224,6 +237,9 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color
224 237
225 // Update existing texture 238 // Update existing texture
226 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, framebuffer_data); 239 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, framebuffer_data);
240
241 state.texture_units[0].texture_2d = 0;
242 state.Apply();
227} 243}
228 244
229/** 245/**
@@ -233,20 +249,22 @@ void RendererOpenGL::InitOpenGLObjects() {
233 glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, 0.0f); 249 glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, 0.0f);
234 250
235 // Link shaders and get variable locations 251 // Link shaders and get variable locations
236 program_id = GLShader::LoadProgram(vertex_shader, fragment_shader); 252 shader.Create(vertex_shader, fragment_shader);
237 uniform_modelview_matrix = glGetUniformLocation(program_id, "modelview_matrix"); 253 state.draw.shader_program = shader.handle;
238 uniform_color_texture = glGetUniformLocation(program_id, "color_texture"); 254 state.Apply();
239 attrib_position = glGetAttribLocation(program_id, "vert_position"); 255 uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix");
240 attrib_tex_coord = glGetAttribLocation(program_id, "vert_tex_coord"); 256 uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture");
257 attrib_position = glGetAttribLocation(shader.handle, "vert_position");
258 attrib_tex_coord = glGetAttribLocation(shader.handle, "vert_tex_coord");
241 259
242 // Generate VBO handle for drawing 260 // Generate VBO handle for drawing
243 glGenBuffers(1, &vertex_buffer_handle); 261 vertex_buffer.Create();
244 262
245 // Generate VAO 263 // Generate VAO
246 glGenVertexArrays(1, &vertex_array_handle); 264 vertex_array.Create();
247 265
248 state.draw.vertex_array = vertex_array_handle; 266 state.draw.vertex_array = vertex_array.handle;
249 state.draw.vertex_buffer = vertex_buffer_handle; 267 state.draw.vertex_buffer = vertex_buffer.handle;
250 state.draw.uniform_buffer = 0; 268 state.draw.uniform_buffer = 0;
251 state.Apply(); 269 state.Apply();
252 270
@@ -258,13 +276,13 @@ void RendererOpenGL::InitOpenGLObjects() {
258 glEnableVertexAttribArray(attrib_tex_coord); 276 glEnableVertexAttribArray(attrib_tex_coord);
259 277
260 // Allocate textures for each screen 278 // Allocate textures for each screen
261 for (auto& texture : textures) { 279 for (auto& screen_info : screen_infos) {
262 glGenTextures(1, &texture.handle); 280 screen_info.texture.resource.Create();
263 281
264 // Allocation of storage is deferred until the first frame, when we 282 // Allocation of storage is deferred until the first frame, when we
265 // know the framebuffer size. 283 // know the framebuffer size.
266 284
267 state.texture_units[0].texture_2d = texture.handle; 285 state.texture_units[0].texture_2d = screen_info.texture.resource.handle;
268 state.Apply(); 286 state.Apply();
269 287
270 glActiveTexture(GL_TEXTURE0); 288 glActiveTexture(GL_TEXTURE0);
@@ -273,6 +291,8 @@ void RendererOpenGL::InitOpenGLObjects() {
273 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 291 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
274 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); 292 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
275 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); 293 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
294
295 screen_info.display_texture = screen_info.texture.resource.handle;
276 } 296 }
277 297
278 state.texture_units[0].texture_2d = 0; 298 state.texture_units[0].texture_2d = 0;
@@ -327,30 +347,38 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
327 UNIMPLEMENTED(); 347 UNIMPLEMENTED();
328 } 348 }
329 349
330 state.texture_units[0].texture_2d = texture.handle; 350 state.texture_units[0].texture_2d = texture.resource.handle;
331 state.Apply(); 351 state.Apply();
332 352
333 glActiveTexture(GL_TEXTURE0); 353 glActiveTexture(GL_TEXTURE0);
334 glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0, 354 glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0,
335 texture.gl_format, texture.gl_type, nullptr); 355 texture.gl_format, texture.gl_type, nullptr);
356
357 state.texture_units[0].texture_2d = 0;
358 state.Apply();
336} 359}
337 360
338/** 361/**
339 * Draws a single texture to the emulator window, rotating the texture to correct for the 3DS's LCD rotation. 362 * Draws a single texture to the emulator window, rotating the texture to correct for the 3DS's LCD rotation.
340 */ 363 */
341void RendererOpenGL::DrawSingleScreenRotated(const TextureInfo& texture, float x, float y, float w, float h) { 364void RendererOpenGL::DrawSingleScreenRotated(const ScreenInfo& screen_info, float x, float y, float w, float h) {
365 auto& texcoords = screen_info.display_texcoords;
366
342 std::array<ScreenRectVertex, 4> vertices = {{ 367 std::array<ScreenRectVertex, 4> vertices = {{
343 ScreenRectVertex(x, y, 1.f, 0.f), 368 ScreenRectVertex(x, y, texcoords.bottom, texcoords.left),
344 ScreenRectVertex(x+w, y, 1.f, 1.f), 369 ScreenRectVertex(x+w, y, texcoords.bottom, texcoords.right),
345 ScreenRectVertex(x, y+h, 0.f, 0.f), 370 ScreenRectVertex(x, y+h, texcoords.top, texcoords.left),
346 ScreenRectVertex(x+w, y+h, 0.f, 1.f), 371 ScreenRectVertex(x+w, y+h, texcoords.top, texcoords.right),
347 }}; 372 }};
348 373
349 state.texture_units[0].texture_2d = texture.handle; 374 state.texture_units[0].texture_2d = screen_info.display_texture;
350 state.Apply(); 375 state.Apply();
351 376
352 glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices.data()); 377 glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices.data());
353 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); 378 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
379
380 state.texture_units[0].texture_2d = 0;
381 state.Apply();
354} 382}
355 383
356/** 384/**
@@ -362,9 +390,6 @@ void RendererOpenGL::DrawScreens() {
362 glViewport(0, 0, layout.width, layout.height); 390 glViewport(0, 0, layout.width, layout.height);
363 glClear(GL_COLOR_BUFFER_BIT); 391 glClear(GL_COLOR_BUFFER_BIT);
364 392
365 state.draw.shader_program = program_id;
366 state.Apply();
367
368 // Set projection matrix 393 // Set projection matrix
369 std::array<GLfloat, 3 * 2> ortho_matrix = MakeOrthographicMatrix((float)layout.width, 394 std::array<GLfloat, 3 * 2> ortho_matrix = MakeOrthographicMatrix((float)layout.width,
370 (float)layout.height); 395 (float)layout.height);
@@ -374,9 +399,9 @@ void RendererOpenGL::DrawScreens() {
374 glActiveTexture(GL_TEXTURE0); 399 glActiveTexture(GL_TEXTURE0);
375 glUniform1i(uniform_color_texture, 0); 400 glUniform1i(uniform_color_texture, 0);
376 401
377 DrawSingleScreenRotated(textures[0], (float)layout.top_screen.left, (float)layout.top_screen.top, 402 DrawSingleScreenRotated(screen_infos[0], (float)layout.top_screen.left, (float)layout.top_screen.top,
378 (float)layout.top_screen.GetWidth(), (float)layout.top_screen.GetHeight()); 403 (float)layout.top_screen.GetWidth(), (float)layout.top_screen.GetHeight());
379 DrawSingleScreenRotated(textures[1], (float)layout.bottom_screen.left,(float)layout.bottom_screen.top, 404 DrawSingleScreenRotated(screen_infos[1], (float)layout.bottom_screen.left,(float)layout.bottom_screen.top,
380 (float)layout.bottom_screen.GetWidth(), (float)layout.bottom_screen.GetHeight()); 405 (float)layout.bottom_screen.GetWidth(), (float)layout.bottom_screen.GetHeight());
381 406
382 m_current_frame++; 407 m_current_frame++;
@@ -448,12 +473,6 @@ static void DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity,
448bool RendererOpenGL::Init() { 473bool RendererOpenGL::Init() {
449 render_window->MakeCurrent(); 474 render_window->MakeCurrent();
450 475
451 // TODO: Make frontends initialize this, so they can use gladLoadGLLoader with their own loaders
452 if (!gladLoadGL()) {
453 LOG_CRITICAL(Render_OpenGL, "Failed to initialize GL functions! Exiting...");
454 exit(-1);
455 }
456
457 if (GLAD_GL_KHR_debug) { 476 if (GLAD_GL_KHR_debug) {
458 glEnable(GL_DEBUG_OUTPUT); 477 glEnable(GL_DEBUG_OUTPUT);
459 glDebugMessageCallback(DebugHandler, nullptr); 478 glDebugMessageCallback(DebugHandler, nullptr);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index fe4d142a5..00e1044ab 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -8,13 +8,34 @@
8 8
9#include <glad/glad.h> 9#include <glad/glad.h>
10 10
11#include "common/common_types.h"
12#include "common/math_util.h"
13
11#include "core/hw/gpu.h" 14#include "core/hw/gpu.h"
12 15
13#include "video_core/renderer_base.h" 16#include "video_core/renderer_base.h"
17#include "video_core/renderer_opengl/gl_resource_manager.h"
14#include "video_core/renderer_opengl/gl_state.h" 18#include "video_core/renderer_opengl/gl_state.h"
15 19
16class EmuWindow; 20class EmuWindow;
17 21
22/// Structure used for storing information about the textures for each 3DS screen
23struct TextureInfo {
24 OGLTexture resource;
25 GLsizei width;
26 GLsizei height;
27 GPU::Regs::PixelFormat format;
28 GLenum gl_format;
29 GLenum gl_type;
30};
31
32/// Structure used for storing information about the display target for each 3DS screen
33struct ScreenInfo {
34 GLuint display_texture;
35 MathUtil::Rectangle<float> display_texcoords;
36 TextureInfo texture;
37};
38
18class RendererOpenGL : public RendererBase { 39class RendererOpenGL : public RendererBase {
19public: 40public:
20 41
@@ -37,26 +58,16 @@ public:
37 void ShutDown() override; 58 void ShutDown() override;
38 59
39private: 60private:
40 /// Structure used for storing information about the textures for each 3DS screen
41 struct TextureInfo {
42 GLuint handle;
43 GLsizei width;
44 GLsizei height;
45 GPU::Regs::PixelFormat format;
46 GLenum gl_format;
47 GLenum gl_type;
48 };
49
50 void InitOpenGLObjects(); 61 void InitOpenGLObjects();
51 void ConfigureFramebufferTexture(TextureInfo& texture, 62 void ConfigureFramebufferTexture(TextureInfo& texture,
52 const GPU::Regs::FramebufferConfig& framebuffer); 63 const GPU::Regs::FramebufferConfig& framebuffer);
53 void DrawScreens(); 64 void DrawScreens();
54 void DrawSingleScreenRotated(const TextureInfo& texture, float x, float y, float w, float h); 65 void DrawSingleScreenRotated(const ScreenInfo& screen_info, float x, float y, float w, float h);
55 void UpdateFramerate(); 66 void UpdateFramerate();
56 67
57 // Loads framebuffer from emulated memory into the active OpenGL texture. 68 // Loads framebuffer from emulated memory into the display information structure
58 void LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig& framebuffer, 69 void LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer,
59 const TextureInfo& texture); 70 ScreenInfo& screen_info);
60 // Fills active OpenGL texture with the given RGB color. 71 // Fills active OpenGL texture with the given RGB color.
61 void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, 72 void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b,
62 const TextureInfo& texture); 73 const TextureInfo& texture);
@@ -69,10 +80,10 @@ private:
69 OpenGLState state; 80 OpenGLState state;
70 81
71 // OpenGL object IDs 82 // OpenGL object IDs
72 GLuint vertex_array_handle; 83 OGLVertexArray vertex_array;
73 GLuint vertex_buffer_handle; 84 OGLBuffer vertex_buffer;
74 GLuint program_id; 85 OGLShader shader;
75 std::array<TextureInfo, 2> textures; ///< Textures for top and bottom screens respectively 86 std::array<ScreenInfo, 2> screen_infos; ///< Display information for top and bottom screens respectively
76 // Shader uniform location indices 87 // Shader uniform location indices
77 GLuint uniform_modelview_matrix; 88 GLuint uniform_modelview_matrix;
78 GLuint uniform_color_texture; 89 GLuint uniform_color_texture;
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index 78d295c76..161097610 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -2,118 +2,91 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <memory> 5#include <atomic>
6#include <cmath>
7#include <cstring>
6#include <unordered_map> 8#include <unordered_map>
9#include <utility>
7 10
8#include <boost/range/algorithm/fill.hpp> 11#include <boost/range/algorithm/fill.hpp>
9 12
13#include "common/bit_field.h"
10#include "common/hash.h" 14#include "common/hash.h"
15#include "common/logging/log.h"
11#include "common/microprofile.h" 16#include "common/microprofile.h"
12#include "common/profiler.h"
13 17
14#include "video_core/debug_utils/debug_utils.h"
15#include "video_core/pica.h" 18#include "video_core/pica.h"
16#include "video_core/pica_state.h" 19#include "video_core/pica_state.h"
17#include "video_core/video_core.h" 20#include "video_core/shader/shader.h"
18 21#include "video_core/shader/shader_interpreter.h"
19#include "shader.h"
20#include "shader_interpreter.h"
21 22
22#ifdef ARCHITECTURE_x86_64 23#ifdef ARCHITECTURE_x86_64
23#include "shader_jit_x64.h" 24#include "video_core/shader/shader_jit_x64.h"
24#endif // ARCHITECTURE_x86_64 25#endif // ARCHITECTURE_x86_64
25 26
27#include "video_core/video_core.h"
28
26namespace Pica { 29namespace Pica {
27 30
28namespace Shader { 31namespace Shader {
29 32
30#ifdef ARCHITECTURE_x86_64 33#ifdef ARCHITECTURE_x86_64
31static std::unordered_map<u64, CompiledShader*> shader_map; 34static std::unordered_map<u64, std::unique_ptr<JitShader>> shader_map;
32static JitCompiler jit; 35static const JitShader* jit_shader;
33static CompiledShader* jit_shader; 36#endif // ARCHITECTURE_x86_64
34 37
35static void ClearCache() { 38void ClearCache() {
39#ifdef ARCHITECTURE_x86_64
36 shader_map.clear(); 40 shader_map.clear();
37 jit.Clear();
38 LOG_INFO(HW_GPU, "Shader JIT cache cleared");
39}
40#endif // ARCHITECTURE_x86_64 41#endif // ARCHITECTURE_x86_64
42}
41 43
42void Setup(UnitState<false>& state) { 44void ShaderSetup::Setup() {
43#ifdef ARCHITECTURE_x86_64 45#ifdef ARCHITECTURE_x86_64
44 if (VideoCore::g_shader_jit_enabled) { 46 if (VideoCore::g_shader_jit_enabled) {
45 u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ 47 u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^
46 Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data)) ^ 48 Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data)));
47 g_state.regs.vs.main_offset);
48 49
49 auto iter = shader_map.find(cache_key); 50 auto iter = shader_map.find(cache_key);
50 if (iter != shader_map.end()) { 51 if (iter != shader_map.end()) {
51 jit_shader = iter->second; 52 jit_shader = iter->second.get();
52 } else { 53 } else {
53 // Check if remaining JIT code space is enough for at least one more (massive) shader 54 auto shader = std::make_unique<JitShader>();
54 if (jit.GetSpaceLeft() < jit_shader_size) { 55 shader->Compile();
55 // If not, clear the cache of all previously compiled shaders 56 jit_shader = shader.get();
56 ClearCache(); 57 shader_map[cache_key] = std::move(shader);
57 }
58
59 jit_shader = jit.Compile();
60 shader_map.emplace(cache_key, jit_shader);
61 } 58 }
62 } 59 }
63#endif // ARCHITECTURE_x86_64 60#endif // ARCHITECTURE_x86_64
64} 61}
65 62
66void Shutdown() { 63MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240));
67#ifdef ARCHITECTURE_x86_64
68 ClearCache();
69#endif // ARCHITECTURE_x86_64
70}
71
72static Common::Profiling::TimingCategory shader_category("Vertex Shader");
73MICROPROFILE_DEFINE(GPU_VertexShader, "GPU", "Vertex Shader", MP_RGB(50, 50, 240));
74 64
75OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes) { 65OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num_attributes) {
76 auto& config = g_state.regs.vs; 66 auto& config = g_state.regs.vs;
67 auto& setup = g_state.vs;
77 68
78 Common::Profiling::ScopeTimer timer(shader_category); 69 MICROPROFILE_SCOPE(GPU_Shader);
79 MICROPROFILE_SCOPE(GPU_VertexShader);
80 70
81 state.program_counter = config.main_offset;
82 state.debug.max_offset = 0; 71 state.debug.max_offset = 0;
83 state.debug.max_opdesc_id = 0; 72 state.debug.max_opdesc_id = 0;
84 73
85 // Setup input register table 74 // Setup input register table
86 const auto& attribute_register_map = config.input_register_map; 75 const auto& attribute_register_map = config.input_register_map;
87 76
88 // TODO: Instead of this cumbersome logic, just load the input data directly like 77 for (unsigned i = 0; i < num_attributes; i++)
89 // for (int attr = 0; attr < num_attributes; ++attr) { input_attr[0] = state.registers.input[attribute_register_map.attribute0_register]; } 78 state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i];
90 if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = input.attr[0];
91 if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = input.attr[1];
92 if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = input.attr[2];
93 if (num_attributes > 3) state.registers.input[attribute_register_map.attribute3_register] = input.attr[3];
94 if (num_attributes > 4) state.registers.input[attribute_register_map.attribute4_register] = input.attr[4];
95 if (num_attributes > 5) state.registers.input[attribute_register_map.attribute5_register] = input.attr[5];
96 if (num_attributes > 6) state.registers.input[attribute_register_map.attribute6_register] = input.attr[6];
97 if (num_attributes > 7) state.registers.input[attribute_register_map.attribute7_register] = input.attr[7];
98 if (num_attributes > 8) state.registers.input[attribute_register_map.attribute8_register] = input.attr[8];
99 if (num_attributes > 9) state.registers.input[attribute_register_map.attribute9_register] = input.attr[9];
100 if (num_attributes > 10) state.registers.input[attribute_register_map.attribute10_register] = input.attr[10];
101 if (num_attributes > 11) state.registers.input[attribute_register_map.attribute11_register] = input.attr[11];
102 if (num_attributes > 12) state.registers.input[attribute_register_map.attribute12_register] = input.attr[12];
103 if (num_attributes > 13) state.registers.input[attribute_register_map.attribute13_register] = input.attr[13];
104 if (num_attributes > 14) state.registers.input[attribute_register_map.attribute14_register] = input.attr[14];
105 if (num_attributes > 15) state.registers.input[attribute_register_map.attribute15_register] = input.attr[15];
106 79
107 state.conditional_code[0] = false; 80 state.conditional_code[0] = false;
108 state.conditional_code[1] = false; 81 state.conditional_code[1] = false;
109 82
110#ifdef ARCHITECTURE_x86_64 83#ifdef ARCHITECTURE_x86_64
111 if (VideoCore::g_shader_jit_enabled) 84 if (VideoCore::g_shader_jit_enabled)
112 jit_shader(&state.registers); 85 jit_shader->Run(setup, state, config.main_offset);
113 else 86 else
114 RunInterpreter(state); 87 RunInterpreter(setup, state, config.main_offset);
115#else 88#else
116 RunInterpreter(state); 89 RunInterpreter(setup, state, config.main_offset);
117#endif // ARCHITECTURE_x86_64 90#endif // ARCHITECTURE_x86_64
118 91
119 // Setup output data 92 // Setup output data
@@ -167,10 +140,9 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr
167 return ret; 140 return ret;
168} 141}
169 142
170DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) { 143DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) {
171 UnitState<true> state; 144 UnitState<true> state;
172 145
173 state.program_counter = config.main_offset;
174 state.debug.max_offset = 0; 146 state.debug.max_offset = 0;
175 state.debug.max_opdesc_id = 0; 147 state.debug.max_opdesc_id = 0;
176 148
@@ -179,27 +151,13 @@ DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, c
179 float24 dummy_register; 151 float24 dummy_register;
180 boost::fill(state.registers.input, &dummy_register); 152 boost::fill(state.registers.input, &dummy_register);
181 153
182 if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = &input.attr[0].x; 154 for (unsigned i = 0; i < num_attributes; i++)
183 if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = &input.attr[1].x; 155 state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i];
184 if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = &input.attr[2].x;
185 if (num_attributes > 3) state.registers.input[attribute_register_map.attribute3_register] = &input.attr[3].x;
186 if (num_attributes > 4) state.registers.input[attribute_register_map.attribute4_register] = &input.attr[4].x;
187 if (num_attributes > 5) state.registers.input[attribute_register_map.attribute5_register] = &input.attr[5].x;
188 if (num_attributes > 6) state.registers.input[attribute_register_map.attribute6_register] = &input.attr[6].x;
189 if (num_attributes > 7) state.registers.input[attribute_register_map.attribute7_register] = &input.attr[7].x;
190 if (num_attributes > 8) state.registers.input[attribute_register_map.attribute8_register] = &input.attr[8].x;
191 if (num_attributes > 9) state.registers.input[attribute_register_map.attribute9_register] = &input.attr[9].x;
192 if (num_attributes > 10) state.registers.input[attribute_register_map.attribute10_register] = &input.attr[10].x;
193 if (num_attributes > 11) state.registers.input[attribute_register_map.attribute11_register] = &input.attr[11].x;
194 if (num_attributes > 12) state.registers.input[attribute_register_map.attribute12_register] = &input.attr[12].x;
195 if (num_attributes > 13) state.registers.input[attribute_register_map.attribute13_register] = &input.attr[13].x;
196 if (num_attributes > 14) state.registers.input[attribute_register_map.attribute14_register] = &input.attr[14].x;
197 if (num_attributes > 15) state.registers.input[attribute_register_map.attribute15_register] = &input.attr[15].x;
198 156
199 state.conditional_code[0] = false; 157 state.conditional_code[0] = false;
200 state.conditional_code[1] = false; 158 state.conditional_code[1] = false;
201 159
202 RunInterpreter(state); 160 RunInterpreter(setup, state, config.main_offset);
203 return state.debug; 161 return state.debug;
204} 162}
205 163
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index 7af8f1fa1..84898f21c 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -4,17 +4,23 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
8#include <cstddef>
9#include <memory>
10#include <type_traits>
7#include <vector> 11#include <vector>
8 12
9#include <boost/container/static_vector.hpp> 13#include <boost/container/static_vector.hpp>
10 14
11#include <nihstro/shader_binary.h> 15#include <nihstro/shader_bytecode.h>
12 16
17#include "common/assert.h"
13#include "common/common_funcs.h" 18#include "common/common_funcs.h"
14#include "common/common_types.h" 19#include "common/common_types.h"
15#include "common/vector_math.h" 20#include "common/vector_math.h"
16 21
17#include "video_core/pica.h" 22#include "video_core/pica.h"
23#include "video_core/pica_types.h"
18 24
19using nihstro::RegisterType; 25using nihstro::RegisterType;
20using nihstro::SourceRegister; 26using nihstro::SourceRegister;
@@ -25,7 +31,7 @@ namespace Pica {
25namespace Shader { 31namespace Shader {
26 32
27struct InputVertex { 33struct InputVertex {
28 Math::Vec4<float24> attr[16]; 34 alignas(16) Math::Vec4<float24> attr[16];
29}; 35};
30 36
31struct OutputVertex { 37struct OutputVertex {
@@ -37,7 +43,8 @@ struct OutputVertex {
37 Math::Vec4<float24> color; 43 Math::Vec4<float24> color;
38 Math::Vec2<float24> tc0; 44 Math::Vec2<float24> tc0;
39 Math::Vec2<float24> tc1; 45 Math::Vec2<float24> tc1;
40 INSERT_PADDING_WORDS(2); 46 float24 tc0_w;
47 INSERT_PADDING_WORDS(1);
41 Math::Vec3<float24> view; 48 Math::Vec3<float24> view;
42 INSERT_PADDING_WORDS(1); 49 INSERT_PADDING_WORDS(1);
43 Math::Vec2<float24> tc2; 50 Math::Vec2<float24> tc2;
@@ -77,23 +84,6 @@ struct OutputVertex {
77static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); 84static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD");
78static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); 85static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size");
79 86
80/// Vertex shader memory
81struct ShaderSetup {
82 struct {
83 // The float uniforms are accessed by the shader JIT using SSE instructions, and are
84 // therefore required to be 16-byte aligned.
85 alignas(16) Math::Vec4<float24> f[96];
86
87 std::array<bool, 16> b;
88 std::array<Math::Vec4<u8>, 4> i;
89 } uniforms;
90
91 Math::Vec4<float24> default_attributes[16];
92
93 std::array<u32, 1024> program_code;
94 std::array<u32, 1024> swizzle_data;
95};
96
97// Helper structure used to keep track of data useful for inspection of shader emulation 87// Helper structure used to keep track of data useful for inspection of shader emulation
98template<bool full_debugging> 88template<bool full_debugging>
99struct DebugData; 89struct DebugData;
@@ -282,38 +272,21 @@ struct UnitState {
282 } registers; 272 } registers;
283 static_assert(std::is_pod<Registers>::value, "Structure is not POD"); 273 static_assert(std::is_pod<Registers>::value, "Structure is not POD");
284 274
285 u32 program_counter;
286 bool conditional_code[2]; 275 bool conditional_code[2];
287 276
288 // Two Address registers and one loop counter 277 // Two Address registers and one loop counter
289 // TODO: How many bits do these actually have? 278 // TODO: How many bits do these actually have?
290 s32 address_registers[3]; 279 s32 address_registers[3];
291 280
292 enum {
293 INVALID_ADDRESS = 0xFFFFFFFF
294 };
295
296 struct CallStackElement {
297 u32 final_address; // Address upon which we jump to return_address
298 u32 return_address; // Where to jump when leaving scope
299 u8 repeat_counter; // How often to repeat until this call stack element is removed
300 u8 loop_increment; // Which value to add to the loop counter after an iteration
301 // TODO: Should this be a signed value? Does it even matter?
302 u32 loop_address; // The address where we'll return to after each loop iteration
303 };
304
305 // TODO: Is there a maximal size for this?
306 boost::container::static_vector<CallStackElement, 16> call_stack;
307
308 DebugData<Debug> debug; 281 DebugData<Debug> debug;
309 282
310 static size_t InputOffset(const SourceRegister& reg) { 283 static size_t InputOffset(const SourceRegister& reg) {
311 switch (reg.GetRegisterType()) { 284 switch (reg.GetRegisterType()) {
312 case RegisterType::Input: 285 case RegisterType::Input:
313 return offsetof(UnitState::Registers, input) + reg.GetIndex()*sizeof(Math::Vec4<float24>); 286 return offsetof(UnitState, registers.input) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
314 287
315 case RegisterType::Temporary: 288 case RegisterType::Temporary:
316 return offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); 289 return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
317 290
318 default: 291 default:
319 UNREACHABLE(); 292 UNREACHABLE();
@@ -324,10 +297,10 @@ struct UnitState {
324 static size_t OutputOffset(const DestRegister& reg) { 297 static size_t OutputOffset(const DestRegister& reg) {
325 switch (reg.GetRegisterType()) { 298 switch (reg.GetRegisterType()) {
326 case RegisterType::Output: 299 case RegisterType::Output:
327 return offsetof(UnitState::Registers, output) + reg.GetIndex()*sizeof(Math::Vec4<float24>); 300 return offsetof(UnitState, registers.output) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
328 301
329 case RegisterType::Temporary: 302 case RegisterType::Temporary:
330 return offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); 303 return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
331 304
332 default: 305 default:
333 UNREACHABLE(); 306 UNREACHABLE();
@@ -336,34 +309,66 @@ struct UnitState {
336 } 309 }
337}; 310};
338 311
339/** 312/// Clears the shader cache
340 * Performs any shader unit setup that only needs to happen once per shader (as opposed to once per 313void ClearCache();
341 * vertex, which would happen within the `Run` function).
342 * @param state Shader unit state, must be setup per shader and per shader unit
343 */
344void Setup(UnitState<false>& state);
345 314
346/// Performs any cleanup when the emulator is shutdown 315struct ShaderSetup {
347void Shutdown();
348 316
349/** 317 struct {
350 * Runs the currently setup shader 318 // The float uniforms are accessed by the shader JIT using SSE instructions, and are
351 * @param state Shader unit state, must be setup per shader and per shader unit 319 // therefore required to be 16-byte aligned.
352 * @param input Input vertex into the shader 320 alignas(16) Math::Vec4<float24> f[96];
353 * @param num_attributes The number of vertex shader attributes
354 * @return The output vertex, after having been processed by the vertex shader
355 */
356OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes);
357 321
358/** 322 std::array<bool, 16> b;
359 * Produce debug information based on the given shader and input vertex 323 std::array<Math::Vec4<u8>, 4> i;
360 * @param input Input vertex into the shader 324 } uniforms;
361 * @param num_attributes The number of vertex shader attributes 325
362 * @param config Configuration object for the shader pipeline 326 static size_t UniformOffset(RegisterType type, unsigned index) {
363 * @param setup Setup object for the shader pipeline 327 switch (type) {
364 * @return Debug information for this shader with regards to the given vertex 328 case RegisterType::FloatUniform:
365 */ 329 return offsetof(ShaderSetup, uniforms.f) + index*sizeof(Math::Vec4<float24>);
366DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup); 330
331 case RegisterType::BoolUniform:
332 return offsetof(ShaderSetup, uniforms.b) + index*sizeof(bool);
333
334 case RegisterType::IntUniform:
335 return offsetof(ShaderSetup, uniforms.i) + index*sizeof(Math::Vec4<u8>);
336
337 default:
338 UNREACHABLE();
339 return 0;
340 }
341 }
342
343 std::array<u32, 1024> program_code;
344 std::array<u32, 1024> swizzle_data;
345
346 /**
347 * Performs any shader unit setup that only needs to happen once per shader (as opposed to once per
348 * vertex, which would happen within the `Run` function).
349 */
350 void Setup();
351
352 /**
353 * Runs the currently setup shader
354 * @param state Shader unit state, must be setup per shader and per shader unit
355 * @param input Input vertex into the shader
356 * @param num_attributes The number of vertex shader attributes
357 * @return The output vertex, after having been processed by the vertex shader
358 */
359 OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes);
360
361 /**
362 * Produce debug information based on the given shader and input vertex
363 * @param input Input vertex into the shader
364 * @param num_attributes The number of vertex shader attributes
365 * @param config Configuration object for the shader pipeline
366 * @param setup Setup object for the shader pipeline
367 * @return Debug information for this shader with regards to the given vertex
368 */
369 DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup);
370
371};
367 372
368} // namespace Shader 373} // namespace Shader
369 374
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp
index 9b978583e..714e8bfd5 100644
--- a/src/video_core/shader/shader_interpreter.cpp
+++ b/src/video_core/shader/shader_interpreter.cpp
@@ -2,12 +2,20 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
6#include <array>
7#include <cmath>
5#include <numeric> 8#include <numeric>
9
6#include <nihstro/shader_bytecode.h> 10#include <nihstro/shader_bytecode.h>
7 11
8#include "common/file_util.h" 12#include "common/assert.h"
9#include "video_core/pica.h" 13#include "common/common_types.h"
14#include "common/logging/log.h"
15#include "common/vector_math.h"
16
10#include "video_core/pica_state.h" 17#include "video_core/pica_state.h"
18#include "video_core/pica_types.h"
11#include "video_core/shader/shader.h" 19#include "video_core/shader/shader.h"
12#include "video_core/shader/shader_interpreter.h" 20#include "video_core/shader/shader_interpreter.h"
13 21
@@ -21,8 +29,24 @@ namespace Pica {
21 29
22namespace Shader { 30namespace Shader {
23 31
32constexpr u32 INVALID_ADDRESS = 0xFFFFFFFF;
33
34struct CallStackElement {
35 u32 final_address; // Address upon which we jump to return_address
36 u32 return_address; // Where to jump when leaving scope
37 u8 repeat_counter; // How often to repeat until this call stack element is removed
38 u8 loop_increment; // Which value to add to the loop counter after an iteration
39 // TODO: Should this be a signed value? Does it even matter?
40 u32 loop_address; // The address where we'll return to after each loop iteration
41};
42
24template<bool Debug> 43template<bool Debug>
25void RunInterpreter(UnitState<Debug>& state) { 44void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset) {
45 // TODO: Is there a maximal size for this?
46 boost::container::static_vector<CallStackElement, 16> call_stack;
47
48 u32 program_counter = offset;
49
26 const auto& uniforms = g_state.vs.uniforms; 50 const auto& uniforms = g_state.vs.uniforms;
27 const auto& swizzle_data = g_state.vs.swizzle_data; 51 const auto& swizzle_data = g_state.vs.swizzle_data;
28 const auto& program_code = g_state.vs.program_code; 52 const auto& program_code = g_state.vs.program_code;
@@ -33,16 +57,16 @@ void RunInterpreter(UnitState<Debug>& state) {
33 unsigned iteration = 0; 57 unsigned iteration = 0;
34 bool exit_loop = false; 58 bool exit_loop = false;
35 while (!exit_loop) { 59 while (!exit_loop) {
36 if (!state.call_stack.empty()) { 60 if (!call_stack.empty()) {
37 auto& top = state.call_stack.back(); 61 auto& top = call_stack.back();
38 if (state.program_counter == top.final_address) { 62 if (program_counter == top.final_address) {
39 state.address_registers[2] += top.loop_increment; 63 state.address_registers[2] += top.loop_increment;
40 64
41 if (top.repeat_counter-- == 0) { 65 if (top.repeat_counter-- == 0) {
42 state.program_counter = top.return_address; 66 program_counter = top.return_address;
43 state.call_stack.pop_back(); 67 call_stack.pop_back();
44 } else { 68 } else {
45 state.program_counter = top.loop_address; 69 program_counter = top.loop_address;
46 } 70 }
47 71
48 // TODO: Is "trying again" accurate to hardware? 72 // TODO: Is "trying again" accurate to hardware?
@@ -50,20 +74,20 @@ void RunInterpreter(UnitState<Debug>& state) {
50 } 74 }
51 } 75 }
52 76
53 const Instruction instr = { program_code[state.program_counter] }; 77 const Instruction instr = { program_code[program_counter] };
54 const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] }; 78 const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] };
55 79
56 static auto call = [](UnitState<Debug>& state, u32 offset, u32 num_instructions, 80 static auto call = [&program_counter, &call_stack](UnitState<Debug>& state, u32 offset, u32 num_instructions,
57 u32 return_offset, u8 repeat_count, u8 loop_increment) { 81 u32 return_offset, u8 repeat_count, u8 loop_increment) {
58 state.program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset 82 program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset
59 ASSERT(state.call_stack.size() < state.call_stack.capacity()); 83 ASSERT(call_stack.size() < call_stack.capacity());
60 state.call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); 84 call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset });
61 }; 85 };
62 Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, state.program_counter); 86 Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, program_counter);
63 if (iteration > 0) 87 if (iteration > 0)
64 Record<DebugDataRecord::NEXT_INSTR>(state.debug, iteration - 1, state.program_counter); 88 Record<DebugDataRecord::NEXT_INSTR>(state.debug, iteration - 1, program_counter);
65 89
66 state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + state.program_counter); 90 state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + program_counter);
67 91
68 auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { 92 auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* {
69 switch (source_reg.GetRegisterType()) { 93 switch (source_reg.GetRegisterType()) {
@@ -511,7 +535,7 @@ void RunInterpreter(UnitState<Debug>& state) {
511 case OpCode::Id::JMPC: 535 case OpCode::Id::JMPC:
512 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); 536 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code);
513 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { 537 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) {
514 state.program_counter = instr.flow_control.dest_offset - 1; 538 program_counter = instr.flow_control.dest_offset - 1;
515 } 539 }
516 break; 540 break;
517 541
@@ -519,7 +543,7 @@ void RunInterpreter(UnitState<Debug>& state) {
519 Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); 543 Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
520 544
521 if (uniforms.b[instr.flow_control.bool_uniform_id] == !(instr.flow_control.num_instructions & 1)) { 545 if (uniforms.b[instr.flow_control.bool_uniform_id] == !(instr.flow_control.num_instructions & 1)) {
522 state.program_counter = instr.flow_control.dest_offset - 1; 546 program_counter = instr.flow_control.dest_offset - 1;
523 } 547 }
524 break; 548 break;
525 549
@@ -527,7 +551,7 @@ void RunInterpreter(UnitState<Debug>& state) {
527 call(state, 551 call(state,
528 instr.flow_control.dest_offset, 552 instr.flow_control.dest_offset,
529 instr.flow_control.num_instructions, 553 instr.flow_control.num_instructions,
530 state.program_counter + 1, 0, 0); 554 program_counter + 1, 0, 0);
531 break; 555 break;
532 556
533 case OpCode::Id::CALLU: 557 case OpCode::Id::CALLU:
@@ -536,7 +560,7 @@ void RunInterpreter(UnitState<Debug>& state) {
536 call(state, 560 call(state,
537 instr.flow_control.dest_offset, 561 instr.flow_control.dest_offset,
538 instr.flow_control.num_instructions, 562 instr.flow_control.num_instructions,
539 state.program_counter + 1, 0, 0); 563 program_counter + 1, 0, 0);
540 } 564 }
541 break; 565 break;
542 566
@@ -546,7 +570,7 @@ void RunInterpreter(UnitState<Debug>& state) {
546 call(state, 570 call(state,
547 instr.flow_control.dest_offset, 571 instr.flow_control.dest_offset,
548 instr.flow_control.num_instructions, 572 instr.flow_control.num_instructions,
549 state.program_counter + 1, 0, 0); 573 program_counter + 1, 0, 0);
550 } 574 }
551 break; 575 break;
552 576
@@ -557,8 +581,8 @@ void RunInterpreter(UnitState<Debug>& state) {
557 Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); 581 Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
558 if (uniforms.b[instr.flow_control.bool_uniform_id]) { 582 if (uniforms.b[instr.flow_control.bool_uniform_id]) {
559 call(state, 583 call(state,
560 state.program_counter + 1, 584 program_counter + 1,
561 instr.flow_control.dest_offset - state.program_counter - 1, 585 instr.flow_control.dest_offset - program_counter - 1,
562 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); 586 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
563 } else { 587 } else {
564 call(state, 588 call(state,
@@ -576,8 +600,8 @@ void RunInterpreter(UnitState<Debug>& state) {
576 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); 600 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code);
577 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { 601 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) {
578 call(state, 602 call(state,
579 state.program_counter + 1, 603 program_counter + 1,
580 instr.flow_control.dest_offset - state.program_counter - 1, 604 instr.flow_control.dest_offset - program_counter - 1,
581 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); 605 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
582 } else { 606 } else {
583 call(state, 607 call(state,
@@ -599,8 +623,8 @@ void RunInterpreter(UnitState<Debug>& state) {
599 623
600 Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param); 624 Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param);
601 call(state, 625 call(state,
602 state.program_counter + 1, 626 program_counter + 1,
603 instr.flow_control.dest_offset - state.program_counter + 1, 627 instr.flow_control.dest_offset - program_counter + 1,
604 instr.flow_control.dest_offset + 1, 628 instr.flow_control.dest_offset + 1,
605 loop_param.x, 629 loop_param.x,
606 loop_param.z); 630 loop_param.z);
@@ -617,14 +641,14 @@ void RunInterpreter(UnitState<Debug>& state) {
617 } 641 }
618 } 642 }
619 643
620 ++state.program_counter; 644 ++program_counter;
621 ++iteration; 645 ++iteration;
622 } 646 }
623} 647}
624 648
625// Explicit instantiation 649// Explicit instantiation
626template void RunInterpreter(UnitState<false>& state); 650template void RunInterpreter(const ShaderSetup& setup, UnitState<false>& state, unsigned offset);
627template void RunInterpreter(UnitState<true>& state); 651template void RunInterpreter(const ShaderSetup& setup, UnitState<true>& state, unsigned offset);
628 652
629} // namespace 653} // namespace
630 654
diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h
index 294bca50e..bb3ce1c6e 100644
--- a/src/video_core/shader/shader_interpreter.h
+++ b/src/video_core/shader/shader_interpreter.h
@@ -4,14 +4,14 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "video_core/shader/shader.h"
8
9namespace Pica { 7namespace Pica {
10 8
11namespace Shader { 9namespace Shader {
12 10
11template <bool Debug> struct UnitState;
12
13template<bool Debug> 13template<bool Debug>
14void RunInterpreter(UnitState<Debug>& state); 14void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset);
15 15
16} // namespace 16} // namespace
17 17
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index dffe051ef..43e7e6b4c 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -2,8 +2,16 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <smmintrin.h> 5#include <algorithm>
6#include <cmath>
7#include <cstdint>
8#include <xmmintrin.h>
6 9
10#include <nihstro/shader_bytecode.h>
11
12#include "common/assert.h"
13#include "common/logging/log.h"
14#include "common/vector_math.h"
7#include "common/x64/abi.h" 15#include "common/x64/abi.h"
8#include "common/x64/cpu_detect.h" 16#include "common/x64/cpu_detect.h"
9#include "common/x64/emitter.h" 17#include "common/x64/emitter.h"
@@ -12,6 +20,7 @@
12#include "shader_jit_x64.h" 20#include "shader_jit_x64.h"
13 21
14#include "video_core/pica_state.h" 22#include "video_core/pica_state.h"
23#include "video_core/pica_types.h"
15 24
16namespace Pica { 25namespace Pica {
17 26
@@ -19,73 +28,73 @@ namespace Shader {
19 28
20using namespace Gen; 29using namespace Gen;
21 30
22typedef void (JitCompiler::*JitFunction)(Instruction instr); 31typedef void (JitShader::*JitFunction)(Instruction instr);
23 32
24const JitFunction instr_table[64] = { 33const JitFunction instr_table[64] = {
25 &JitCompiler::Compile_ADD, // add 34 &JitShader::Compile_ADD, // add
26 &JitCompiler::Compile_DP3, // dp3 35 &JitShader::Compile_DP3, // dp3
27 &JitCompiler::Compile_DP4, // dp4 36 &JitShader::Compile_DP4, // dp4
28 &JitCompiler::Compile_DPH, // dph 37 &JitShader::Compile_DPH, // dph
29 nullptr, // unknown 38 nullptr, // unknown
30 &JitCompiler::Compile_EX2, // ex2 39 &JitShader::Compile_EX2, // ex2
31 &JitCompiler::Compile_LG2, // lg2 40 &JitShader::Compile_LG2, // lg2
32 nullptr, // unknown 41 nullptr, // unknown
33 &JitCompiler::Compile_MUL, // mul 42 &JitShader::Compile_MUL, // mul
34 &JitCompiler::Compile_SGE, // sge 43 &JitShader::Compile_SGE, // sge
35 &JitCompiler::Compile_SLT, // slt 44 &JitShader::Compile_SLT, // slt
36 &JitCompiler::Compile_FLR, // flr 45 &JitShader::Compile_FLR, // flr
37 &JitCompiler::Compile_MAX, // max 46 &JitShader::Compile_MAX, // max
38 &JitCompiler::Compile_MIN, // min 47 &JitShader::Compile_MIN, // min
39 &JitCompiler::Compile_RCP, // rcp 48 &JitShader::Compile_RCP, // rcp
40 &JitCompiler::Compile_RSQ, // rsq 49 &JitShader::Compile_RSQ, // rsq
41 nullptr, // unknown 50 nullptr, // unknown
42 nullptr, // unknown 51 nullptr, // unknown
43 &JitCompiler::Compile_MOVA, // mova 52 &JitShader::Compile_MOVA, // mova
44 &JitCompiler::Compile_MOV, // mov 53 &JitShader::Compile_MOV, // mov
45 nullptr, // unknown 54 nullptr, // unknown
46 nullptr, // unknown 55 nullptr, // unknown
47 nullptr, // unknown 56 nullptr, // unknown
48 nullptr, // unknown 57 nullptr, // unknown
49 &JitCompiler::Compile_DPH, // dphi 58 &JitShader::Compile_DPH, // dphi
50 nullptr, // unknown 59 nullptr, // unknown
51 &JitCompiler::Compile_SGE, // sgei 60 &JitShader::Compile_SGE, // sgei
52 &JitCompiler::Compile_SLT, // slti 61 &JitShader::Compile_SLT, // slti
53 nullptr, // unknown 62 nullptr, // unknown
54 nullptr, // unknown 63 nullptr, // unknown
55 nullptr, // unknown 64 nullptr, // unknown
56 nullptr, // unknown 65 nullptr, // unknown
57 nullptr, // unknown 66 nullptr, // unknown
58 &JitCompiler::Compile_NOP, // nop 67 &JitShader::Compile_NOP, // nop
59 &JitCompiler::Compile_END, // end 68 &JitShader::Compile_END, // end
60 nullptr, // break 69 nullptr, // break
61 &JitCompiler::Compile_CALL, // call 70 &JitShader::Compile_CALL, // call
62 &JitCompiler::Compile_CALLC, // callc 71 &JitShader::Compile_CALLC, // callc
63 &JitCompiler::Compile_CALLU, // callu 72 &JitShader::Compile_CALLU, // callu
64 &JitCompiler::Compile_IF, // ifu 73 &JitShader::Compile_IF, // ifu
65 &JitCompiler::Compile_IF, // ifc 74 &JitShader::Compile_IF, // ifc
66 &JitCompiler::Compile_LOOP, // loop 75 &JitShader::Compile_LOOP, // loop
67 nullptr, // emit 76 nullptr, // emit
68 nullptr, // sete 77 nullptr, // sete
69 &JitCompiler::Compile_JMP, // jmpc 78 &JitShader::Compile_JMP, // jmpc
70 &JitCompiler::Compile_JMP, // jmpu 79 &JitShader::Compile_JMP, // jmpu
71 &JitCompiler::Compile_CMP, // cmp 80 &JitShader::Compile_CMP, // cmp
72 &JitCompiler::Compile_CMP, // cmp 81 &JitShader::Compile_CMP, // cmp
73 &JitCompiler::Compile_MAD, // madi 82 &JitShader::Compile_MAD, // madi
74 &JitCompiler::Compile_MAD, // madi 83 &JitShader::Compile_MAD, // madi
75 &JitCompiler::Compile_MAD, // madi 84 &JitShader::Compile_MAD, // madi
76 &JitCompiler::Compile_MAD, // madi 85 &JitShader::Compile_MAD, // madi
77 &JitCompiler::Compile_MAD, // madi 86 &JitShader::Compile_MAD, // madi
78 &JitCompiler::Compile_MAD, // madi 87 &JitShader::Compile_MAD, // madi
79 &JitCompiler::Compile_MAD, // madi 88 &JitShader::Compile_MAD, // madi
80 &JitCompiler::Compile_MAD, // madi 89 &JitShader::Compile_MAD, // madi
81 &JitCompiler::Compile_MAD, // mad 90 &JitShader::Compile_MAD, // mad
82 &JitCompiler::Compile_MAD, // mad 91 &JitShader::Compile_MAD, // mad
83 &JitCompiler::Compile_MAD, // mad 92 &JitShader::Compile_MAD, // mad
84 &JitCompiler::Compile_MAD, // mad 93 &JitShader::Compile_MAD, // mad
85 &JitCompiler::Compile_MAD, // mad 94 &JitShader::Compile_MAD, // mad
86 &JitCompiler::Compile_MAD, // mad 95 &JitShader::Compile_MAD, // mad
87 &JitCompiler::Compile_MAD, // mad 96 &JitShader::Compile_MAD, // mad
88 &JitCompiler::Compile_MAD, // mad 97 &JitShader::Compile_MAD, // mad
89}; 98};
90 99
91// The following is used to alias some commonly used registers. Generally, RAX-RDX and XMM0-XMM3 can 100// The following is used to alias some commonly used registers. Generally, RAX-RDX and XMM0-XMM3 can
@@ -93,7 +102,7 @@ const JitFunction instr_table[64] = {
93// purposes, as documented below: 102// purposes, as documented below:
94 103
95/// Pointer to the uniform memory 104/// Pointer to the uniform memory
96static const X64Reg UNIFORMS = R9; 105static const X64Reg SETUP = R9;
97/// The two 32-bit VS address offset registers set by the MOVA instruction 106/// The two 32-bit VS address offset registers set by the MOVA instruction
98static const X64Reg ADDROFFS_REG_0 = R10; 107static const X64Reg ADDROFFS_REG_0 = R10;
99static const X64Reg ADDROFFS_REG_1 = R11; 108static const X64Reg ADDROFFS_REG_1 = R11;
@@ -108,7 +117,7 @@ static const X64Reg COND0 = R13;
108/// Result of the previous CMP instruction for the Y-component comparison 117/// Result of the previous CMP instruction for the Y-component comparison
109static const X64Reg COND1 = R14; 118static const X64Reg COND1 = R14;
110/// Pointer to the UnitState instance for the current VS unit 119/// Pointer to the UnitState instance for the current VS unit
111static const X64Reg REGISTERS = R15; 120static const X64Reg STATE = R15;
112/// SIMD scratch register 121/// SIMD scratch register
113static const X64Reg SCRATCH = XMM0; 122static const X64Reg SCRATCH = XMM0;
114/// Loaded with the first swizzled source register, otherwise can be used as a scratch register 123/// Loaded with the first swizzled source register, otherwise can be used as a scratch register
@@ -127,7 +136,7 @@ static const X64Reg NEGBIT = XMM15;
127// State registers that must not be modified by external functions calls 136// State registers that must not be modified by external functions calls
128// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed 137// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
129static const BitSet32 persistent_regs = { 138static const BitSet32 persistent_regs = {
130 UNIFORMS, REGISTERS, // Pointers to register blocks 139 SETUP, STATE, // Pointers to register blocks
131 ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers 140 ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers
132 ONE+16, NEGBIT+16, // Constants 141 ONE+16, NEGBIT+16, // Constants
133}; 142};
@@ -138,21 +147,40 @@ static const u8 NO_SRC_REG_SWIZZLE = 0x1b;
138static const u8 NO_DEST_REG_MASK = 0xf; 147static const u8 NO_DEST_REG_MASK = 0xf;
139 148
140/** 149/**
150 * Get the vertex shader instruction for a given offset in the current shader program
151 * @param offset Offset in the current shader program of the instruction
152 * @return Instruction at the specified offset
153 */
154static Instruction GetVertexShaderInstruction(size_t offset) {
155 return { g_state.vs.program_code[offset] };
156}
157
158static void LogCritical(const char* msg) {
159 LOG_CRITICAL(HW_GPU, "%s", msg);
160}
161
162void JitShader::Compile_Assert(bool condition, const char* msg) {
163 if (!condition) {
164 ABI_CallFunctionP(reinterpret_cast<const void*>(LogCritical), const_cast<char*>(msg));
165 }
166}
167
168/**
141 * Loads and swizzles a source register into the specified XMM register. 169 * Loads and swizzles a source register into the specified XMM register.
142 * @param instr VS instruction, used for determining how to load the source register 170 * @param instr VS instruction, used for determining how to load the source register
143 * @param src_num Number indicating which source register to load (1 = src1, 2 = src2, 3 = src3) 171 * @param src_num Number indicating which source register to load (1 = src1, 2 = src2, 3 = src3)
144 * @param src_reg SourceRegister object corresponding to the source register to load 172 * @param src_reg SourceRegister object corresponding to the source register to load
145 * @param dest Destination XMM register to store the loaded, swizzled source register 173 * @param dest Destination XMM register to store the loaded, swizzled source register
146 */ 174 */
147void JitCompiler::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, X64Reg dest) { 175void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, X64Reg dest) {
148 X64Reg src_ptr; 176 X64Reg src_ptr;
149 size_t src_offset; 177 size_t src_offset;
150 178
151 if (src_reg.GetRegisterType() == RegisterType::FloatUniform) { 179 if (src_reg.GetRegisterType() == RegisterType::FloatUniform) {
152 src_ptr = UNIFORMS; 180 src_ptr = SETUP;
153 src_offset = src_reg.GetIndex() * sizeof(float24) * 4; 181 src_offset = ShaderSetup::UniformOffset(RegisterType::FloatUniform, src_reg.GetIndex());
154 } else { 182 } else {
155 src_ptr = REGISTERS; 183 src_ptr = STATE;
156 src_offset = UnitState<false>::InputOffset(src_reg); 184 src_offset = UnitState<false>::InputOffset(src_reg);
157 } 185 }
158 186
@@ -216,7 +244,7 @@ void JitCompiler::Compile_SwizzleSrc(Instruction instr, unsigned src_num, Source
216 } 244 }
217} 245}
218 246
219void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) { 247void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
220 DestRegister dest; 248 DestRegister dest;
221 unsigned operand_desc_id; 249 unsigned operand_desc_id;
222 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD || 250 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD ||
@@ -236,11 +264,11 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) {
236 // If all components are enabled, write the result to the destination register 264 // If all components are enabled, write the result to the destination register
237 if (swiz.dest_mask == NO_DEST_REG_MASK) { 265 if (swiz.dest_mask == NO_DEST_REG_MASK) {
238 // Store dest back to memory 266 // Store dest back to memory
239 MOVAPS(MDisp(REGISTERS, dest_offset_disp), src); 267 MOVAPS(MDisp(STATE, dest_offset_disp), src);
240 268
241 } else { 269 } else {
242 // Not all components are enabled, so mask the result when storing to the destination register... 270 // Not all components are enabled, so mask the result when storing to the destination register...
243 MOVAPS(SCRATCH, MDisp(REGISTERS, dest_offset_disp)); 271 MOVAPS(SCRATCH, MDisp(STATE, dest_offset_disp));
244 272
245 if (Common::GetCPUCaps().sse4_1) { 273 if (Common::GetCPUCaps().sse4_1) {
246 u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); 274 u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1);
@@ -259,11 +287,11 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) {
259 } 287 }
260 288
261 // Store dest back to memory 289 // Store dest back to memory
262 MOVAPS(MDisp(REGISTERS, dest_offset_disp), SCRATCH); 290 MOVAPS(MDisp(STATE, dest_offset_disp), SCRATCH);
263 } 291 }
264} 292}
265 293
266void JitCompiler::Compile_SanitizedMul(Gen::X64Reg src1, Gen::X64Reg src2, Gen::X64Reg scratch) { 294void JitShader::Compile_SanitizedMul(Gen::X64Reg src1, Gen::X64Reg src2, Gen::X64Reg scratch) {
267 MOVAPS(scratch, R(src1)); 295 MOVAPS(scratch, R(src1));
268 CMPPS(scratch, R(src2), CMP_ORD); 296 CMPPS(scratch, R(src2), CMP_ORD);
269 297
@@ -276,7 +304,7 @@ void JitCompiler::Compile_SanitizedMul(Gen::X64Reg src1, Gen::X64Reg src2, Gen::
276 ANDPS(src1, R(scratch)); 304 ANDPS(src1, R(scratch));
277} 305}
278 306
279void JitCompiler::Compile_EvaluateCondition(Instruction instr) { 307void JitShader::Compile_EvaluateCondition(Instruction instr) {
280 // Note: NXOR is used below to check for equality 308 // Note: NXOR is used below to check for equality
281 switch (instr.flow_control.op) { 309 switch (instr.flow_control.op) {
282 case Instruction::FlowControlType::Or: 310 case Instruction::FlowControlType::Or:
@@ -307,23 +335,23 @@ void JitCompiler::Compile_EvaluateCondition(Instruction instr) {
307 } 335 }
308} 336}
309 337
310void JitCompiler::Compile_UniformCondition(Instruction instr) { 338void JitShader::Compile_UniformCondition(Instruction instr) {
311 int offset = offsetof(decltype(g_state.vs.uniforms), b) + (instr.flow_control.bool_uniform_id * sizeof(bool)); 339 int offset = ShaderSetup::UniformOffset(RegisterType::BoolUniform, instr.flow_control.bool_uniform_id);
312 CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0)); 340 CMP(sizeof(bool) * 8, MDisp(SETUP, offset), Imm8(0));
313} 341}
314 342
315BitSet32 JitCompiler::PersistentCallerSavedRegs() { 343BitSet32 JitShader::PersistentCallerSavedRegs() {
316 return persistent_regs & ABI_ALL_CALLER_SAVED; 344 return persistent_regs & ABI_ALL_CALLER_SAVED;
317} 345}
318 346
319void JitCompiler::Compile_ADD(Instruction instr) { 347void JitShader::Compile_ADD(Instruction instr) {
320 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 348 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
321 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); 349 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
322 ADDPS(SRC1, R(SRC2)); 350 ADDPS(SRC1, R(SRC2));
323 Compile_DestEnable(instr, SRC1); 351 Compile_DestEnable(instr, SRC1);
324} 352}
325 353
326void JitCompiler::Compile_DP3(Instruction instr) { 354void JitShader::Compile_DP3(Instruction instr) {
327 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 355 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
328 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); 356 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
329 357
@@ -342,7 +370,7 @@ void JitCompiler::Compile_DP3(Instruction instr) {
342 Compile_DestEnable(instr, SRC1); 370 Compile_DestEnable(instr, SRC1);
343} 371}
344 372
345void JitCompiler::Compile_DP4(Instruction instr) { 373void JitShader::Compile_DP4(Instruction instr) {
346 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 374 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
347 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); 375 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
348 376
@@ -359,7 +387,7 @@ void JitCompiler::Compile_DP4(Instruction instr) {
359 Compile_DestEnable(instr, SRC1); 387 Compile_DestEnable(instr, SRC1);
360} 388}
361 389
362void JitCompiler::Compile_DPH(Instruction instr) { 390void JitShader::Compile_DPH(Instruction instr) {
363 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::DPHI) { 391 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::DPHI) {
364 Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); 392 Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
365 Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); 393 Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
@@ -391,7 +419,7 @@ void JitCompiler::Compile_DPH(Instruction instr) {
391 Compile_DestEnable(instr, SRC1); 419 Compile_DestEnable(instr, SRC1);
392} 420}
393 421
394void JitCompiler::Compile_EX2(Instruction instr) { 422void JitShader::Compile_EX2(Instruction instr) {
395 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 423 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
396 MOVSS(XMM0, R(SRC1)); 424 MOVSS(XMM0, R(SRC1));
397 425
@@ -404,7 +432,7 @@ void JitCompiler::Compile_EX2(Instruction instr) {
404 Compile_DestEnable(instr, SRC1); 432 Compile_DestEnable(instr, SRC1);
405} 433}
406 434
407void JitCompiler::Compile_LG2(Instruction instr) { 435void JitShader::Compile_LG2(Instruction instr) {
408 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 436 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
409 MOVSS(XMM0, R(SRC1)); 437 MOVSS(XMM0, R(SRC1));
410 438
@@ -417,14 +445,14 @@ void JitCompiler::Compile_LG2(Instruction instr) {
417 Compile_DestEnable(instr, SRC1); 445 Compile_DestEnable(instr, SRC1);
418} 446}
419 447
420void JitCompiler::Compile_MUL(Instruction instr) { 448void JitShader::Compile_MUL(Instruction instr) {
421 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 449 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
422 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); 450 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
423 Compile_SanitizedMul(SRC1, SRC2, SCRATCH); 451 Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
424 Compile_DestEnable(instr, SRC1); 452 Compile_DestEnable(instr, SRC1);
425} 453}
426 454
427void JitCompiler::Compile_SGE(Instruction instr) { 455void JitShader::Compile_SGE(Instruction instr) {
428 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SGEI) { 456 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SGEI) {
429 Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); 457 Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
430 Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); 458 Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
@@ -439,7 +467,7 @@ void JitCompiler::Compile_SGE(Instruction instr) {
439 Compile_DestEnable(instr, SRC2); 467 Compile_DestEnable(instr, SRC2);
440} 468}
441 469
442void JitCompiler::Compile_SLT(Instruction instr) { 470void JitShader::Compile_SLT(Instruction instr) {
443 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SLTI) { 471 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SLTI) {
444 Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); 472 Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
445 Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); 473 Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
@@ -454,7 +482,7 @@ void JitCompiler::Compile_SLT(Instruction instr) {
454 Compile_DestEnable(instr, SRC1); 482 Compile_DestEnable(instr, SRC1);
455} 483}
456 484
457void JitCompiler::Compile_FLR(Instruction instr) { 485void JitShader::Compile_FLR(Instruction instr) {
458 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 486 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
459 487
460 if (Common::GetCPUCaps().sse4_1) { 488 if (Common::GetCPUCaps().sse4_1) {
@@ -467,7 +495,7 @@ void JitCompiler::Compile_FLR(Instruction instr) {
467 Compile_DestEnable(instr, SRC1); 495 Compile_DestEnable(instr, SRC1);
468} 496}
469 497
470void JitCompiler::Compile_MAX(Instruction instr) { 498void JitShader::Compile_MAX(Instruction instr) {
471 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 499 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
472 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); 500 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
473 // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned. 501 // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned.
@@ -475,7 +503,7 @@ void JitCompiler::Compile_MAX(Instruction instr) {
475 Compile_DestEnable(instr, SRC1); 503 Compile_DestEnable(instr, SRC1);
476} 504}
477 505
478void JitCompiler::Compile_MIN(Instruction instr) { 506void JitShader::Compile_MIN(Instruction instr) {
479 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 507 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
480 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); 508 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
481 // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned. 509 // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned.
@@ -483,7 +511,7 @@ void JitCompiler::Compile_MIN(Instruction instr) {
483 Compile_DestEnable(instr, SRC1); 511 Compile_DestEnable(instr, SRC1);
484} 512}
485 513
486void JitCompiler::Compile_MOVA(Instruction instr) { 514void JitShader::Compile_MOVA(Instruction instr) {
487 SwizzlePattern swiz = { g_state.vs.swizzle_data[instr.common.operand_desc_id] }; 515 SwizzlePattern swiz = { g_state.vs.swizzle_data[instr.common.operand_desc_id] };
488 516
489 if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) { 517 if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) {
@@ -528,12 +556,12 @@ void JitCompiler::Compile_MOVA(Instruction instr) {
528 } 556 }
529} 557}
530 558
531void JitCompiler::Compile_MOV(Instruction instr) { 559void JitShader::Compile_MOV(Instruction instr) {
532 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 560 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
533 Compile_DestEnable(instr, SRC1); 561 Compile_DestEnable(instr, SRC1);
534} 562}
535 563
536void JitCompiler::Compile_RCP(Instruction instr) { 564void JitShader::Compile_RCP(Instruction instr) {
537 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 565 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
538 566
539 // TODO(bunnei): RCPSS is a pretty rough approximation, this might cause problems if Pica 567 // TODO(bunnei): RCPSS is a pretty rough approximation, this might cause problems if Pica
@@ -544,7 +572,7 @@ void JitCompiler::Compile_RCP(Instruction instr) {
544 Compile_DestEnable(instr, SRC1); 572 Compile_DestEnable(instr, SRC1);
545} 573}
546 574
547void JitCompiler::Compile_RSQ(Instruction instr) { 575void JitShader::Compile_RSQ(Instruction instr) {
548 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 576 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
549 577
550 // TODO(bunnei): RSQRTSS is a pretty rough approximation, this might cause problems if Pica 578 // TODO(bunnei): RSQRTSS is a pretty rough approximation, this might cause problems if Pica
@@ -555,36 +583,41 @@ void JitCompiler::Compile_RSQ(Instruction instr) {
555 Compile_DestEnable(instr, SRC1); 583 Compile_DestEnable(instr, SRC1);
556} 584}
557 585
558void JitCompiler::Compile_NOP(Instruction instr) { 586void JitShader::Compile_NOP(Instruction instr) {
559} 587}
560 588
561void JitCompiler::Compile_END(Instruction instr) { 589void JitShader::Compile_END(Instruction instr) {
562 ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); 590 ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
563 RET(); 591 RET();
564} 592}
565 593
566void JitCompiler::Compile_CALL(Instruction instr) { 594void JitShader::Compile_CALL(Instruction instr) {
567 unsigned offset = instr.flow_control.dest_offset; 595 // Push offset of the return
568 while (offset < (instr.flow_control.dest_offset + instr.flow_control.num_instructions)) { 596 PUSH(64, Imm32(instr.flow_control.dest_offset + instr.flow_control.num_instructions));
569 Compile_NextInstr(&offset); 597
570 } 598 // Call the subroutine
599 FixupBranch b = CALL();
600 fixup_branches.push_back({ b, instr.flow_control.dest_offset });
601
602 // Skip over the return offset that's on the stack
603 ADD(64, R(RSP), Imm32(8));
571} 604}
572 605
573void JitCompiler::Compile_CALLC(Instruction instr) { 606void JitShader::Compile_CALLC(Instruction instr) {
574 Compile_EvaluateCondition(instr); 607 Compile_EvaluateCondition(instr);
575 FixupBranch b = J_CC(CC_Z, true); 608 FixupBranch b = J_CC(CC_Z, true);
576 Compile_CALL(instr); 609 Compile_CALL(instr);
577 SetJumpTarget(b); 610 SetJumpTarget(b);
578} 611}
579 612
580void JitCompiler::Compile_CALLU(Instruction instr) { 613void JitShader::Compile_CALLU(Instruction instr) {
581 Compile_UniformCondition(instr); 614 Compile_UniformCondition(instr);
582 FixupBranch b = J_CC(CC_Z, true); 615 FixupBranch b = J_CC(CC_Z, true);
583 Compile_CALL(instr); 616 Compile_CALL(instr);
584 SetJumpTarget(b); 617 SetJumpTarget(b);
585} 618}
586 619
587void JitCompiler::Compile_CMP(Instruction instr) { 620void JitShader::Compile_CMP(Instruction instr) {
588 using Op = Instruction::Common::CompareOpType::Op; 621 using Op = Instruction::Common::CompareOpType::Op;
589 Op op_x = instr.common.compare_op.x; 622 Op op_x = instr.common.compare_op.x;
590 Op op_y = instr.common.compare_op.y; 623 Op op_y = instr.common.compare_op.y;
@@ -627,7 +660,7 @@ void JitCompiler::Compile_CMP(Instruction instr) {
627 SHR(64, R(COND1), Imm8(63)); 660 SHR(64, R(COND1), Imm8(63));
628} 661}
629 662
630void JitCompiler::Compile_MAD(Instruction instr) { 663void JitShader::Compile_MAD(Instruction instr) {
631 Compile_SwizzleSrc(instr, 1, instr.mad.src1, SRC1); 664 Compile_SwizzleSrc(instr, 1, instr.mad.src1, SRC1);
632 665
633 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) { 666 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) {
@@ -644,9 +677,8 @@ void JitCompiler::Compile_MAD(Instruction instr) {
644 Compile_DestEnable(instr, SRC1); 677 Compile_DestEnable(instr, SRC1);
645} 678}
646 679
647void JitCompiler::Compile_IF(Instruction instr) { 680void JitShader::Compile_IF(Instruction instr) {
648 ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards if-statements (%d -> %d) not supported", 681 Compile_Assert(instr.flow_control.dest_offset >= program_counter, "Backwards if-statements not supported");
649 *offset_ptr, instr.flow_control.dest_offset.Value());
650 682
651 // Evaluate the "IF" condition 683 // Evaluate the "IF" condition
652 if (instr.opcode.Value() == OpCode::Id::IFU) { 684 if (instr.opcode.Value() == OpCode::Id::IFU) {
@@ -676,15 +708,14 @@ void JitCompiler::Compile_IF(Instruction instr) {
676 SetJumpTarget(b2); 708 SetJumpTarget(b2);
677} 709}
678 710
679void JitCompiler::Compile_LOOP(Instruction instr) { 711void JitShader::Compile_LOOP(Instruction instr) {
680 ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards loops (%d -> %d) not supported", 712 Compile_Assert(instr.flow_control.dest_offset >= program_counter, "Backwards loops not supported");
681 *offset_ptr, instr.flow_control.dest_offset.Value()); 713 Compile_Assert(!looping, "Nested loops not supported");
682 ASSERT_MSG(!looping, "Nested loops not supported");
683 714
684 looping = true; 715 looping = true;
685 716
686 int offset = offsetof(decltype(g_state.vs.uniforms), i) + (instr.flow_control.int_uniform_id * sizeof(Math::Vec4<u8>)); 717 int offset = ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id);
687 MOV(32, R(LOOPCOUNT), MDisp(UNIFORMS, offset)); 718 MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset));
688 MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT)); 719 MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT));
689 SHR(32, R(LOOPCOUNT_REG), Imm8(8)); 720 SHR(32, R(LOOPCOUNT_REG), Imm8(8));
690 AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start 721 AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start
@@ -705,10 +736,7 @@ void JitCompiler::Compile_LOOP(Instruction instr) {
705 looping = false; 736 looping = false;
706} 737}
707 738
708void JitCompiler::Compile_JMP(Instruction instr) { 739void JitShader::Compile_JMP(Instruction instr) {
709 ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards jumps (%d -> %d) not supported",
710 *offset_ptr, instr.flow_control.dest_offset.Value());
711
712 if (instr.opcode.Value() == OpCode::Id::JMPC) 740 if (instr.opcode.Value() == OpCode::Id::JMPC)
713 Compile_EvaluateCondition(instr); 741 Compile_EvaluateCondition(instr);
714 else if (instr.opcode.Value() == OpCode::Id::JMPU) 742 else if (instr.opcode.Value() == OpCode::Id::JMPU)
@@ -718,30 +746,38 @@ void JitCompiler::Compile_JMP(Instruction instr) {
718 746
719 bool inverted_condition = (instr.opcode.Value() == OpCode::Id::JMPU) && 747 bool inverted_condition = (instr.opcode.Value() == OpCode::Id::JMPU) &&
720 (instr.flow_control.num_instructions & 1); 748 (instr.flow_control.num_instructions & 1);
749
721 FixupBranch b = J_CC(inverted_condition ? CC_Z : CC_NZ, true); 750 FixupBranch b = J_CC(inverted_condition ? CC_Z : CC_NZ, true);
751 fixup_branches.push_back({ b, instr.flow_control.dest_offset });
752}
722 753
723 Compile_Block(instr.flow_control.dest_offset); 754void JitShader::Compile_Block(unsigned end) {
755 while (program_counter < end) {
756 Compile_NextInstr();
757 }
758}
759
760void JitShader::Compile_Return() {
761 // Peek return offset on the stack and check if we're at that offset
762 MOV(64, R(RAX), MDisp(RSP, 8));
763 CMP(32, R(RAX), Imm32(program_counter));
724 764
765 // If so, jump back to before CALL
766 FixupBranch b = J_CC(CC_NZ, true);
767 RET();
725 SetJumpTarget(b); 768 SetJumpTarget(b);
726} 769}
727 770
728void JitCompiler::Compile_Block(unsigned end) { 771void JitShader::Compile_NextInstr() {
729 // Save current offset pointer 772 if (std::binary_search(return_offsets.begin(), return_offsets.end(), program_counter)) {
730 unsigned* prev_offset_ptr = offset_ptr; 773 Compile_Return();
731 unsigned offset = *prev_offset_ptr; 774 }
732 775
733 while (offset < end) 776 ASSERT_MSG(code_ptr[program_counter] == nullptr, "Tried to compile already compiled shader location!");
734 Compile_NextInstr(&offset); 777 code_ptr[program_counter] = GetCodePtr();
735 778
736 // Restore current offset pointer 779 Instruction instr = GetVertexShaderInstruction(program_counter++);
737 offset_ptr = prev_offset_ptr;
738 *offset_ptr = offset;
739}
740 780
741void JitCompiler::Compile_NextInstr(unsigned* offset) {
742 offset_ptr = offset;
743
744 Instruction instr = *(Instruction*)&g_state.vs.program_code[(*offset_ptr)++];
745 OpCode::Id opcode = instr.opcode.Value(); 781 OpCode::Id opcode = instr.opcode.Value();
746 auto instr_func = instr_table[static_cast<unsigned>(opcode)]; 782 auto instr_func = instr_table[static_cast<unsigned>(opcode)];
747 783
@@ -755,15 +791,43 @@ void JitCompiler::Compile_NextInstr(unsigned* offset) {
755 } 791 }
756} 792}
757 793
758CompiledShader* JitCompiler::Compile() { 794void JitShader::FindReturnOffsets() {
759 const u8* start = GetCodePtr(); 795 return_offsets.clear();
760 unsigned offset = g_state.regs.vs.main_offset; 796
797 for (size_t offset = 0; offset < g_state.vs.program_code.size(); ++offset) {
798 Instruction instr = GetVertexShaderInstruction(offset);
799
800 switch (instr.opcode.Value()) {
801 case OpCode::Id::CALL:
802 case OpCode::Id::CALLC:
803 case OpCode::Id::CALLU:
804 return_offsets.push_back(instr.flow_control.dest_offset + instr.flow_control.num_instructions);
805 break;
806 default:
807 break;
808 }
809 }
810
811 // Sort for efficient binary search later
812 std::sort(return_offsets.begin(), return_offsets.end());
813}
814
815void JitShader::Compile() {
816 // Reset flow control state
817 program = (CompiledShader*)GetCodePtr();
818 program_counter = 0;
819 looping = false;
820 code_ptr.fill(nullptr);
821 fixup_branches.clear();
822
823 // Find all `CALL` instructions and identify return locations
824 FindReturnOffsets();
761 825
762 // The stack pointer is 8 modulo 16 at the entry of a procedure 826 // The stack pointer is 8 modulo 16 at the entry of a procedure
763 ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); 827 ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
764 828
765 MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1)); 829 MOV(PTRBITS, R(SETUP), R(ABI_PARAM1));
766 MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms)); 830 MOV(PTRBITS, R(STATE), R(ABI_PARAM2));
767 831
768 // Zero address/loop registers 832 // Zero address/loop registers
769 XOR(64, R(ADDROFFS_REG_0), R(ADDROFFS_REG_0)); 833 XOR(64, R(ADDROFFS_REG_0), R(ADDROFFS_REG_0));
@@ -780,21 +844,31 @@ CompiledShader* JitCompiler::Compile() {
780 MOV(PTRBITS, R(RAX), ImmPtr(&neg)); 844 MOV(PTRBITS, R(RAX), ImmPtr(&neg));
781 MOVAPS(NEGBIT, MatR(RAX)); 845 MOVAPS(NEGBIT, MatR(RAX));
782 846
783 looping = false; 847 // Jump to start of the shader program
848 JMPptr(R(ABI_PARAM3));
849
850 // Compile entire program
851 Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size()));
784 852
785 while (offset < g_state.vs.program_code.size()) { 853 // Set the target for any incomplete branches now that the entire shader program has been emitted
786 Compile_NextInstr(&offset); 854 for (const auto& branch : fixup_branches) {
855 SetJumpTarget(branch.first, code_ptr[branch.second]);
787 } 856 }
788 857
789 return (CompiledShader*)start; 858 // Free memory that's no longer needed
790} 859 return_offsets.clear();
860 return_offsets.shrink_to_fit();
861 fixup_branches.clear();
862 fixup_branches.shrink_to_fit();
863
864 uintptr_t size = reinterpret_cast<uintptr_t>(GetCodePtr()) - reinterpret_cast<uintptr_t>(program);
865 ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!");
791 866
792JitCompiler::JitCompiler() { 867 LOG_DEBUG(HW_GPU, "Compiled shader size=%lu", size);
793 AllocCodeSpace(jit_cache_size);
794} 868}
795 869
796void JitCompiler::Clear() { 870JitShader::JitShader() {
797 ClearCodeSpace(); 871 AllocCodeSpace(MAX_SHADER_SIZE);
798} 872}
799 873
800} // namespace Shader 874} // namespace Shader
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index 5357c964b..5468459d4 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -4,11 +4,17 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
8#include <cstddef>
9#include <utility>
10#include <vector>
11
7#include <nihstro/shader_bytecode.h> 12#include <nihstro/shader_bytecode.h>
8 13
14#include "common/bit_set.h"
15#include "common/common_types.h"
9#include "common/x64/emitter.h" 16#include "common/x64/emitter.h"
10 17
11#include "video_core/pica.h"
12#include "video_core/shader/shader.h" 18#include "video_core/shader/shader.h"
13 19
14using nihstro::Instruction; 20using nihstro::Instruction;
@@ -19,24 +25,22 @@ namespace Pica {
19 25
20namespace Shader { 26namespace Shader {
21 27
22/// Memory needed to be available to compile the next shader (otherwise, clear the cache) 28/// Memory allocated for each compiled shader (64Kb)
23constexpr size_t jit_shader_size = 1024 * 512; 29constexpr size_t MAX_SHADER_SIZE = 1024 * 64;
24/// Memory allocated for the JIT code space cache
25constexpr size_t jit_cache_size = 1024 * 1024 * 8;
26
27using CompiledShader = void(void* registers);
28 30
29/** 31/**
30 * This class implements the shader JIT compiler. It recompiles a Pica shader program into x86_64 32 * This class implements the shader JIT compiler. It recompiles a Pica shader program into x86_64
31 * code that can be executed on the host machine directly. 33 * code that can be executed on the host machine directly.
32 */ 34 */
33class JitCompiler : public Gen::XCodeBlock { 35class JitShader : public Gen::XCodeBlock {
34public: 36public:
35 JitCompiler(); 37 JitShader();
36 38
37 CompiledShader* Compile(); 39 void Run(const ShaderSetup& setup, UnitState<false>& state, unsigned offset) const {
40 program(&setup, &state, code_ptr[offset]);
41 }
38 42
39 void Clear(); 43 void Compile();
40 44
41 void Compile_ADD(Instruction instr); 45 void Compile_ADD(Instruction instr);
42 void Compile_DP3(Instruction instr); 46 void Compile_DP3(Instruction instr);
@@ -66,8 +70,9 @@ public:
66 void Compile_MAD(Instruction instr); 70 void Compile_MAD(Instruction instr);
67 71
68private: 72private:
73
69 void Compile_Block(unsigned end); 74 void Compile_Block(unsigned end);
70 void Compile_NextInstr(unsigned* offset); 75 void Compile_NextInstr();
71 76
72 void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest); 77 void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest);
73 void Compile_DestEnable(Instruction instr, Gen::X64Reg dest); 78 void Compile_DestEnable(Instruction instr, Gen::X64Reg dest);
@@ -81,13 +86,39 @@ private:
81 void Compile_EvaluateCondition(Instruction instr); 86 void Compile_EvaluateCondition(Instruction instr);
82 void Compile_UniformCondition(Instruction instr); 87 void Compile_UniformCondition(Instruction instr);
83 88
89 /**
90 * Emits the code to conditionally return from a subroutine envoked by the `CALL` instruction.
91 */
92 void Compile_Return();
93
84 BitSet32 PersistentCallerSavedRegs(); 94 BitSet32 PersistentCallerSavedRegs();
85 95
86 /// Pointer to the variable that stores the current Pica code offset. Used to handle nested code blocks. 96 /**
87 unsigned* offset_ptr = nullptr; 97 * Assertion evaluated at compile-time, but only triggered if executed at runtime.
98 * @param msg Message to be logged if the assertion fails.
99 */
100 void Compile_Assert(bool condition, const char* msg);
101
102 /**
103 * Analyzes the entire shader program for `CALL` instructions before emitting any code,
104 * identifying the locations where a return needs to be inserted.
105 */
106 void FindReturnOffsets();
107
108 /// Mapping of Pica VS instructions to pointers in the emitted code
109 std::array<const u8*, 1024> code_ptr;
110
111 /// Offsets in code where a return needs to be inserted
112 std::vector<unsigned> return_offsets;
113
114 unsigned program_counter = 0; ///< Offset of the next instruction to decode
115 bool looping = false; ///< True if compiling a loop, used to check for nested loops
116
117 /// Branches that need to be fixed up once the entire shader program is compiled
118 std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches;
88 119
89 /// Set to true if currently in a loop, used to check for the existence of nested loops 120 using CompiledShader = void(const void* setup, void* state, const u8* start_addr);
90 bool looping = false; 121 CompiledShader* program = nullptr;
91}; 122};
92 123
93} // Shader 124} // Shader
diff --git a/src/video_core/swrasterizer.h b/src/video_core/swrasterizer.h
index 9a9a76d7a..0a028b774 100644
--- a/src/video_core/swrasterizer.h
+++ b/src/video_core/swrasterizer.h
@@ -8,19 +8,23 @@
8 8
9#include "video_core/rasterizer_interface.h" 9#include "video_core/rasterizer_interface.h"
10 10
11namespace Pica {
12namespace Shader {
13struct OutputVertex;
14}
15}
16
11namespace VideoCore { 17namespace VideoCore {
12 18
13class SWRasterizer : public RasterizerInterface { 19class SWRasterizer : public RasterizerInterface {
14 void InitObjects() override {}
15 void Reset() override {}
16 void AddTriangle(const Pica::Shader::OutputVertex& v0, 20 void AddTriangle(const Pica::Shader::OutputVertex& v0,
17 const Pica::Shader::OutputVertex& v1, 21 const Pica::Shader::OutputVertex& v1,
18 const Pica::Shader::OutputVertex& v2) override; 22 const Pica::Shader::OutputVertex& v2) override;
19 void DrawTriangles() override {} 23 void DrawTriangles() override {}
20 void FlushFramebuffer() override {}
21 void NotifyPicaRegisterChanged(u32 id) override {} 24 void NotifyPicaRegisterChanged(u32 id) override {}
25 void FlushAll() override {}
22 void FlushRegion(PAddr addr, u32 size) override {} 26 void FlushRegion(PAddr addr, u32 size) override {}
23 void InvalidateRegion(PAddr addr, u32 size) override {} 27 void FlushAndInvalidateRegion(PAddr addr, u32 size) override {}
24}; 28};
25 29
26} 30}
diff --git a/src/video_core/utils.cpp b/src/video_core/utils.cpp
deleted file mode 100644
index 6e1ff5cf4..000000000
--- a/src/video_core/utils.cpp
+++ /dev/null
@@ -1,36 +0,0 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstdio>
6#include <cstring>
7
8#include "video_core/utils.h"
9
10namespace VideoCore {
11
12/**
13 * Dumps a texture to TGA
14 * @param filename String filename to dump texture to
15 * @param width Width of texture in pixels
16 * @param height Height of texture in pixels
17 * @param raw_data Raw RGBA8 texture data to dump
18 * @todo This should be moved to some general purpose/common code
19 */
20void DumpTGA(std::string filename, short width, short height, u8* raw_data) {
21 TGAHeader hdr = {0, 0, 2, 0, 0, 0, 0, width, height, 24, 0};
22 FILE* fout = fopen(filename.c_str(), "wb");
23
24 fwrite(&hdr, sizeof(TGAHeader), 1, fout);
25
26 for (int y = 0; y < height; y++) {
27 for (int x = 0; x < width; x++) {
28 putc(raw_data[(3 * (y * width)) + (3 * x) + 0], fout); // b
29 putc(raw_data[(3 * (y * width)) + (3 * x) + 1], fout); // g
30 putc(raw_data[(3 * (y * width)) + (3 * x) + 2], fout); // r
31 }
32 }
33
34 fclose(fout);
35}
36} // namespace
diff --git a/src/video_core/utils.h b/src/video_core/utils.h
index 4fa60a10e..7ce83a055 100644
--- a/src/video_core/utils.h
+++ b/src/video_core/utils.h
@@ -4,37 +4,10 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <string>
8
9#include "common/common_types.h" 7#include "common/common_types.h"
10 8
11namespace VideoCore { 9namespace VideoCore {
12 10
13/// Structure for the TGA texture format (for dumping)
14struct TGAHeader {
15 char idlength;
16 char colormaptype;
17 char datatypecode;
18 short int colormaporigin;
19 short int colormaplength;
20 short int x_origin;
21 short int y_origin;
22 short width;
23 short height;
24 char bitsperpixel;
25 char imagedescriptor;
26};
27
28/**
29 * Dumps a texture to TGA
30 * @param filename String filename to dump texture to
31 * @param width Width of texture in pixels
32 * @param height Height of texture in pixels
33 * @param raw_data Raw RGBA8 texture data to dump
34 * @todo This should be moved to some general purpose/common code
35 */
36void DumpTGA(std::string filename, short width, short height, u8* raw_data);
37
38/** 11/**
39 * Interleave the lower 3 bits of each coordinate to get the intra-block offsets, which are 12 * Interleave the lower 3 bits of each coordinate to get the intra-block offsets, which are
40 * arranged in a Z-order curve. More details on the bit manipulation at: 13 * arranged in a Z-order curve. More details on the bit manipulation at:
diff --git a/src/video_core/vertex_loader.cpp b/src/video_core/vertex_loader.cpp
new file mode 100644
index 000000000..e40f0f1ee
--- /dev/null
+++ b/src/video_core/vertex_loader.cpp
@@ -0,0 +1,146 @@
1#include <memory>
2
3#include <boost/range/algorithm/fill.hpp>
4
5#include "common/alignment.h"
6#include "common/assert.h"
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "common/logging/log.h"
10#include "common/vector_math.h"
11
12#include "core/memory.h"
13
14#include "video_core/debug_utils/debug_utils.h"
15#include "video_core/pica.h"
16#include "video_core/pica_state.h"
17#include "video_core/pica_types.h"
18#include "video_core/shader/shader.h"
19#include "video_core/vertex_loader.h"
20
21namespace Pica {
22
23void VertexLoader::Setup(const Pica::Regs& regs) {
24 ASSERT_MSG(!is_setup, "VertexLoader is not intended to be setup more than once.");
25
26 const auto& attribute_config = regs.vertex_attributes;
27 num_total_attributes = attribute_config.GetNumTotalAttributes();
28
29 boost::fill(vertex_attribute_sources, 0xdeadbeef);
30
31 for (int i = 0; i < 16; i++) {
32 vertex_attribute_is_default[i] = attribute_config.IsDefaultAttribute(i);
33 }
34
35 // Setup attribute data from loaders
36 for (int loader = 0; loader < 12; ++loader) {
37 const auto& loader_config = attribute_config.attribute_loaders[loader];
38
39 u32 offset = 0;
40
41 // TODO: What happens if a loader overwrites a previous one's data?
42 for (unsigned component = 0; component < loader_config.component_count; ++component) {
43 if (component >= 12) {
44 LOG_ERROR(HW_GPU, "Overflow in the vertex attribute loader %u trying to load component %u", loader, component);
45 continue;
46 }
47
48 u32 attribute_index = loader_config.GetComponent(component);
49 if (attribute_index < 12) {
50 offset = Common::AlignUp(offset, attribute_config.GetElementSizeInBytes(attribute_index));
51 vertex_attribute_sources[attribute_index] = loader_config.data_offset + offset;
52 vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count);
53 vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index);
54 vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index);
55 offset += attribute_config.GetStride(attribute_index);
56 } else if (attribute_index < 16) {
57 // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively
58 offset = Common::AlignUp(offset, 4);
59 offset += (attribute_index - 11) * 4;
60 } else {
61 UNREACHABLE(); // This is truly unreachable due to the number of bits for each component
62 }
63 }
64 }
65
66 is_setup = true;
67}
68
69void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, DebugUtils::MemoryAccessTracker& memory_accesses) {
70 ASSERT_MSG(is_setup, "A VertexLoader needs to be setup before loading vertices.");
71
72 for (int i = 0; i < num_total_attributes; ++i) {
73 if (vertex_attribute_elements[i] != 0) {
74 // Load per-vertex data from the loader arrays
75 u32 source_addr = base_address + vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex;
76
77 if (g_debug_context && Pica::g_debug_context->recorder) {
78 memory_accesses.AddAccess(source_addr, vertex_attribute_elements[i] * (
79 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4
80 : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1));
81 }
82
83 switch (vertex_attribute_formats[i]) {
84 case Regs::VertexAttributeFormat::BYTE:
85 {
86 const s8* srcdata = reinterpret_cast<const s8*>(Memory::GetPhysicalPointer(source_addr));
87 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
88 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
89 }
90 break;
91 }
92 case Regs::VertexAttributeFormat::UBYTE:
93 {
94 const u8* srcdata = reinterpret_cast<const u8*>(Memory::GetPhysicalPointer(source_addr));
95 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
96 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
97 }
98 break;
99 }
100 case Regs::VertexAttributeFormat::SHORT:
101 {
102 const s16* srcdata = reinterpret_cast<const s16*>(Memory::GetPhysicalPointer(source_addr));
103 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
104 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
105 }
106 break;
107 }
108 case Regs::VertexAttributeFormat::FLOAT:
109 {
110 const float* srcdata = reinterpret_cast<const float*>(Memory::GetPhysicalPointer(source_addr));
111 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
112 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
113 }
114 break;
115 }
116 }
117
118 // Default attribute values set if array elements have < 4 components. This
119 // is *not* carried over from the default attribute settings even if they're
120 // enabled for this attribute.
121 for (unsigned int comp = vertex_attribute_elements[i]; comp < 4; ++comp) {
122 input.attr[i][comp] = comp == 3 ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f);
123 }
124
125 LOG_TRACE(HW_GPU, "Loaded %d components of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f %f %f %f",
126 vertex_attribute_elements[i], i, vertex, index,
127 base_address,
128 vertex_attribute_sources[i],
129 vertex_attribute_strides[i] * vertex,
130 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
131 } else if (vertex_attribute_is_default[i]) {
132 // Load the default attribute if we're configured to do so
133 input.attr[i] = g_state.vs_default_attributes[i];
134 LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)",
135 i, vertex, index,
136 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),
137 input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
138 } else {
139 // TODO(yuriks): In this case, no data gets loaded and the vertex
140 // remains with the last value it had. This isn't currently maintained
141 // as global state, however, and so won't work in Citra yet.
142 }
143 }
144}
145
146} // namespace Pica
diff --git a/src/video_core/vertex_loader.h b/src/video_core/vertex_loader.h
new file mode 100644
index 000000000..ac162c254
--- /dev/null
+++ b/src/video_core/vertex_loader.h
@@ -0,0 +1,40 @@
1#pragma once
2
3#include <array>
4
5#include "common/common_types.h"
6#include "video_core/pica.h"
7
8namespace Pica {
9
10namespace DebugUtils {
11class MemoryAccessTracker;
12}
13
14namespace Shader {
15struct InputVertex;
16}
17
18class VertexLoader {
19public:
20 VertexLoader() = default;
21 explicit VertexLoader(const Pica::Regs& regs) {
22 Setup(regs);
23 }
24
25 void Setup(const Pica::Regs& regs);
26 void LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, DebugUtils::MemoryAccessTracker& memory_accesses);
27
28 int GetNumTotalAttributes() const { return num_total_attributes; }
29
30private:
31 std::array<u32, 16> vertex_attribute_sources;
32 std::array<u32, 16> vertex_attribute_strides{};
33 std::array<Regs::VertexAttributeFormat, 16> vertex_attribute_formats;
34 std::array<u32, 16> vertex_attribute_elements{};
35 std::array<bool, 16> vertex_attribute_is_default;
36 int num_total_attributes = 0;
37 bool is_setup = false;
38};
39
40} // namespace Pica
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 256899c89..c9975876d 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -4,12 +4,8 @@
4 4
5#include <memory> 5#include <memory>
6 6
7#include "common/emu_window.h"
8#include "common/logging/log.h" 7#include "common/logging/log.h"
9 8
10#include "core/core.h"
11#include "core/settings.h"
12
13#include "video_core/pica.h" 9#include "video_core/pica.h"
14#include "video_core/renderer_base.h" 10#include "video_core/renderer_base.h"
15#include "video_core/video_core.h" 11#include "video_core/video_core.h"
@@ -25,6 +21,7 @@ std::unique_ptr<RendererBase> g_renderer; ///< Renderer plugin
25 21
26std::atomic<bool> g_hw_renderer_enabled; 22std::atomic<bool> g_hw_renderer_enabled;
27std::atomic<bool> g_shader_jit_enabled; 23std::atomic<bool> g_shader_jit_enabled;
24std::atomic<bool> g_scaled_resolution_enabled;
28 25
29/// Initialize the video core 26/// Initialize the video core
30bool Init(EmuWindow* emu_window) { 27bool Init(EmuWindow* emu_window) {
diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h
index bca67fb8c..30267489e 100644
--- a/src/video_core/video_core.h
+++ b/src/video_core/video_core.h
@@ -36,6 +36,7 @@ extern EmuWindow* g_emu_window; ///< Emu window
36// TODO: Wrap these in a user settings struct along with any other graphics settings (often set from qt ui) 36// TODO: Wrap these in a user settings struct along with any other graphics settings (often set from qt ui)
37extern std::atomic<bool> g_hw_renderer_enabled; 37extern std::atomic<bool> g_hw_renderer_enabled;
38extern std::atomic<bool> g_shader_jit_enabled; 38extern std::atomic<bool> g_shader_jit_enabled;
39extern std::atomic<bool> g_scaled_resolution_enabled;
39 40
40/// Start the video core 41/// Start the video core
41void Start(); 42void Start();