summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitmodules3
-rwxr-xr-x.travis-deps.sh5
-rw-r--r--CMakeLists.txt20
m---------externals/boost0
-rw-r--r--externals/microprofile/microprofileui.h7
m---------externals/soundtouch0
-rw-r--r--src/audio_core/CMakeLists.txt23
-rw-r--r--src/audio_core/audio_core.cpp46
-rw-r--r--src/audio_core/audio_core.h7
-rw-r--r--src/audio_core/hle/common.h11
-rw-r--r--src/audio_core/hle/dsp.cpp69
-rw-r--r--src/audio_core/hle/dsp.h40
-rw-r--r--src/audio_core/hle/filter.h1
-rw-r--r--src/audio_core/hle/pipe.cpp41
-rw-r--r--src/audio_core/hle/pipe.h16
-rw-r--r--src/audio_core/hle/source.cpp320
-rw-r--r--src/audio_core/hle/source.h144
-rw-r--r--src/audio_core/interpolate.cpp85
-rw-r--r--src/audio_core/interpolate.h41
-rw-r--r--src/audio_core/null_sink.h29
-rw-r--r--src/audio_core/sdl2_sink.cpp126
-rw-r--r--src/audio_core/sdl2_sink.h30
-rw-r--r--src/audio_core/sink.h2
-rw-r--r--src/audio_core/sink_details.cpp25
-rw-r--r--src/audio_core/sink_details.h27
-rw-r--r--src/citra/CMakeLists.txt2
-rw-r--r--src/citra/citra.cpp29
-rw-r--r--src/citra/config.cpp6
-rw-r--r--src/citra/default_ini.h9
-rw-r--r--src/citra/emu_window/emu_window_sdl2.cpp7
-rw-r--r--src/citra_qt/CMakeLists.txt3
-rw-r--r--src/citra_qt/bootmanager.cpp2
-rw-r--r--src/citra_qt/config.cpp10
-rw-r--r--src/citra_qt/configure_general.cpp2
-rw-r--r--src/citra_qt/configure_general.ui7
-rw-r--r--src/citra_qt/debugger/graphics_breakpoints.cpp6
-rw-r--r--src/citra_qt/debugger/graphics_framebuffer.cpp6
-rw-r--r--src/citra_qt/debugger/graphics_tracing.cpp4
-rw-r--r--src/citra_qt/debugger/graphics_vertex_shader.cpp6
-rw-r--r--src/citra_qt/debugger/profiler.cpp39
-rw-r--r--src/citra_qt/debugger/profiler.h3
-rw-r--r--src/citra_qt/game_list.cpp16
-rw-r--r--src/citra_qt/game_list.h2
-rw-r--r--src/citra_qt/game_list_p.h106
-rw-r--r--src/citra_qt/main.cpp20
-rw-r--r--src/citra_qt/util/util.cpp2
-rw-r--r--src/common/CMakeLists.txt1
-rw-r--r--src/common/assert.h2
-rw-r--r--src/common/bit_field.h2
-rw-r--r--src/common/bit_set.h3
-rw-r--r--src/common/code_block.h6
-rw-r--r--src/common/common_funcs.h4
-rw-r--r--src/common/file_util.cpp15
-rw-r--r--src/common/file_util.h6
-rw-r--r--src/common/logging/backend.cpp1
-rw-r--r--src/common/logging/log.h3
-rw-r--r--src/common/microprofile.h4
-rw-r--r--src/common/microprofileui.h3
-rw-r--r--src/common/profiler.cpp82
-rw-r--r--src/common/profiler.h152
-rw-r--r--src/common/profiler_reporting.h27
-rw-r--r--src/common/x64/emitter.h2
-rw-r--r--src/core/arm/dyncom/arm_dyncom_interpreter.cpp7
-rw-r--r--src/core/core.cpp2
-rw-r--r--src/core/gdbstub/gdbstub.cpp24
-rw-r--r--src/core/hle/applets/mii_selector.cpp24
-rw-r--r--src/core/hle/applets/mii_selector.h50
-rw-r--r--src/core/hle/applets/swkbd.cpp20
-rw-r--r--src/core/hle/applets/swkbd.h7
-rw-r--r--src/core/hle/hle.cpp20
-rw-r--r--src/core/hle/hle.h4
-rw-r--r--src/core/hle/kernel/thread.cpp3
-rw-r--r--src/core/hle/result.h2
-rw-r--r--src/core/hle/service/ac_u.cpp26
-rw-r--r--src/core/hle/service/am/am.cpp2
-rw-r--r--src/core/hle/service/apt/apt.h15
-rw-r--r--src/core/hle/service/cfg/cfg.cpp4
-rw-r--r--src/core/hle/service/cfg/cfg.h13
-rw-r--r--src/core/hle/service/dsp_dsp.cpp195
-rw-r--r--src/core/hle/service/dsp_dsp.h19
-rw-r--r--src/core/hle/service/fs/archive.cpp1
-rw-r--r--src/core/hle/service/fs/fs_user.cpp2
-rw-r--r--src/core/hle/service/gsp_gpu.cpp70
-rw-r--r--src/core/hle/service/gsp_gpu.h1
-rw-r--r--src/core/hle/service/ndm/ndm.cpp197
-rw-r--r--src/core/hle/service/ndm/ndm.h216
-rw-r--r--src/core/hle/service/ndm/ndm_u.cpp34
-rw-r--r--src/core/hle/service/soc_u.cpp100
-rw-r--r--src/core/hle/service/y2r_u.cpp490
-rw-r--r--src/core/hle/service/y2r_u.h20
-rw-r--r--src/core/hle/svc.cpp8
-rw-r--r--src/core/hw/gpu.cpp327
-rw-r--r--src/core/hw/gpu.h4
-rw-r--r--src/core/hw/lcd.h2
-rw-r--r--src/core/loader/3dsx.cpp33
-rw-r--r--src/core/loader/3dsx.h9
-rw-r--r--src/core/loader/loader.cpp53
-rw-r--r--src/core/loader/loader.h57
-rw-r--r--src/core/loader/ncch.cpp24
-rw-r--r--src/core/loader/ncch.h7
-rw-r--r--src/core/memory.cpp140
-rw-r--r--src/core/memory.h16
-rw-r--r--src/core/settings.cpp5
-rw-r--r--src/core/settings.h7
-rw-r--r--src/core/tracer/recorder.cpp24
-rw-r--r--src/core/tracer/recorder.h1
-rw-r--r--src/video_core/CMakeLists.txt3
-rw-r--r--src/video_core/clipper.cpp13
-rw-r--r--src/video_core/command_processor.cpp163
-rw-r--r--src/video_core/debug_utils/debug_utils.cpp32
-rw-r--r--src/video_core/debug_utils/debug_utils.h64
-rw-r--r--src/video_core/pica.cpp5
-rw-r--r--src/video_core/pica.h16
-rw-r--r--src/video_core/pica_state.h7
-rw-r--r--src/video_core/pica_types.h1
-rw-r--r--src/video_core/primitive_assembly.cpp3
-rw-r--r--src/video_core/rasterizer.cpp112
-rw-r--r--src/video_core/rasterizer_interface.h31
-rw-r--r--src/video_core/renderer_base.cpp5
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp858
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h148
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp712
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h221
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp11
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.h2
-rw-r--r--src/video_core/renderer_opengl/gl_shader_util.cpp3
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp68
-rw-r--r--src/video_core/renderer_opengl/gl_state.h26
-rw-r--r--src/video_core/renderer_opengl/pica_to_gl.h7
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp149
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h47
-rw-r--r--src/video_core/shader/shader.cpp59
-rw-r--r--src/video_core/shader/shader.h10
-rw-r--r--src/video_core/shader/shader_interpreter.cpp12
-rw-r--r--src/video_core/shader/shader_interpreter.h4
-rw-r--r--src/video_core/shader/shader_jit_x64.cpp16
-rw-r--r--src/video_core/shader/shader_jit_x64.h5
-rw-r--r--src/video_core/swrasterizer.h12
-rw-r--r--src/video_core/utils.cpp36
-rw-r--r--src/video_core/utils.h27
-rw-r--r--src/video_core/vertex_loader.cpp140
-rw-r--r--src/video_core/vertex_loader.h33
-rw-r--r--src/video_core/video_core.cpp5
-rw-r--r--src/video_core/video_core.h1
144 files changed, 5033 insertions, 2037 deletions
diff --git a/.gitmodules b/.gitmodules
index 598e4c64d..059512902 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -7,3 +7,6 @@
7[submodule "nihstro"] 7[submodule "nihstro"]
8 path = externals/nihstro 8 path = externals/nihstro
9 url = https://github.com/neobrain/nihstro.git 9 url = https://github.com/neobrain/nihstro.git
10[submodule "soundtouch"]
11 path = externals/soundtouch
12 url = https://github.com/citra-emu/soundtouch.git
diff --git a/.travis-deps.sh b/.travis-deps.sh
index c7bb7e785..4a79feb70 100755
--- a/.travis-deps.sh
+++ b/.travis-deps.sh
@@ -9,7 +9,7 @@ if [ "$TRAVIS_OS_NAME" = "linux" -o -z "$TRAVIS_OS_NAME" ]; then
9 export CXX=g++-5 9 export CXX=g++-5
10 mkdir -p $HOME/.local 10 mkdir -p $HOME/.local
11 11
12 curl -L http://www.cmake.org/files/v2.8/cmake-2.8.11-Linux-i386.tar.gz \ 12 curl -L http://www.cmake.org/files/v3.1/cmake-3.1.0-Linux-i386.tar.gz \
13 | tar -xz -C $HOME/.local --strip-components=1 13 | tar -xz -C $HOME/.local --strip-components=1
14 14
15 ( 15 (
@@ -20,6 +20,7 @@ if [ "$TRAVIS_OS_NAME" = "linux" -o -z "$TRAVIS_OS_NAME" ]; then
20 ) 20 )
21elif [ "$TRAVIS_OS_NAME" = "osx" ]; then 21elif [ "$TRAVIS_OS_NAME" = "osx" ]; then
22 brew update > /dev/null # silence the very verbose output 22 brew update > /dev/null # silence the very verbose output
23 brew install qt5 sdl2 dylibbundler 23 brew unlink cmake
24 brew install cmake31 qt5 sdl2 dylibbundler
24 gem install xcpretty 25 gem install xcpretty
25fi 26fi
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3a0a161e7..8f2898973 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,6 @@
1# CMake 2.8.11 required for Qt5 settings to be applied automatically on 1# CMake 3.1 required for Qt5 settings to be applied automatically on
2# dependent libraries. 2# dependent libraries and IMPORTED targets.
3cmake_minimum_required(VERSION 2.8.11) 3cmake_minimum_required(VERSION 3.1)
4 4
5function(download_bundled_external remote_path lib_name prefix_var) 5function(download_bundled_external remote_path lib_name prefix_var)
6 set(prefix "${CMAKE_BINARY_DIR}/externals/${lib_name}") 6 set(prefix "${CMAKE_BINARY_DIR}/externals/${lib_name}")
@@ -65,8 +65,8 @@ endif()
65message(STATUS "Target architecture: ${ARCHITECTURE}") 65message(STATUS "Target architecture: ${ARCHITECTURE}")
66 66
67if (NOT MSVC) 67if (NOT MSVC)
68 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++1y -Wno-attributes -pthread") 68 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++1y -Wno-attributes")
69 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread") 69 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
70 70
71 if (ARCHITECTURE_x86_64) 71 if (ARCHITECTURE_x86_64)
72 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1") 72 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1")
@@ -135,6 +135,10 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/externals/cmake-modules")
135find_package(OpenGL REQUIRED) 135find_package(OpenGL REQUIRED)
136include_directories(${OPENGL_INCLUDE_DIR}) 136include_directories(${OPENGL_INCLUDE_DIR})
137 137
138# Prefer the -pthread flag on Linux.
139set (THREADS_PREFER_PTHREAD_FLAG ON)
140find_package(Threads REQUIRED)
141
138if (ENABLE_SDL2) 142if (ENABLE_SDL2)
139 if (CITRA_USE_BUNDLED_SDL2) 143 if (CITRA_USE_BUNDLED_SDL2)
140 # Detect toolchain and platform 144 # Detect toolchain and platform
@@ -148,12 +152,15 @@ if (ENABLE_SDL2)
148 download_bundled_external("sdl2/" ${SDL2_VER} SDL2_PREFIX) 152 download_bundled_external("sdl2/" ${SDL2_VER} SDL2_PREFIX)
149 endif() 153 endif()
150 154
155 set(SDL2_FOUND YES)
151 set(SDL2_INCLUDE_DIR "${SDL2_PREFIX}/include" CACHE PATH "Path to SDL2 headers") 156 set(SDL2_INCLUDE_DIR "${SDL2_PREFIX}/include" CACHE PATH "Path to SDL2 headers")
152 set(SDL2_LIBRARY "${SDL2_PREFIX}/lib/x64/SDL2.lib" CACHE PATH "Path to SDL2 library") 157 set(SDL2_LIBRARY "${SDL2_PREFIX}/lib/x64/SDL2.lib" CACHE PATH "Path to SDL2 library")
153 set(SDL2_DLL_DIR "${SDL2_PREFIX}/lib/x64/" CACHE PATH "Path to SDL2.dll") 158 set(SDL2_DLL_DIR "${SDL2_PREFIX}/lib/x64/" CACHE PATH "Path to SDL2.dll")
154 else() 159 else()
155 find_package(SDL2 REQUIRED) 160 find_package(SDL2 REQUIRED)
156 endif() 161 endif()
162else()
163 set(SDL2_FOUND NO)
157endif() 164endif()
158 165
159IF (APPLE) 166IF (APPLE)
@@ -245,6 +252,9 @@ if(ENABLE_QT)
245 include_directories(externals/qhexedit) 252 include_directories(externals/qhexedit)
246 add_subdirectory(externals/qhexedit) 253 add_subdirectory(externals/qhexedit)
247endif() 254endif()
255
256add_subdirectory(externals/soundtouch)
257
248add_subdirectory(src) 258add_subdirectory(src)
249 259
250# Install freedesktop.org metadata files, following those specifications: 260# Install freedesktop.org metadata files, following those specifications:
diff --git a/externals/boost b/externals/boost
Subproject d81b9269900ae183d0dc98403eea4c971590a80 Subproject 2dcb9d979665b6aabb1635c617973e02914e60e
diff --git a/externals/microprofile/microprofileui.h b/externals/microprofile/microprofileui.h
index eac1119a4..45bec8af6 100644
--- a/externals/microprofile/microprofileui.h
+++ b/externals/microprofile/microprofileui.h
@@ -879,7 +879,7 @@ void MicroProfileDrawDetailedBars(uint32_t nWidth, uint32_t nHeight, int nBaseY,
879 static int64_t nRefCpu = 0, nRefGpu = 0; 879 static int64_t nRefCpu = 0, nRefGpu = 0;
880 if(MicroProfileGetGpuTickReference(&nTickReferenceCpu, &nTickReferenceGpu)) 880 if(MicroProfileGetGpuTickReference(&nTickReferenceCpu, &nTickReferenceGpu))
881 { 881 {
882 if(0 == nRefCpu || abs(nRefCpu-nBaseTicksCpu) > abs(nTickReferenceCpu-nBaseTicksCpu)) 882 if(0 == nRefCpu || std::abs(nRefCpu-nBaseTicksCpu) > std::abs(nTickReferenceCpu-nBaseTicksCpu))
883 { 883 {
884 nRefCpu = nTickReferenceCpu; 884 nRefCpu = nTickReferenceCpu;
885 nRefGpu = nTickReferenceGpu; 885 nRefGpu = nTickReferenceGpu;
@@ -1230,7 +1230,12 @@ void MicroProfileDrawDetailedBars(uint32_t nWidth, uint32_t nHeight, int nBaseY,
1230 char ThreadName[MicroProfileThreadLog::THREAD_MAX_LEN + 16]; 1230 char ThreadName[MicroProfileThreadLog::THREAD_MAX_LEN + 16];
1231 const char* cLocal = MicroProfileIsLocalThread(nThreadId) ? "*": " "; 1231 const char* cLocal = MicroProfileIsLocalThread(nThreadId) ? "*": " ";
1232 1232
1233#if defined(WIN32)
1234 // nThreadId is 32-bit on Windows
1233 int nStrLen = snprintf(ThreadName, sizeof(ThreadName)-1, "%04x: %s%s", nThreadId, cLocal, i < nNumThreadsBase ? &S.Pool[i]->ThreadName[0] : MICROPROFILE_THREAD_NAME_FROM_ID(nThreadId) ); 1235 int nStrLen = snprintf(ThreadName, sizeof(ThreadName)-1, "%04x: %s%s", nThreadId, cLocal, i < nNumThreadsBase ? &S.Pool[i]->ThreadName[0] : MICROPROFILE_THREAD_NAME_FROM_ID(nThreadId) );
1236#else
1237 int nStrLen = snprintf(ThreadName, sizeof(ThreadName)-1, "%04llx: %s%s", nThreadId, cLocal, i < nNumThreadsBase ? &S.Pool[i]->ThreadName[0] : MICROPROFILE_THREAD_NAME_FROM_ID(nThreadId) );
1238#endif
1234 uint32_t nThreadColor = -1; 1239 uint32_t nThreadColor = -1;
1235 if(nThreadId == nContextSwitchHoverThreadAfter || nThreadId == nContextSwitchHoverThreadBefore) 1240 if(nThreadId == nContextSwitchHoverThreadAfter || nThreadId == nContextSwitchHoverThreadBefore)
1236 nThreadColor = UI.nHoverColorShared|0x906060; 1241 nThreadColor = UI.nHoverColorShared|0x906060;
diff --git a/externals/soundtouch b/externals/soundtouch
new file mode 160000
Subproject 5274ec4dec498bd88ccbcd28862a0f78a3b95ef
diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt
index 869da5e83..13b5e400e 100644
--- a/src/audio_core/CMakeLists.txt
+++ b/src/audio_core/CMakeLists.txt
@@ -4,6 +4,9 @@ set(SRCS
4 hle/dsp.cpp 4 hle/dsp.cpp
5 hle/filter.cpp 5 hle/filter.cpp
6 hle/pipe.cpp 6 hle/pipe.cpp
7 hle/source.cpp
8 interpolate.cpp
9 sink_details.cpp
7 ) 10 )
8 11
9set(HEADERS 12set(HEADERS
@@ -13,9 +16,27 @@ set(HEADERS
13 hle/dsp.h 16 hle/dsp.h
14 hle/filter.h 17 hle/filter.h
15 hle/pipe.h 18 hle/pipe.h
19 hle/source.h
20 interpolate.h
21 null_sink.h
16 sink.h 22 sink.h
23 sink_details.h
17 ) 24 )
18 25
26include_directories(../../externals/soundtouch/include)
27
28if(SDL2_FOUND)
29 set(SRCS ${SRCS} sdl2_sink.cpp)
30 set(HEADERS ${HEADERS} sdl2_sink.h)
31 include_directories(${SDL2_INCLUDE_DIR})
32endif()
33
19create_directory_groups(${SRCS} ${HEADERS}) 34create_directory_groups(${SRCS} ${HEADERS})
20 35
21add_library(audio_core STATIC ${SRCS} ${HEADERS}) \ No newline at end of file 36add_library(audio_core STATIC ${SRCS} ${HEADERS})
37target_link_libraries(audio_core SoundTouch)
38
39if(SDL2_FOUND)
40 target_link_libraries(audio_core ${SDL2_LIBRARY})
41 set_property(TARGET audio_core APPEND PROPERTY COMPILE_DEFINITIONS HAVE_SDL2)
42endif()
diff --git a/src/audio_core/audio_core.cpp b/src/audio_core/audio_core.cpp
index 894f46990..d42249ebd 100644
--- a/src/audio_core/audio_core.cpp
+++ b/src/audio_core/audio_core.cpp
@@ -2,8 +2,15 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <memory>
6#include <string>
7
5#include "audio_core/audio_core.h" 8#include "audio_core/audio_core.h"
6#include "audio_core/hle/dsp.h" 9#include "audio_core/hle/dsp.h"
10#include "audio_core/hle/pipe.h"
11#include "audio_core/null_sink.h"
12#include "audio_core/sink.h"
13#include "audio_core/sink_details.h"
7 14
8#include "core/core_timing.h" 15#include "core/core_timing.h"
9#include "core/hle/kernel/vm_manager.h" 16#include "core/hle/kernel/vm_manager.h"
@@ -17,17 +24,16 @@ static constexpr u64 audio_frame_ticks = 1310252ull; ///< Units: ARM11 cycles
17 24
18static void AudioTickCallback(u64 /*userdata*/, int cycles_late) { 25static void AudioTickCallback(u64 /*userdata*/, int cycles_late) {
19 if (DSP::HLE::Tick()) { 26 if (DSP::HLE::Tick()) {
20 // HACK: We're not signaling the interrups when they should be, but just firing them all off together. 27 // TODO(merry): Signal all the other interrupts as appropriate.
21 // It should be only (interrupt_id = 2, channel_id = 2) that's signalled here. 28 DSP_DSP::SignalPipeInterrupt(DSP::HLE::DspPipe::Audio);
22 // TODO(merry): Understand when the other interrupts are fired. 29 // HACK(merry): Added to prevent regressions. Will remove soon.
23 DSP_DSP::SignalAllInterrupts(); 30 DSP_DSP::SignalPipeInterrupt(DSP::HLE::DspPipe::Binary);
24 } 31 }
25 32
26 // Reschedule recurrent event 33 // Reschedule recurrent event
27 CoreTiming::ScheduleEvent(audio_frame_ticks - cycles_late, tick_event); 34 CoreTiming::ScheduleEvent(audio_frame_ticks - cycles_late, tick_event);
28} 35}
29 36
30/// Initialise Audio
31void Init() { 37void Init() {
32 DSP::HLE::Init(); 38 DSP::HLE::Init();
33 39
@@ -35,19 +41,39 @@ void Init() {
35 CoreTiming::ScheduleEvent(audio_frame_ticks, tick_event); 41 CoreTiming::ScheduleEvent(audio_frame_ticks, tick_event);
36} 42}
37 43
38/// Add DSP address spaces to Process's address space.
39void AddAddressSpace(Kernel::VMManager& address_space) { 44void AddAddressSpace(Kernel::VMManager& address_space) {
40 auto r0_vma = address_space.MapBackingMemory(DSP::HLE::region0_base, reinterpret_cast<u8*>(&DSP::HLE::g_region0), sizeof(DSP::HLE::SharedMemory), Kernel::MemoryState::IO).MoveFrom(); 45 auto r0_vma = address_space.MapBackingMemory(DSP::HLE::region0_base, reinterpret_cast<u8*>(&DSP::HLE::g_regions[0]), sizeof(DSP::HLE::SharedMemory), Kernel::MemoryState::IO).MoveFrom();
41 address_space.Reprotect(r0_vma, Kernel::VMAPermission::ReadWrite); 46 address_space.Reprotect(r0_vma, Kernel::VMAPermission::ReadWrite);
42 47
43 auto r1_vma = address_space.MapBackingMemory(DSP::HLE::region1_base, reinterpret_cast<u8*>(&DSP::HLE::g_region1), sizeof(DSP::HLE::SharedMemory), Kernel::MemoryState::IO).MoveFrom(); 48 auto r1_vma = address_space.MapBackingMemory(DSP::HLE::region1_base, reinterpret_cast<u8*>(&DSP::HLE::g_regions[1]), sizeof(DSP::HLE::SharedMemory), Kernel::MemoryState::IO).MoveFrom();
44 address_space.Reprotect(r1_vma, Kernel::VMAPermission::ReadWrite); 49 address_space.Reprotect(r1_vma, Kernel::VMAPermission::ReadWrite);
45} 50}
46 51
47/// Shutdown Audio 52void SelectSink(std::string sink_id) {
53 if (sink_id == "auto") {
54 // Auto-select.
55 // g_sink_details is ordered in terms of desirability, with the best choice at the front.
56 const auto& sink_detail = g_sink_details.front();
57 DSP::HLE::SetSink(sink_detail.factory());
58 return;
59 }
60
61 auto iter = std::find_if(g_sink_details.begin(), g_sink_details.end(), [sink_id](const auto& sink_detail) {
62 return sink_detail.id == sink_id;
63 });
64
65 if (iter == g_sink_details.end()) {
66 LOG_ERROR(Audio, "AudioCore::SelectSink given invalid sink_id");
67 DSP::HLE::SetSink(std::make_unique<NullSink>());
68 return;
69 }
70
71 DSP::HLE::SetSink(iter->factory());
72}
73
48void Shutdown() { 74void Shutdown() {
49 CoreTiming::UnscheduleEvent(tick_event, 0); 75 CoreTiming::UnscheduleEvent(tick_event, 0);
50 DSP::HLE::Shutdown(); 76 DSP::HLE::Shutdown();
51} 77}
52 78
53} //namespace 79} // namespace AudioCore
diff --git a/src/audio_core/audio_core.h b/src/audio_core/audio_core.h
index 64c330914..f618361f3 100644
--- a/src/audio_core/audio_core.h
+++ b/src/audio_core/audio_core.h
@@ -4,14 +4,14 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <string>
8
7namespace Kernel { 9namespace Kernel {
8class VMManager; 10class VMManager;
9} 11}
10 12
11namespace AudioCore { 13namespace AudioCore {
12 14
13constexpr int num_sources = 24;
14constexpr int samples_per_frame = 160; ///< Samples per audio frame at native sample rate
15constexpr int native_sample_rate = 32728; ///< 32kHz 15constexpr int native_sample_rate = 32728; ///< 32kHz
16 16
17/// Initialise Audio Core 17/// Initialise Audio Core
@@ -20,6 +20,9 @@ void Init();
20/// Add DSP address spaces to a Process. 20/// Add DSP address spaces to a Process.
21void AddAddressSpace(Kernel::VMManager& vm_manager); 21void AddAddressSpace(Kernel::VMManager& vm_manager);
22 22
23/// Select the sink to use based on sink id.
24void SelectSink(std::string sink_id);
25
23/// Shutdown Audio Core 26/// Shutdown Audio Core
24void Shutdown(); 27void Shutdown();
25 28
diff --git a/src/audio_core/hle/common.h b/src/audio_core/hle/common.h
index 37d441eb2..596b67eaf 100644
--- a/src/audio_core/hle/common.h
+++ b/src/audio_core/hle/common.h
@@ -7,18 +7,19 @@
7#include <algorithm> 7#include <algorithm>
8#include <array> 8#include <array>
9 9
10#include "audio_core/audio_core.h"
11
12#include "common/common_types.h" 10#include "common/common_types.h"
13 11
14namespace DSP { 12namespace DSP {
15namespace HLE { 13namespace HLE {
16 14
15constexpr int num_sources = 24;
16constexpr int samples_per_frame = 160; ///< Samples per audio frame at native sample rate
17
17/// The final output to the speakers is stereo. Preprocessing output in Source is also stereo. 18/// The final output to the speakers is stereo. Preprocessing output in Source is also stereo.
18using StereoFrame16 = std::array<std::array<s16, 2>, AudioCore::samples_per_frame>; 19using StereoFrame16 = std::array<std::array<s16, 2>, samples_per_frame>;
19 20
20/// The DSP is quadraphonic internally. 21/// The DSP is quadraphonic internally.
21using QuadFrame32 = std::array<std::array<s32, 4>, AudioCore::samples_per_frame>; 22using QuadFrame32 = std::array<std::array<s32, 4>, samples_per_frame>;
22 23
23/** 24/**
24 * This performs the filter operation defined by FilterT::ProcessSample on the frame in-place. 25 * This performs the filter operation defined by FilterT::ProcessSample on the frame in-place.
@@ -26,7 +27,7 @@ using QuadFrame32 = std::array<std::array<s32, 4>, AudioCore::samples_per_fram
26 */ 27 */
27template<typename FrameT, typename FilterT> 28template<typename FrameT, typename FilterT>
28void FilterFrame(FrameT& frame, FilterT& filter) { 29void FilterFrame(FrameT& frame, FilterT& filter) {
29 std::transform(frame.begin(), frame.end(), frame.begin(), [&filter](const typename FrameT::value_type& sample) { 30 std::transform(frame.begin(), frame.end(), frame.begin(), [&filter](const auto& sample) {
30 return filter.ProcessSample(sample); 31 return filter.ProcessSample(sample);
31 }); 32 });
32} 33}
diff --git a/src/audio_core/hle/dsp.cpp b/src/audio_core/hle/dsp.cpp
index c89356edc..0cdbdb06a 100644
--- a/src/audio_core/hle/dsp.cpp
+++ b/src/audio_core/hle/dsp.cpp
@@ -2,40 +2,81 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <array>
6#include <memory>
7
5#include "audio_core/hle/dsp.h" 8#include "audio_core/hle/dsp.h"
6#include "audio_core/hle/pipe.h" 9#include "audio_core/hle/pipe.h"
10#include "audio_core/hle/source.h"
11#include "audio_core/sink.h"
7 12
8namespace DSP { 13namespace DSP {
9namespace HLE { 14namespace HLE {
10 15
11SharedMemory g_region0; 16std::array<SharedMemory, 2> g_regions;
12SharedMemory g_region1; 17
18static size_t CurrentRegionIndex() {
19 // The region with the higher frame counter is chosen unless there is wraparound.
20 // This function only returns a 0 or 1.
21
22 if (g_regions[0].frame_counter == 0xFFFFu && g_regions[1].frame_counter != 0xFFFEu) {
23 // Wraparound has occured.
24 return 1;
25 }
26
27 if (g_regions[1].frame_counter == 0xFFFFu && g_regions[0].frame_counter != 0xFFFEu) {
28 // Wraparound has occured.
29 return 0;
30 }
31
32 return (g_regions[0].frame_counter > g_regions[1].frame_counter) ? 0 : 1;
33}
34
35static SharedMemory& ReadRegion() {
36 return g_regions[CurrentRegionIndex()];
37}
38
39static SharedMemory& WriteRegion() {
40 return g_regions[1 - CurrentRegionIndex()];
41}
42
43static std::array<Source, num_sources> sources = {
44 Source(0), Source(1), Source(2), Source(3), Source(4), Source(5),
45 Source(6), Source(7), Source(8), Source(9), Source(10), Source(11),
46 Source(12), Source(13), Source(14), Source(15), Source(16), Source(17),
47 Source(18), Source(19), Source(20), Source(21), Source(22), Source(23)
48};
49
50static std::unique_ptr<AudioCore::Sink> sink;
13 51
14void Init() { 52void Init() {
15 DSP::HLE::ResetPipes(); 53 DSP::HLE::ResetPipes();
54 for (auto& source : sources) {
55 source.Reset();
56 }
16} 57}
17 58
18void Shutdown() { 59void Shutdown() {
19} 60}
20 61
21bool Tick() { 62bool Tick() {
22 return true; 63 SharedMemory& read = ReadRegion();
23} 64 SharedMemory& write = WriteRegion();
24 65
25SharedMemory& CurrentRegion() { 66 std::array<QuadFrame32, 3> intermediate_mixes = {};
26 // The region with the higher frame counter is chosen unless there is wraparound.
27 67
28 if (g_region0.frame_counter == 0xFFFFu && g_region1.frame_counter != 0xFFFEu) { 68 for (size_t i = 0; i < num_sources; i++) {
29 // Wraparound has occured. 69 write.source_statuses.status[i] = sources[i].Tick(read.source_configurations.config[i], read.adpcm_coefficients.coeff[i]);
30 return g_region1; 70 for (size_t mix = 0; mix < 3; mix++) {
71 sources[i].MixInto(intermediate_mixes[mix], mix);
72 }
31 } 73 }
32 74
33 if (g_region1.frame_counter == 0xFFFFu && g_region0.frame_counter != 0xFFFEu) { 75 return true;
34 // Wraparound has occured. 76}
35 return g_region0;
36 }
37 77
38 return (g_region0.frame_counter > g_region1.frame_counter) ? g_region0 : g_region1; 78void SetSink(std::unique_ptr<AudioCore::Sink> sink_) {
79 sink = std::move(sink_);
39} 80}
40 81
41} // namespace HLE 82} // namespace HLE
diff --git a/src/audio_core/hle/dsp.h b/src/audio_core/hle/dsp.h
index c15ef0b7a..f6e53f68f 100644
--- a/src/audio_core/hle/dsp.h
+++ b/src/audio_core/hle/dsp.h
@@ -4,16 +4,22 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
7#include <cstddef> 8#include <cstddef>
9#include <memory>
8#include <type_traits> 10#include <type_traits>
9 11
10#include "audio_core/audio_core.h" 12#include "audio_core/hle/common.h"
11 13
12#include "common/bit_field.h" 14#include "common/bit_field.h"
13#include "common/common_funcs.h" 15#include "common/common_funcs.h"
14#include "common/common_types.h" 16#include "common/common_types.h"
15#include "common/swap.h" 17#include "common/swap.h"
16 18
19namespace AudioCore {
20class Sink;
21}
22
17namespace DSP { 23namespace DSP {
18namespace HLE { 24namespace HLE {
19 25
@@ -27,13 +33,8 @@ namespace HLE {
27// double-buffer. The frame counter is located as the very last u16 of each region and is incremented 33// double-buffer. The frame counter is located as the very last u16 of each region and is incremented
28// each audio tick. 34// each audio tick.
29 35
30struct SharedMemory;
31
32constexpr VAddr region0_base = 0x1FF50000; 36constexpr VAddr region0_base = 0x1FF50000;
33extern SharedMemory g_region0;
34
35constexpr VAddr region1_base = 0x1FF70000; 37constexpr VAddr region1_base = 0x1FF70000;
36extern SharedMemory g_region1;
37 38
38/** 39/**
39 * The DSP is native 16-bit. The DSP also appears to be big-endian. When reading 32-bit numbers from 40 * The DSP is native 16-bit. The DSP also appears to be big-endian. When reading 32-bit numbers from
@@ -164,9 +165,9 @@ struct SourceConfiguration {
164 float_le rate_multiplier; 165 float_le rate_multiplier;
165 166
166 enum class InterpolationMode : u8 { 167 enum class InterpolationMode : u8 {
167 None = 0, 168 Polyphase = 0,
168 Linear = 1, 169 Linear = 1,
169 Polyphase = 2 170 None = 2
170 }; 171 };
171 172
172 InterpolationMode interpolation_mode; 173 InterpolationMode interpolation_mode;
@@ -305,7 +306,7 @@ struct SourceConfiguration {
305 u16_le buffer_id; 306 u16_le buffer_id;
306 }; 307 };
307 308
308 Configuration config[AudioCore::num_sources]; 309 Configuration config[num_sources];
309}; 310};
310ASSERT_DSP_STRUCT(SourceConfiguration::Configuration, 192); 311ASSERT_DSP_STRUCT(SourceConfiguration::Configuration, 192);
311ASSERT_DSP_STRUCT(SourceConfiguration::Configuration::Buffer, 20); 312ASSERT_DSP_STRUCT(SourceConfiguration::Configuration::Buffer, 20);
@@ -313,14 +314,14 @@ ASSERT_DSP_STRUCT(SourceConfiguration::Configuration::Buffer, 20);
313struct SourceStatus { 314struct SourceStatus {
314 struct Status { 315 struct Status {
315 u8 is_enabled; ///< Is this channel enabled? (Doesn't have to be playing anything.) 316 u8 is_enabled; ///< Is this channel enabled? (Doesn't have to be playing anything.)
316 u8 previous_buffer_id_dirty; ///< Non-zero when previous_buffer_id changes 317 u8 current_buffer_id_dirty; ///< Non-zero when current_buffer_id changes
317 u16_le sync; ///< Is set by the DSP to the value of SourceConfiguration::sync 318 u16_le sync; ///< Is set by the DSP to the value of SourceConfiguration::sync
318 u32_dsp buffer_position; ///< Number of samples into the current buffer 319 u32_dsp buffer_position; ///< Number of samples into the current buffer
319 u16_le previous_buffer_id; ///< Updated when a buffer finishes playing 320 u16_le current_buffer_id; ///< Updated when a buffer finishes playing
320 INSERT_PADDING_DSPWORDS(1); 321 INSERT_PADDING_DSPWORDS(1);
321 }; 322 };
322 323
323 Status status[AudioCore::num_sources]; 324 Status status[num_sources];
324}; 325};
325ASSERT_DSP_STRUCT(SourceStatus::Status, 12); 326ASSERT_DSP_STRUCT(SourceStatus::Status, 12);
326 327
@@ -413,7 +414,7 @@ ASSERT_DSP_STRUCT(DspConfiguration::ReverbEffect, 52);
413struct AdpcmCoefficients { 414struct AdpcmCoefficients {
414 /// Coefficients are signed fixed point with 11 fractional bits. 415 /// Coefficients are signed fixed point with 11 fractional bits.
415 /// Each source has 16 coefficients associated with it. 416 /// Each source has 16 coefficients associated with it.
416 s16_le coeff[AudioCore::num_sources][16]; 417 s16_le coeff[num_sources][16];
417}; 418};
418ASSERT_DSP_STRUCT(AdpcmCoefficients, 768); 419ASSERT_DSP_STRUCT(AdpcmCoefficients, 768);
419 420
@@ -427,7 +428,7 @@ ASSERT_DSP_STRUCT(DspStatus, 32);
427/// Final mixed output in PCM16 stereo format, what you hear out of the speakers. 428/// Final mixed output in PCM16 stereo format, what you hear out of the speakers.
428/// When the application writes to this region it has no effect. 429/// When the application writes to this region it has no effect.
429struct FinalMixSamples { 430struct FinalMixSamples {
430 s16_le pcm16[2 * AudioCore::samples_per_frame]; 431 s16_le pcm16[2 * samples_per_frame];
431}; 432};
432ASSERT_DSP_STRUCT(FinalMixSamples, 640); 433ASSERT_DSP_STRUCT(FinalMixSamples, 640);
433 434
@@ -437,7 +438,7 @@ ASSERT_DSP_STRUCT(FinalMixSamples, 640);
437/// Values that exceed s16 range will be clipped by the DSP after further processing. 438/// Values that exceed s16 range will be clipped by the DSP after further processing.
438struct IntermediateMixSamples { 439struct IntermediateMixSamples {
439 struct Samples { 440 struct Samples {
440 s32_le pcm32[4][AudioCore::samples_per_frame]; ///< Little-endian as opposed to DSP middle-endian. 441 s32_le pcm32[4][samples_per_frame]; ///< Little-endian as opposed to DSP middle-endian.
441 }; 442 };
442 443
443 Samples mix1; 444 Samples mix1;
@@ -502,6 +503,8 @@ struct SharedMemory {
502}; 503};
503ASSERT_DSP_STRUCT(SharedMemory, 0x8000); 504ASSERT_DSP_STRUCT(SharedMemory, 0x8000);
504 505
506extern std::array<SharedMemory, 2> g_regions;
507
505// Structures must have an offset that is a multiple of two. 508// Structures must have an offset that is a multiple of two.
506static_assert(offsetof(SharedMemory, frame_counter) % 2 == 0, "Structures in DSP::HLE::SharedMemory must be 2-byte aligned"); 509static_assert(offsetof(SharedMemory, frame_counter) % 2 == 0, "Structures in DSP::HLE::SharedMemory must be 2-byte aligned");
507static_assert(offsetof(SharedMemory, source_configurations) % 2 == 0, "Structures in DSP::HLE::SharedMemory must be 2-byte aligned"); 510static_assert(offsetof(SharedMemory, source_configurations) % 2 == 0, "Structures in DSP::HLE::SharedMemory must be 2-byte aligned");
@@ -535,8 +538,11 @@ void Shutdown();
535 */ 538 */
536bool Tick(); 539bool Tick();
537 540
538/// Returns a mutable reference to the current region. Current region is selected based on the frame counter. 541/**
539SharedMemory& CurrentRegion(); 542 * Set the output sink. This must be called before calling Tick().
543 * @param sink The sink to which audio will be output.
544 */
545void SetSink(std::unique_ptr<AudioCore::Sink> sink);
540 546
541} // namespace HLE 547} // namespace HLE
542} // namespace DSP 548} // namespace DSP
diff --git a/src/audio_core/hle/filter.h b/src/audio_core/hle/filter.h
index 75738f600..43d2035cd 100644
--- a/src/audio_core/hle/filter.h
+++ b/src/audio_core/hle/filter.h
@@ -16,6 +16,7 @@ namespace HLE {
16 16
17/// Preprocessing filters. There is an independent set of filters for each Source. 17/// Preprocessing filters. There is an independent set of filters for each Source.
18class SourceFilters final { 18class SourceFilters final {
19public:
19 SourceFilters() { Reset(); } 20 SourceFilters() { Reset(); }
20 21
21 /// Reset internal state. 22 /// Reset internal state.
diff --git a/src/audio_core/hle/pipe.cpp b/src/audio_core/hle/pipe.cpp
index 9381883b4..44dff1345 100644
--- a/src/audio_core/hle/pipe.cpp
+++ b/src/audio_core/hle/pipe.cpp
@@ -12,12 +12,14 @@
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "common/logging/log.h" 13#include "common/logging/log.h"
14 14
15#include "core/hle/service/dsp_dsp.h"
16
15namespace DSP { 17namespace DSP {
16namespace HLE { 18namespace HLE {
17 19
18static DspState dsp_state = DspState::Off; 20static DspState dsp_state = DspState::Off;
19 21
20static std::array<std::vector<u8>, static_cast<size_t>(DspPipe::DspPipe_MAX)> pipe_data; 22static std::array<std::vector<u8>, NUM_DSP_PIPE> pipe_data;
21 23
22void ResetPipes() { 24void ResetPipes() {
23 for (auto& data : pipe_data) { 25 for (auto& data : pipe_data) {
@@ -27,17 +29,24 @@ void ResetPipes() {
27} 29}
28 30
29std::vector<u8> PipeRead(DspPipe pipe_number, u32 length) { 31std::vector<u8> PipeRead(DspPipe pipe_number, u32 length) {
30 if (pipe_number >= DspPipe::DspPipe_MAX) { 32 const size_t pipe_index = static_cast<size_t>(pipe_number);
31 LOG_ERROR(Audio_DSP, "pipe_number = %u invalid", pipe_number); 33
34 if (pipe_index >= NUM_DSP_PIPE) {
35 LOG_ERROR(Audio_DSP, "pipe_number = %zu invalid", pipe_index);
32 return {}; 36 return {};
33 } 37 }
34 38
35 std::vector<u8>& data = pipe_data[static_cast<size_t>(pipe_number)]; 39 if (length > UINT16_MAX) { // Can only read at most UINT16_MAX from the pipe
40 LOG_ERROR(Audio_DSP, "length of %u greater than max of %u", length, UINT16_MAX);
41 return {};
42 }
43
44 std::vector<u8>& data = pipe_data[pipe_index];
36 45
37 if (length > data.size()) { 46 if (length > data.size()) {
38 LOG_WARNING(Audio_DSP, "pipe_number = %u is out of data, application requested read of %u but %zu remain", 47 LOG_WARNING(Audio_DSP, "pipe_number = %zu is out of data, application requested read of %u but %zu remain",
39 pipe_number, length, data.size()); 48 pipe_index, length, data.size());
40 length = data.size(); 49 length = static_cast<u32>(data.size());
41 } 50 }
42 51
43 if (length == 0) 52 if (length == 0)
@@ -49,16 +58,20 @@ std::vector<u8> PipeRead(DspPipe pipe_number, u32 length) {
49} 58}
50 59
51size_t GetPipeReadableSize(DspPipe pipe_number) { 60size_t GetPipeReadableSize(DspPipe pipe_number) {
52 if (pipe_number >= DspPipe::DspPipe_MAX) { 61 const size_t pipe_index = static_cast<size_t>(pipe_number);
53 LOG_ERROR(Audio_DSP, "pipe_number = %u invalid", pipe_number); 62
63 if (pipe_index >= NUM_DSP_PIPE) {
64 LOG_ERROR(Audio_DSP, "pipe_number = %zu invalid", pipe_index);
54 return 0; 65 return 0;
55 } 66 }
56 67
57 return pipe_data[static_cast<size_t>(pipe_number)].size(); 68 return pipe_data[pipe_index].size();
58} 69}
59 70
60static void WriteU16(DspPipe pipe_number, u16 value) { 71static void WriteU16(DspPipe pipe_number, u16 value) {
61 std::vector<u8>& data = pipe_data[static_cast<size_t>(pipe_number)]; 72 const size_t pipe_index = static_cast<size_t>(pipe_number);
73
74 std::vector<u8>& data = pipe_data.at(pipe_index);
62 // Little endian 75 // Little endian
63 data.emplace_back(value & 0xFF); 76 data.emplace_back(value & 0xFF);
64 data.emplace_back(value >> 8); 77 data.emplace_back(value >> 8);
@@ -86,11 +99,13 @@ static void AudioPipeWriteStructAddresses() {
86 }; 99 };
87 100
88 // Begin with a u16 denoting the number of structs. 101 // Begin with a u16 denoting the number of structs.
89 WriteU16(DspPipe::Audio, struct_addresses.size()); 102 WriteU16(DspPipe::Audio, static_cast<u16>(struct_addresses.size()));
90 // Then write the struct addresses. 103 // Then write the struct addresses.
91 for (u16 addr : struct_addresses) { 104 for (u16 addr : struct_addresses) {
92 WriteU16(DspPipe::Audio, addr); 105 WriteU16(DspPipe::Audio, addr);
93 } 106 }
107 // Signal that we have data on this pipe.
108 DSP_DSP::SignalPipeInterrupt(DspPipe::Audio);
94} 109}
95 110
96void PipeWrite(DspPipe pipe_number, const std::vector<u8>& buffer) { 111void PipeWrite(DspPipe pipe_number, const std::vector<u8>& buffer) {
@@ -145,7 +160,7 @@ void PipeWrite(DspPipe pipe_number, const std::vector<u8>& buffer) {
145 return; 160 return;
146 } 161 }
147 default: 162 default:
148 LOG_CRITICAL(Audio_DSP, "pipe_number = %u unimplemented", pipe_number); 163 LOG_CRITICAL(Audio_DSP, "pipe_number = %zu unimplemented", static_cast<size_t>(pipe_number));
149 UNIMPLEMENTED(); 164 UNIMPLEMENTED();
150 return; 165 return;
151 } 166 }
diff --git a/src/audio_core/hle/pipe.h b/src/audio_core/hle/pipe.h
index 382d35e87..b714c0496 100644
--- a/src/audio_core/hle/pipe.h
+++ b/src/audio_core/hle/pipe.h
@@ -19,15 +19,19 @@ enum class DspPipe {
19 Debug = 0, 19 Debug = 0,
20 Dma = 1, 20 Dma = 1,
21 Audio = 2, 21 Audio = 2,
22 Binary = 3, 22 Binary = 3
23 DspPipe_MAX
24}; 23};
24constexpr size_t NUM_DSP_PIPE = 8;
25 25
26/** 26/**
27 * Read a DSP pipe. 27 * Reads `length` bytes from the DSP pipe identified with `pipe_number`.
28 * @param pipe_number The Pipe ID 28 * @note Can read up to the maximum value of a u16 in bytes (65,535).
29 * @param length How much data to request. 29 * @note IF an error is encoutered with either an invalid `pipe_number` or `length` value, an empty vector will be returned.
30 * @return The data read from the pipe. The size of this vector can be less than the length requested. 30 * @note IF `length` is set to 0, an empty vector will be returned.
31 * @note IF `length` is greater than the amount of data available, this function will only read the available amount.
32 * @param pipe_number a `DspPipe`
33 * @param length the number of bytes to read. The max is 65,535 (max of u16).
34 * @returns a vector of bytes from the specified pipe. On error, will be empty.
31 */ 35 */
32std::vector<u8> PipeRead(DspPipe pipe_number, u32 length); 36std::vector<u8> PipeRead(DspPipe pipe_number, u32 length);
33 37
diff --git a/src/audio_core/hle/source.cpp b/src/audio_core/hle/source.cpp
new file mode 100644
index 000000000..daaf6e3f3
--- /dev/null
+++ b/src/audio_core/hle/source.cpp
@@ -0,0 +1,320 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7
8#include "audio_core/codec.h"
9#include "audio_core/hle/common.h"
10#include "audio_core/hle/source.h"
11#include "audio_core/interpolate.h"
12
13#include "common/assert.h"
14#include "common/logging/log.h"
15
16#include "core/memory.h"
17
18namespace DSP {
19namespace HLE {
20
21SourceStatus::Status Source::Tick(SourceConfiguration::Configuration& config, const s16_le (&adpcm_coeffs)[16]) {
22 ParseConfig(config, adpcm_coeffs);
23
24 if (state.enabled) {
25 GenerateFrame();
26 }
27
28 return GetCurrentStatus();
29}
30
31void Source::MixInto(QuadFrame32& dest, size_t intermediate_mix_id) const {
32 if (!state.enabled)
33 return;
34
35 const std::array<float, 4>& gains = state.gain.at(intermediate_mix_id);
36 for (size_t samplei = 0; samplei < samples_per_frame; samplei++) {
37 // Conversion from stereo (current_frame) to quadraphonic (dest) occurs here.
38 dest[samplei][0] += static_cast<s32>(gains[0] * current_frame[samplei][0]);
39 dest[samplei][1] += static_cast<s32>(gains[1] * current_frame[samplei][1]);
40 dest[samplei][2] += static_cast<s32>(gains[2] * current_frame[samplei][0]);
41 dest[samplei][3] += static_cast<s32>(gains[3] * current_frame[samplei][1]);
42 }
43}
44
45void Source::Reset() {
46 current_frame.fill({});
47 state = {};
48}
49
50void Source::ParseConfig(SourceConfiguration::Configuration& config, const s16_le (&adpcm_coeffs)[16]) {
51 if (!config.dirty_raw) {
52 return;
53 }
54
55 if (config.reset_flag) {
56 config.reset_flag.Assign(0);
57 Reset();
58 LOG_TRACE(Audio_DSP, "source_id=%zu reset", source_id);
59 }
60
61 if (config.partial_reset_flag) {
62 config.partial_reset_flag.Assign(0);
63 state.input_queue = std::priority_queue<Buffer, std::vector<Buffer>, BufferOrder>{};
64 LOG_TRACE(Audio_DSP, "source_id=%zu partial_reset", source_id);
65 }
66
67 if (config.enable_dirty) {
68 config.enable_dirty.Assign(0);
69 state.enabled = config.enable != 0;
70 LOG_TRACE(Audio_DSP, "source_id=%zu enable=%d", source_id, state.enabled);
71 }
72
73 if (config.sync_dirty) {
74 config.sync_dirty.Assign(0);
75 state.sync = config.sync;
76 LOG_TRACE(Audio_DSP, "source_id=%zu sync=%u", source_id, state.sync);
77 }
78
79 if (config.rate_multiplier_dirty) {
80 config.rate_multiplier_dirty.Assign(0);
81 state.rate_multiplier = config.rate_multiplier;
82 LOG_TRACE(Audio_DSP, "source_id=%zu rate=%f", source_id, state.rate_multiplier);
83
84 if (state.rate_multiplier <= 0) {
85 LOG_ERROR(Audio_DSP, "Was given an invalid rate multiplier: source_id=%zu rate=%f", source_id, state.rate_multiplier);
86 state.rate_multiplier = 1.0f;
87 // Note: Actual firmware starts producing garbage if this occurs.
88 }
89 }
90
91 if (config.adpcm_coefficients_dirty) {
92 config.adpcm_coefficients_dirty.Assign(0);
93 std::transform(adpcm_coeffs, adpcm_coeffs + state.adpcm_coeffs.size(), state.adpcm_coeffs.begin(),
94 [](const auto& coeff) { return static_cast<s16>(coeff); });
95 LOG_TRACE(Audio_DSP, "source_id=%zu adpcm update", source_id);
96 }
97
98 if (config.gain_0_dirty) {
99 config.gain_0_dirty.Assign(0);
100 std::transform(config.gain[0], config.gain[0] + state.gain[0].size(), state.gain[0].begin(),
101 [](const auto& coeff) { return static_cast<float>(coeff); });
102 LOG_TRACE(Audio_DSP, "source_id=%zu gain 0 update", source_id);
103 }
104
105 if (config.gain_1_dirty) {
106 config.gain_1_dirty.Assign(0);
107 std::transform(config.gain[1], config.gain[1] + state.gain[1].size(), state.gain[1].begin(),
108 [](const auto& coeff) { return static_cast<float>(coeff); });
109 LOG_TRACE(Audio_DSP, "source_id=%zu gain 1 update", source_id);
110 }
111
112 if (config.gain_2_dirty) {
113 config.gain_2_dirty.Assign(0);
114 std::transform(config.gain[2], config.gain[2] + state.gain[2].size(), state.gain[2].begin(),
115 [](const auto& coeff) { return static_cast<float>(coeff); });
116 LOG_TRACE(Audio_DSP, "source_id=%zu gain 2 update", source_id);
117 }
118
119 if (config.filters_enabled_dirty) {
120 config.filters_enabled_dirty.Assign(0);
121 state.filters.Enable(config.simple_filter_enabled.ToBool(), config.biquad_filter_enabled.ToBool());
122 LOG_TRACE(Audio_DSP, "source_id=%zu enable_simple=%hu enable_biquad=%hu",
123 source_id, config.simple_filter_enabled.Value(), config.biquad_filter_enabled.Value());
124 }
125
126 if (config.simple_filter_dirty) {
127 config.simple_filter_dirty.Assign(0);
128 state.filters.Configure(config.simple_filter);
129 LOG_TRACE(Audio_DSP, "source_id=%zu simple filter update");
130 }
131
132 if (config.biquad_filter_dirty) {
133 config.biquad_filter_dirty.Assign(0);
134 state.filters.Configure(config.biquad_filter);
135 LOG_TRACE(Audio_DSP, "source_id=%zu biquad filter update");
136 }
137
138 if (config.interpolation_dirty) {
139 config.interpolation_dirty.Assign(0);
140 state.interpolation_mode = config.interpolation_mode;
141 LOG_TRACE(Audio_DSP, "source_id=%zu interpolation_mode=%zu", source_id, static_cast<size_t>(state.interpolation_mode));
142 }
143
144 if (config.format_dirty || config.embedded_buffer_dirty) {
145 config.format_dirty.Assign(0);
146 state.format = config.format;
147 LOG_TRACE(Audio_DSP, "source_id=%zu format=%zu", source_id, static_cast<size_t>(state.format));
148 }
149
150 if (config.mono_or_stereo_dirty || config.embedded_buffer_dirty) {
151 config.mono_or_stereo_dirty.Assign(0);
152 state.mono_or_stereo = config.mono_or_stereo;
153 LOG_TRACE(Audio_DSP, "source_id=%zu mono_or_stereo=%zu", source_id, static_cast<size_t>(state.mono_or_stereo));
154 }
155
156 if (config.embedded_buffer_dirty) {
157 config.embedded_buffer_dirty.Assign(0);
158 state.input_queue.emplace(Buffer{
159 config.physical_address,
160 config.length,
161 static_cast<u8>(config.adpcm_ps),
162 { config.adpcm_yn[0], config.adpcm_yn[1] },
163 config.adpcm_dirty.ToBool(),
164 config.is_looping.ToBool(),
165 config.buffer_id,
166 state.mono_or_stereo,
167 state.format,
168 false
169 });
170 LOG_TRACE(Audio_DSP, "enqueuing embedded addr=0x%08x len=%u id=%hu", config.physical_address, config.length, config.buffer_id);
171 }
172
173 if (config.buffer_queue_dirty) {
174 config.buffer_queue_dirty.Assign(0);
175 for (size_t i = 0; i < 4; i++) {
176 if (config.buffers_dirty & (1 << i)) {
177 const auto& b = config.buffers[i];
178 state.input_queue.emplace(Buffer{
179 b.physical_address,
180 b.length,
181 static_cast<u8>(b.adpcm_ps),
182 { b.adpcm_yn[0], b.adpcm_yn[1] },
183 b.adpcm_dirty != 0,
184 b.is_looping != 0,
185 b.buffer_id,
186 state.mono_or_stereo,
187 state.format,
188 true
189 });
190 LOG_TRACE(Audio_DSP, "enqueuing queued %zu addr=0x%08x len=%u id=%hu", i, b.physical_address, b.length, b.buffer_id);
191 }
192 }
193 config.buffers_dirty = 0;
194 }
195
196 if (config.dirty_raw) {
197 LOG_DEBUG(Audio_DSP, "source_id=%zu remaining_dirty=%x", source_id, config.dirty_raw);
198 }
199
200 config.dirty_raw = 0;
201}
202
203void Source::GenerateFrame() {
204 current_frame.fill({});
205
206 if (state.current_buffer.empty() && !DequeueBuffer()) {
207 state.enabled = false;
208 state.buffer_update = true;
209 state.current_buffer_id = 0;
210 return;
211 }
212
213 size_t frame_position = 0;
214
215 state.current_sample_number = state.next_sample_number;
216 while (frame_position < current_frame.size()) {
217 if (state.current_buffer.empty() && !DequeueBuffer()) {
218 break;
219 }
220
221 const size_t size_to_copy = std::min(state.current_buffer.size(), current_frame.size() - frame_position);
222
223 std::copy(state.current_buffer.begin(), state.current_buffer.begin() + size_to_copy, current_frame.begin() + frame_position);
224 state.current_buffer.erase(state.current_buffer.begin(), state.current_buffer.begin() + size_to_copy);
225
226 frame_position += size_to_copy;
227 state.next_sample_number += static_cast<u32>(size_to_copy);
228 }
229
230 state.filters.ProcessFrame(current_frame);
231}
232
233
234bool Source::DequeueBuffer() {
235 ASSERT_MSG(state.current_buffer.empty(), "Shouldn't dequeue; we still have data in current_buffer");
236
237 if (state.input_queue.empty())
238 return false;
239
240 const Buffer buf = state.input_queue.top();
241 state.input_queue.pop();
242
243 if (buf.adpcm_dirty) {
244 state.adpcm_state.yn1 = buf.adpcm_yn[0];
245 state.adpcm_state.yn2 = buf.adpcm_yn[1];
246 }
247
248 if (buf.is_looping) {
249 LOG_ERROR(Audio_DSP, "Looped buffers are unimplemented at the moment");
250 }
251
252 const u8* const memory = Memory::GetPhysicalPointer(buf.physical_address);
253 if (memory) {
254 const unsigned num_channels = buf.mono_or_stereo == MonoOrStereo::Stereo ? 2 : 1;
255 switch (buf.format) {
256 case Format::PCM8:
257 state.current_buffer = Codec::DecodePCM8(num_channels, memory, buf.length);
258 break;
259 case Format::PCM16:
260 state.current_buffer = Codec::DecodePCM16(num_channels, memory, buf.length);
261 break;
262 case Format::ADPCM:
263 DEBUG_ASSERT(num_channels == 1);
264 state.current_buffer = Codec::DecodeADPCM(memory, buf.length, state.adpcm_coeffs, state.adpcm_state);
265 break;
266 default:
267 UNIMPLEMENTED();
268 break;
269 }
270 } else {
271 LOG_WARNING(Audio_DSP, "source_id=%zu buffer_id=%hu length=%u: Invalid physical address 0x%08X",
272 source_id, buf.buffer_id, buf.length, buf.physical_address);
273 state.current_buffer.clear();
274 return true;
275 }
276
277 switch (state.interpolation_mode) {
278 case InterpolationMode::None:
279 state.current_buffer = AudioInterp::None(state.interp_state, state.current_buffer, state.rate_multiplier);
280 break;
281 case InterpolationMode::Linear:
282 state.current_buffer = AudioInterp::Linear(state.interp_state, state.current_buffer, state.rate_multiplier);
283 break;
284 case InterpolationMode::Polyphase:
285 // TODO(merry): Implement polyphase interpolation
286 state.current_buffer = AudioInterp::Linear(state.interp_state, state.current_buffer, state.rate_multiplier);
287 break;
288 default:
289 UNIMPLEMENTED();
290 break;
291 }
292
293 state.current_sample_number = 0;
294 state.next_sample_number = 0;
295 state.current_buffer_id = buf.buffer_id;
296 state.buffer_update = buf.from_queue;
297
298 LOG_TRACE(Audio_DSP, "source_id=%zu buffer_id=%hu from_queue=%s current_buffer.size()=%zu",
299 source_id, buf.buffer_id, buf.from_queue ? "true" : "false", state.current_buffer.size());
300 return true;
301}
302
303SourceStatus::Status Source::GetCurrentStatus() {
304 SourceStatus::Status ret;
305
306 // Applications depend on the correct emulation of
307 // current_buffer_id_dirty and current_buffer_id to synchronise
308 // audio with video.
309 ret.is_enabled = state.enabled;
310 ret.current_buffer_id_dirty = state.buffer_update ? 1 : 0;
311 state.buffer_update = false;
312 ret.current_buffer_id = state.current_buffer_id;
313 ret.buffer_position = state.current_sample_number;
314 ret.sync = state.sync;
315
316 return ret;
317}
318
319} // namespace HLE
320} // namespace DSP
diff --git a/src/audio_core/hle/source.h b/src/audio_core/hle/source.h
new file mode 100644
index 000000000..7ee08d424
--- /dev/null
+++ b/src/audio_core/hle/source.h
@@ -0,0 +1,144 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <queue>
9#include <vector>
10
11#include "audio_core/codec.h"
12#include "audio_core/hle/common.h"
13#include "audio_core/hle/dsp.h"
14#include "audio_core/hle/filter.h"
15#include "audio_core/interpolate.h"
16
17#include "common/common_types.h"
18
19namespace DSP {
20namespace HLE {
21
22/**
23 * This module performs:
24 * - Buffer management
25 * - Decoding of buffers
26 * - Buffer resampling and interpolation
27 * - Per-source filtering (SimpleFilter, BiquadFilter)
28 * - Per-source gain
29 * - Other per-source processing
30 */
31class Source final {
32public:
33 explicit Source(size_t source_id_) : source_id(source_id_) {
34 Reset();
35 }
36
37 /// Resets internal state.
38 void Reset();
39
40 /**
41 * This is called once every audio frame. This performs per-source processing every frame.
42 * @param config The new configuration we've got for this Source from the application.
43 * @param adpcm_coeffs ADPCM coefficients to use if config tells us to use them (may contain invalid values otherwise).
44 * @return The current status of this Source. This is given back to the emulated application via SharedMemory.
45 */
46 SourceStatus::Status Tick(SourceConfiguration::Configuration& config, const s16_le (&adpcm_coeffs)[16]);
47
48 /**
49 * Mix this source's output into dest, using the gains for the `intermediate_mix_id`-th intermediate mixer.
50 * @param dest The QuadFrame32 to mix into.
51 * @param intermediate_mix_id The id of the intermediate mix whose gains we are using.
52 */
53 void MixInto(QuadFrame32& dest, size_t intermediate_mix_id) const;
54
55private:
56 const size_t source_id;
57 StereoFrame16 current_frame;
58
59 using Format = SourceConfiguration::Configuration::Format;
60 using InterpolationMode = SourceConfiguration::Configuration::InterpolationMode;
61 using MonoOrStereo = SourceConfiguration::Configuration::MonoOrStereo;
62
63 /// Internal representation of a buffer for our buffer queue
64 struct Buffer {
65 PAddr physical_address;
66 u32 length;
67 u8 adpcm_ps;
68 std::array<u16, 2> adpcm_yn;
69 bool adpcm_dirty;
70 bool is_looping;
71 u16 buffer_id;
72
73 MonoOrStereo mono_or_stereo;
74 Format format;
75
76 bool from_queue;
77 };
78
79 struct BufferOrder {
80 bool operator() (const Buffer& a, const Buffer& b) const {
81 // Lower buffer_id comes first.
82 return a.buffer_id > b.buffer_id;
83 }
84 };
85
86 struct {
87
88 // State variables
89
90 bool enabled = false;
91 u16 sync = 0;
92
93 // Mixing
94
95 std::array<std::array<float, 4>, 3> gain = {};
96
97 // Buffer queue
98
99 std::priority_queue<Buffer, std::vector<Buffer>, BufferOrder> input_queue;
100 MonoOrStereo mono_or_stereo = MonoOrStereo::Mono;
101 Format format = Format::ADPCM;
102
103 // Current buffer
104
105 u32 current_sample_number = 0;
106 u32 next_sample_number = 0;
107 std::vector<std::array<s16, 2>> current_buffer;
108
109 // buffer_id state
110
111 bool buffer_update = false;
112 u32 current_buffer_id = 0;
113
114 // Decoding state
115
116 std::array<s16, 16> adpcm_coeffs = {};
117 Codec::ADPCMState adpcm_state = {};
118
119 // Resampling state
120
121 float rate_multiplier = 1.0;
122 InterpolationMode interpolation_mode = InterpolationMode::Polyphase;
123 AudioInterp::State interp_state = {};
124
125 // Filter state
126
127 SourceFilters filters;
128
129 } state;
130
131 // Internal functions
132
133 /// INTERNAL: Update our internal state based on the current config.
134 void ParseConfig(SourceConfiguration::Configuration& config, const s16_le (&adpcm_coeffs)[16]);
135 /// INTERNAL: Generate the current audio output for this frame based on our internal state.
136 void GenerateFrame();
137 /// INTERNAL: Dequeues a buffer and does preprocessing on it (decoding, resampling). Puts it into current_buffer.
138 bool DequeueBuffer();
139 /// INTERNAL: Generates a SourceStatus::Status based on our internal state.
140 SourceStatus::Status GetCurrentStatus();
141};
142
143} // namespace HLE
144} // namespace DSP
diff --git a/src/audio_core/interpolate.cpp b/src/audio_core/interpolate.cpp
new file mode 100644
index 000000000..fcd3aa066
--- /dev/null
+++ b/src/audio_core/interpolate.cpp
@@ -0,0 +1,85 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "audio_core/interpolate.h"
6
7#include "common/assert.h"
8#include "common/math_util.h"
9
10namespace AudioInterp {
11
12// Calculations are done in fixed point with 24 fractional bits.
13// (This is not verified. This was chosen for minimal error.)
14constexpr u64 scale_factor = 1 << 24;
15constexpr u64 scale_mask = scale_factor - 1;
16
17/// Here we step over the input in steps of rate_multiplier, until we consume all of the input.
18/// Three adjacent samples are passed to fn each step.
19template <typename Function>
20static StereoBuffer16 StepOverSamples(State& state, const StereoBuffer16& input, float rate_multiplier, Function fn) {
21 ASSERT(rate_multiplier > 0);
22
23 if (input.size() < 2)
24 return {};
25
26 StereoBuffer16 output;
27 output.reserve(static_cast<size_t>(input.size() / rate_multiplier));
28
29 u64 step_size = static_cast<u64>(rate_multiplier * scale_factor);
30
31 u64 fposition = 0;
32 const u64 max_fposition = input.size() * scale_factor;
33
34 while (fposition < 1 * scale_factor) {
35 u64 fraction = fposition & scale_mask;
36
37 output.push_back(fn(fraction, state.xn2, state.xn1, input[0]));
38
39 fposition += step_size;
40 }
41
42 while (fposition < 2 * scale_factor) {
43 u64 fraction = fposition & scale_mask;
44
45 output.push_back(fn(fraction, state.xn1, input[0], input[1]));
46
47 fposition += step_size;
48 }
49
50 while (fposition < max_fposition) {
51 u64 fraction = fposition & scale_mask;
52
53 size_t index = static_cast<size_t>(fposition / scale_factor);
54 output.push_back(fn(fraction, input[index - 2], input[index - 1], input[index]));
55
56 fposition += step_size;
57 }
58
59 state.xn2 = input[input.size() - 2];
60 state.xn1 = input[input.size() - 1];
61
62 return output;
63}
64
65StereoBuffer16 None(State& state, const StereoBuffer16& input, float rate_multiplier) {
66 return StepOverSamples(state, input, rate_multiplier, [](u64 fraction, const auto& x0, const auto& x1, const auto& x2) {
67 return x0;
68 });
69}
70
71StereoBuffer16 Linear(State& state, const StereoBuffer16& input, float rate_multiplier) {
72 // Note on accuracy: Some values that this produces are +/- 1 from the actual firmware.
73 return StepOverSamples(state, input, rate_multiplier, [](u64 fraction, const auto& x0, const auto& x1, const auto& x2) {
74 // This is a saturated subtraction. (Verified by black-box fuzzing.)
75 s64 delta0 = MathUtil::Clamp<s64>(x1[0] - x0[0], -32768, 32767);
76 s64 delta1 = MathUtil::Clamp<s64>(x1[1] - x0[1], -32768, 32767);
77
78 return std::array<s16, 2> {
79 static_cast<s16>(x0[0] + fraction * delta0 / scale_factor),
80 static_cast<s16>(x0[1] + fraction * delta1 / scale_factor)
81 };
82 });
83}
84
85} // namespace AudioInterp
diff --git a/src/audio_core/interpolate.h b/src/audio_core/interpolate.h
new file mode 100644
index 000000000..a4c0a453d
--- /dev/null
+++ b/src/audio_core/interpolate.h
@@ -0,0 +1,41 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <vector>
9
10#include "common/common_types.h"
11
12namespace AudioInterp {
13
14/// A variable length buffer of signed PCM16 stereo samples.
15using StereoBuffer16 = std::vector<std::array<s16, 2>>;
16
17struct State {
18 // Two historical samples.
19 std::array<s16, 2> xn1 = {}; ///< x[n-1]
20 std::array<s16, 2> xn2 = {}; ///< x[n-2]
21};
22
23/**
24 * No interpolation. This is equivalent to a zero-order hold. There is a two-sample predelay.
25 * @param input Input buffer.
26 * @param rate_multiplier Stretch factor. Must be a positive non-zero value.
27 * rate_multiplier > 1.0 performs decimation and rate_multiplier < 1.0 performs upsampling.
28 * @return The resampled audio buffer.
29 */
30StereoBuffer16 None(State& state, const StereoBuffer16& input, float rate_multiplier);
31
32/**
33 * Linear interpolation. This is equivalent to a first-order hold. There is a two-sample predelay.
34 * @param input Input buffer.
35 * @param rate_multiplier Stretch factor. Must be a positive non-zero value.
36 * rate_multiplier > 1.0 performs decimation and rate_multiplier < 1.0 performs upsampling.
37 * @return The resampled audio buffer.
38 */
39StereoBuffer16 Linear(State& state, const StereoBuffer16& input, float rate_multiplier);
40
41} // namespace AudioInterp
diff --git a/src/audio_core/null_sink.h b/src/audio_core/null_sink.h
new file mode 100644
index 000000000..faf0ee4e1
--- /dev/null
+++ b/src/audio_core/null_sink.h
@@ -0,0 +1,29 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstddef>
8
9#include "audio_core/audio_core.h"
10#include "audio_core/sink.h"
11
12namespace AudioCore {
13
14class NullSink final : public Sink {
15public:
16 ~NullSink() override = default;
17
18 unsigned int GetNativeSampleRate() const override {
19 return native_sample_rate;
20 }
21
22 void EnqueueSamples(const std::vector<s16>&) override {}
23
24 size_t SamplesInQueue() const override {
25 return 0;
26 }
27};
28
29} // namespace AudioCore
diff --git a/src/audio_core/sdl2_sink.cpp b/src/audio_core/sdl2_sink.cpp
new file mode 100644
index 000000000..dc75c04ee
--- /dev/null
+++ b/src/audio_core/sdl2_sink.cpp
@@ -0,0 +1,126 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
#include <cstring>
#include <list>
#include <numeric>
#include <vector>

#include <SDL.h>

#include "audio_core/audio_core.h"
#include "audio_core/sdl2_sink.h"

#include "common/assert.h"
#include "common/logging/log.h"
16
17namespace AudioCore {
18
19struct SDL2Sink::Impl {
20 unsigned int sample_rate = 0;
21
22 SDL_AudioDeviceID audio_device_id = 0;
23
24 std::list<std::vector<s16>> queue;
25
26 static void Callback(void* impl_, u8* buffer, int buffer_size_in_bytes);
27};
28
29SDL2Sink::SDL2Sink() : impl(std::make_unique<Impl>()) {
30 if (SDL_Init(SDL_INIT_AUDIO) < 0) {
31 LOG_CRITICAL(Audio_Sink, "SDL_Init(SDL_INIT_AUDIO) failed");
32 impl->audio_device_id = 0;
33 return;
34 }
35
36 SDL_AudioSpec desired_audiospec;
37 SDL_zero(desired_audiospec);
38 desired_audiospec.format = AUDIO_S16;
39 desired_audiospec.channels = 2;
40 desired_audiospec.freq = native_sample_rate;
41 desired_audiospec.samples = 1024;
42 desired_audiospec.userdata = impl.get();
43 desired_audiospec.callback = &Impl::Callback;
44
45 SDL_AudioSpec obtained_audiospec;
46 SDL_zero(obtained_audiospec);
47
48 impl->audio_device_id = SDL_OpenAudioDevice(nullptr, false, &desired_audiospec, &obtained_audiospec, 0);
49 if (impl->audio_device_id <= 0) {
50 LOG_CRITICAL(Audio_Sink, "SDL_OpenAudioDevice failed");
51 return;
52 }
53
54 impl->sample_rate = obtained_audiospec.freq;
55
56 // SDL2 audio devices start out paused, unpause it:
57 SDL_PauseAudioDevice(impl->audio_device_id, 0);
58}
59
60SDL2Sink::~SDL2Sink() {
61 if (impl->audio_device_id <= 0)
62 return;
63
64 SDL_CloseAudioDevice(impl->audio_device_id);
65}
66
67unsigned int SDL2Sink::GetNativeSampleRate() const {
68 if (impl->audio_device_id <= 0)
69 return native_sample_rate;
70
71 return impl->sample_rate;
72}
73
74void SDL2Sink::EnqueueSamples(const std::vector<s16>& samples) {
75 if (impl->audio_device_id <= 0)
76 return;
77
78 ASSERT_MSG(samples.size() % 2 == 0, "Samples must be in interleaved stereo PCM16 format (size must be a multiple of two)");
79
80 SDL_LockAudioDevice(impl->audio_device_id);
81 impl->queue.emplace_back(samples);
82 SDL_UnlockAudioDevice(impl->audio_device_id);
83}
84
85size_t SDL2Sink::SamplesInQueue() const {
86 if (impl->audio_device_id <= 0)
87 return 0;
88
89 SDL_LockAudioDevice(impl->audio_device_id);
90
91 size_t total_size = std::accumulate(impl->queue.begin(), impl->queue.end(), static_cast<size_t>(0),
92 [](size_t sum, const auto& buffer) {
93 // Division by two because each stereo sample is made of two s16.
94 return sum + buffer.size() / 2;
95 });
96
97 SDL_UnlockAudioDevice(impl->audio_device_id);
98
99 return total_size;
100}
101
102void SDL2Sink::Impl::Callback(void* impl_, u8* buffer, int buffer_size_in_bytes) {
103 Impl* impl = reinterpret_cast<Impl*>(impl_);
104
105 size_t remaining_size = static_cast<size_t>(buffer_size_in_bytes) / sizeof(s16); // Keep track of size in 16-bit increments.
106
107 while (remaining_size > 0 && !impl->queue.empty()) {
108 if (impl->queue.front().size() <= remaining_size) {
109 memcpy(buffer, impl->queue.front().data(), impl->queue.front().size() * sizeof(s16));
110 buffer += impl->queue.front().size() * sizeof(s16);
111 remaining_size -= impl->queue.front().size();
112 impl->queue.pop_front();
113 } else {
114 memcpy(buffer, impl->queue.front().data(), remaining_size * sizeof(s16));
115 buffer += remaining_size * sizeof(s16);
116 impl->queue.front().erase(impl->queue.front().begin(), impl->queue.front().begin() + remaining_size);
117 remaining_size = 0;
118 }
119 }
120
121 if (remaining_size > 0) {
122 memset(buffer, 0, remaining_size * sizeof(s16));
123 }
124}
125
126} // namespace AudioCore
diff --git a/src/audio_core/sdl2_sink.h b/src/audio_core/sdl2_sink.h
new file mode 100644
index 000000000..0f296b673
--- /dev/null
+++ b/src/audio_core/sdl2_sink.h
@@ -0,0 +1,30 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstddef>
8#include <memory>
9
10#include "audio_core/sink.h"
11
12namespace AudioCore {
13
14class SDL2Sink final : public Sink {
15public:
16 SDL2Sink();
17 ~SDL2Sink() override;
18
19 unsigned int GetNativeSampleRate() const override;
20
21 void EnqueueSamples(const std::vector<s16>& samples) override;
22
23 size_t SamplesInQueue() const override;
24
25private:
26 struct Impl;
27 std::unique_ptr<Impl> impl;
28};
29
30} // namespace AudioCore
diff --git a/src/audio_core/sink.h b/src/audio_core/sink.h
index cad21a85e..1c881c3d2 100644
--- a/src/audio_core/sink.h
+++ b/src/audio_core/sink.h
@@ -19,7 +19,7 @@ public:
19 virtual ~Sink() = default; 19 virtual ~Sink() = default;
20 20
21 /// The native rate of this sink. The sink expects to be fed samples that respect this. (Units: samples/sec) 21 /// The native rate of this sink. The sink expects to be fed samples that respect this. (Units: samples/sec)
22 virtual unsigned GetNativeSampleRate() const = 0; 22 virtual unsigned int GetNativeSampleRate() const = 0;
23 23
24 /** 24 /**
25 * Feed stereo samples to sink. 25 * Feed stereo samples to sink.
diff --git a/src/audio_core/sink_details.cpp b/src/audio_core/sink_details.cpp
new file mode 100644
index 000000000..ba5e83d17
--- /dev/null
+++ b/src/audio_core/sink_details.cpp
@@ -0,0 +1,25 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <memory>
6#include <vector>
7
8#include "audio_core/null_sink.h"
9#include "audio_core/sink_details.h"
10
11#ifdef HAVE_SDL2
12#include "audio_core/sdl2_sink.h"
13#endif
14
15namespace AudioCore {
16
17// g_sink_details is ordered in terms of desirability, with the best choice at the top.
18const std::vector<SinkDetails> g_sink_details = {
19#ifdef HAVE_SDL2
20 { "sdl2", []() { return std::make_unique<SDL2Sink>(); } },
21#endif
22 { "null", []() { return std::make_unique<NullSink>(); } },
23};
24
25} // namespace AudioCore
diff --git a/src/audio_core/sink_details.h b/src/audio_core/sink_details.h
new file mode 100644
index 000000000..4b30cf835
--- /dev/null
+++ b/src/audio_core/sink_details.h
@@ -0,0 +1,27 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
#include <functional>
#include <memory>
#include <utility>
#include <vector>
10
11namespace AudioCore {
12
class Sink;

/// Describes one selectable audio sink: an identifier plus a factory that creates it.
struct SinkDetails {
    /// @param id_ Identifier of the sink. Only the pointer is stored, so the string must
    ///            outlive this object.
    /// @param factory_ Callable that constructs a new instance of the sink.
    SinkDetails(const char* id_, std::function<std::unique_ptr<Sink>()> factory_)
        // factory_ is taken by value, so move it into the member instead of copying it again.
        : id(id_), factory(std::move(factory_)) {}

    /// Name for this sink.
    const char* id;
    /// A method to call to construct an instance of this type of sink.
    std::function<std::unique_ptr<Sink>()> factory;
};
24
25extern const std::vector<SinkDetails> g_sink_details;
26
27} // namespace AudioCore
diff --git a/src/citra/CMakeLists.txt b/src/citra/CMakeLists.txt
index fa615deb9..43fa06b4e 100644
--- a/src/citra/CMakeLists.txt
+++ b/src/citra/CMakeLists.txt
@@ -21,7 +21,7 @@ target_link_libraries(citra ${SDL2_LIBRARY} ${OPENGL_gl_LIBRARY} inih glad)
21if (MSVC) 21if (MSVC)
22 target_link_libraries(citra getopt) 22 target_link_libraries(citra getopt)
23endif() 23endif()
24target_link_libraries(citra ${PLATFORM_LIBRARIES}) 24target_link_libraries(citra ${PLATFORM_LIBRARIES} Threads::Threads)
25 25
26if(${CMAKE_SYSTEM_NAME} MATCHES "Linux|FreeBSD|OpenBSD|NetBSD") 26if(${CMAKE_SYSTEM_NAME} MATCHES "Linux|FreeBSD|OpenBSD|NetBSD")
27 install(TARGETS citra RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/bin") 27 install(TARGETS citra RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/bin")
diff --git a/src/citra/citra.cpp b/src/citra/citra.cpp
index d6ad13f69..b4501eb2e 100644
--- a/src/citra/citra.cpp
+++ b/src/citra/citra.cpp
@@ -20,6 +20,7 @@
20#include "common/logging/log.h" 20#include "common/logging/log.h"
21#include "common/logging/backend.h" 21#include "common/logging/backend.h"
22#include "common/logging/filter.h" 22#include "common/logging/filter.h"
23#include "common/scm_rev.h"
23#include "common/scope_exit.h" 24#include "common/scope_exit.h"
24 25
25#include "core/settings.h" 26#include "core/settings.h"
@@ -34,11 +35,17 @@
34#include "video_core/video_core.h" 35#include "video_core/video_core.h"
35 36
36 37
37static void PrintHelp() 38static void PrintHelp(const char *argv0)
38{ 39{
39 std::cout << "Usage: citra [options] <filename>" << std::endl; 40 std::cout << "Usage: " << argv0 << " [options] <filename>\n"
40 std::cout << "--help, -h Display this information" << std::endl; 41 "-g, --gdbport=NUMBER Enable gdb stub on port NUMBER\n"
41 std::cout << "--gdbport, -g number Enable gdb stub on port number" << std::endl; 42 "-h, --help Display this help and exit\n"
43 "-v, --version Output version information and exit\n";
44}
45
46static void PrintVersion()
47{
48 std::cout << "Citra " << Common::g_scm_branch << " " << Common::g_scm_desc << std::endl;
42} 49}
43 50
44/// Application entry point 51/// Application entry point
@@ -51,18 +58,16 @@ int main(int argc, char **argv) {
51 std::string boot_filename; 58 std::string boot_filename;
52 59
53 static struct option long_options[] = { 60 static struct option long_options[] = {
54 { "help", no_argument, 0, 'h' },
55 { "gdbport", required_argument, 0, 'g' }, 61 { "gdbport", required_argument, 0, 'g' },
62 { "help", no_argument, 0, 'h' },
63 { "version", no_argument, 0, 'v' },
56 { 0, 0, 0, 0 } 64 { 0, 0, 0, 0 }
57 }; 65 };
58 66
59 while (optind < argc) { 67 while (optind < argc) {
60 char arg = getopt_long(argc, argv, ":hg:", long_options, &option_index); 68 char arg = getopt_long(argc, argv, "g:hv", long_options, &option_index);
61 if (arg != -1) { 69 if (arg != -1) {
62 switch (arg) { 70 switch (arg) {
63 case 'h':
64 PrintHelp();
65 return 0;
66 case 'g': 71 case 'g':
67 errno = 0; 72 errno = 0;
68 gdb_port = strtoul(optarg, &endarg, 0); 73 gdb_port = strtoul(optarg, &endarg, 0);
@@ -73,6 +78,12 @@ int main(int argc, char **argv) {
73 exit(1); 78 exit(1);
74 } 79 }
75 break; 80 break;
81 case 'h':
82 PrintHelp(argv[0]);
83 return 0;
84 case 'v':
85 PrintVersion();
86 return 0;
76 } 87 }
77 } else { 88 } else {
78 boot_filename = argv[optind]; 89 boot_filename = argv[optind];
diff --git a/src/citra/config.cpp b/src/citra/config.cpp
index 6b6617352..c5cb4fb38 100644
--- a/src/citra/config.cpp
+++ b/src/citra/config.cpp
@@ -65,11 +65,15 @@ void Config::ReadValues() {
65 // Renderer 65 // Renderer
66 Settings::values.use_hw_renderer = sdl2_config->GetBoolean("Renderer", "use_hw_renderer", false); 66 Settings::values.use_hw_renderer = sdl2_config->GetBoolean("Renderer", "use_hw_renderer", false);
67 Settings::values.use_shader_jit = sdl2_config->GetBoolean("Renderer", "use_shader_jit", true); 67 Settings::values.use_shader_jit = sdl2_config->GetBoolean("Renderer", "use_shader_jit", true);
68 Settings::values.use_scaled_resolution = sdl2_config->GetBoolean("Renderer", "use_scaled_resolution", false);
68 69
69 Settings::values.bg_red = (float)sdl2_config->GetReal("Renderer", "bg_red", 1.0); 70 Settings::values.bg_red = (float)sdl2_config->GetReal("Renderer", "bg_red", 1.0);
70 Settings::values.bg_green = (float)sdl2_config->GetReal("Renderer", "bg_green", 1.0); 71 Settings::values.bg_green = (float)sdl2_config->GetReal("Renderer", "bg_green", 1.0);
71 Settings::values.bg_blue = (float)sdl2_config->GetReal("Renderer", "bg_blue", 1.0); 72 Settings::values.bg_blue = (float)sdl2_config->GetReal("Renderer", "bg_blue", 1.0);
72 73
74 // Audio
75 Settings::values.sink_id = sdl2_config->Get("Audio", "output_engine", "auto");
76
73 // Data Storage 77 // Data Storage
74 Settings::values.use_virtual_sd = sdl2_config->GetBoolean("Data Storage", "use_virtual_sd", true); 78 Settings::values.use_virtual_sd = sdl2_config->GetBoolean("Data Storage", "use_virtual_sd", true);
75 79
@@ -81,7 +85,7 @@ void Config::ReadValues() {
81 85
82 // Debugging 86 // Debugging
83 Settings::values.use_gdbstub = sdl2_config->GetBoolean("Debugging", "use_gdbstub", false); 87 Settings::values.use_gdbstub = sdl2_config->GetBoolean("Debugging", "use_gdbstub", false);
84 Settings::values.gdbstub_port = sdl2_config->GetInteger("Debugging", "gdbstub_port", 24689); 88 Settings::values.gdbstub_port = static_cast<u16>(sdl2_config->GetInteger("Debugging", "gdbstub_port", 24689));
85} 89}
86 90
87void Config::Reload() { 91void Config::Reload() {
diff --git a/src/citra/default_ini.h b/src/citra/default_ini.h
index c9b490a00..49126356f 100644
--- a/src/citra/default_ini.h
+++ b/src/citra/default_ini.h
@@ -46,12 +46,21 @@ use_hw_renderer =
46# 0 : Interpreter (slow), 1 (default): JIT (fast) 46# 0 : Interpreter (slow), 1 (default): JIT (fast)
47use_shader_jit = 47use_shader_jit =
48 48
49# Whether to use native 3DS screen resolution or to scale rendering resolution to the displayed screen size.
50# 0 (default): Native, 1: Scaled
51use_scaled_resolution =
52
49# The clear color for the renderer. What shows up on the sides of the bottom screen. 53# The clear color for the renderer. What shows up on the sides of the bottom screen.
50# Must be in range of 0.0-1.0. Defaults to 1.0 for all. 54# Must be in range of 0.0-1.0. Defaults to 1.0 for all.
51bg_red = 55bg_red =
52bg_blue = 56bg_blue =
53bg_green = 57bg_green =
54 58
59[Audio]
60# Which audio output engine to use.
61# auto (default): Auto-select, null: No audio output, sdl2: SDL2 (if available)
62output_engine =
63
55[Data Storage] 64[Data Storage]
56# Whether to create a virtual SD card. 65# Whether to create a virtual SD card.
57# 1 (default): Yes, 0: No 66# 1 (default): Yes, 0: No
diff --git a/src/citra/emu_window/emu_window_sdl2.cpp b/src/citra/emu_window/emu_window_sdl2.cpp
index 924189f4c..12cdd9d95 100644
--- a/src/citra/emu_window/emu_window_sdl2.cpp
+++ b/src/citra/emu_window/emu_window_sdl2.cpp
@@ -9,6 +9,8 @@
9#define SDL_MAIN_HANDLED 9#define SDL_MAIN_HANDLED
10#include <SDL.h> 10#include <SDL.h>
11 11
12#include <glad/glad.h>
13
12#include "common/key_map.h" 14#include "common/key_map.h"
13#include "common/logging/log.h" 15#include "common/logging/log.h"
14#include "common/scm_rev.h" 16#include "common/scm_rev.h"
@@ -98,6 +100,11 @@ EmuWindow_SDL2::EmuWindow_SDL2() {
98 exit(1); 100 exit(1);
99 } 101 }
100 102
103 if (!gladLoadGLLoader(static_cast<GLADloadproc>(SDL_GL_GetProcAddress))) {
104 LOG_CRITICAL(Frontend, "Failed to initialize GL functions! Exiting...");
105 exit(1);
106 }
107
101 OnResize(); 108 OnResize();
102 OnMinimalClientAreaChangeRequest(GetActiveConfig().min_client_area_size); 109 OnMinimalClientAreaChangeRequest(GetActiveConfig().min_client_area_size);
103 SDL_PumpEvents(); 110 SDL_PumpEvents();
diff --git a/src/citra_qt/CMakeLists.txt b/src/citra_qt/CMakeLists.txt
index 6660d9879..3f0099200 100644
--- a/src/citra_qt/CMakeLists.txt
+++ b/src/citra_qt/CMakeLists.txt
@@ -55,6 +55,7 @@ set(HEADERS
55 configure_dialog.h 55 configure_dialog.h
56 configure_general.h 56 configure_general.h
57 game_list.h 57 game_list.h
58 game_list_p.h
58 hotkeys.h 59 hotkeys.h
59 main.h 60 main.h
60 ui_settings.h 61 ui_settings.h
@@ -92,7 +93,7 @@ else()
92endif() 93endif()
93target_link_libraries(citra-qt core video_core audio_core common qhexedit) 94target_link_libraries(citra-qt core video_core audio_core common qhexedit)
94target_link_libraries(citra-qt ${OPENGL_gl_LIBRARY} ${CITRA_QT_LIBS}) 95target_link_libraries(citra-qt ${OPENGL_gl_LIBRARY} ${CITRA_QT_LIBS})
95target_link_libraries(citra-qt ${PLATFORM_LIBRARIES}) 96target_link_libraries(citra-qt ${PLATFORM_LIBRARIES} Threads::Threads)
96 97
97if(${CMAKE_SYSTEM_NAME} MATCHES "Linux|FreeBSD|OpenBSD|NetBSD") 98if(${CMAKE_SYSTEM_NAME} MATCHES "Linux|FreeBSD|OpenBSD|NetBSD")
98 install(TARGETS citra-qt RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/bin") 99 install(TARGETS citra-qt RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/bin")
diff --git a/src/citra_qt/bootmanager.cpp b/src/citra_qt/bootmanager.cpp
index 8e60b9cad..01b81c11c 100644
--- a/src/citra_qt/bootmanager.cpp
+++ b/src/citra_qt/bootmanager.cpp
@@ -71,7 +71,9 @@ void EmuThread::run() {
71 // Shutdown the core emulation 71 // Shutdown the core emulation
72 System::Shutdown(); 72 System::Shutdown();
73 73
74#if MICROPROFILE_ENABLED
74 MicroProfileOnThreadExit(); 75 MicroProfileOnThreadExit();
76#endif
75 77
76 render_window->moveContext(); 78 render_window->moveContext();
77} 79}
diff --git a/src/citra_qt/config.cpp b/src/citra_qt/config.cpp
index e363be38a..b5bb75537 100644
--- a/src/citra_qt/config.cpp
+++ b/src/citra_qt/config.cpp
@@ -45,12 +45,17 @@ void Config::ReadValues() {
45 qt_config->beginGroup("Renderer"); 45 qt_config->beginGroup("Renderer");
46 Settings::values.use_hw_renderer = qt_config->value("use_hw_renderer", false).toBool(); 46 Settings::values.use_hw_renderer = qt_config->value("use_hw_renderer", false).toBool();
47 Settings::values.use_shader_jit = qt_config->value("use_shader_jit", true).toBool(); 47 Settings::values.use_shader_jit = qt_config->value("use_shader_jit", true).toBool();
48 Settings::values.use_scaled_resolution = qt_config->value("use_scaled_resolution", false).toBool();
48 49
49 Settings::values.bg_red = qt_config->value("bg_red", 1.0).toFloat(); 50 Settings::values.bg_red = qt_config->value("bg_red", 1.0).toFloat();
50 Settings::values.bg_green = qt_config->value("bg_green", 1.0).toFloat(); 51 Settings::values.bg_green = qt_config->value("bg_green", 1.0).toFloat();
51 Settings::values.bg_blue = qt_config->value("bg_blue", 1.0).toFloat(); 52 Settings::values.bg_blue = qt_config->value("bg_blue", 1.0).toFloat();
52 qt_config->endGroup(); 53 qt_config->endGroup();
53 54
55 qt_config->beginGroup("Audio");
56 Settings::values.sink_id = qt_config->value("output_engine", "auto").toString().toStdString();
57 qt_config->endGroup();
58
54 qt_config->beginGroup("Data Storage"); 59 qt_config->beginGroup("Data Storage");
55 Settings::values.use_virtual_sd = qt_config->value("use_virtual_sd", true).toBool(); 60 Settings::values.use_virtual_sd = qt_config->value("use_virtual_sd", true).toBool();
56 qt_config->endGroup(); 61 qt_config->endGroup();
@@ -129,6 +134,7 @@ void Config::SaveValues() {
129 qt_config->beginGroup("Renderer"); 134 qt_config->beginGroup("Renderer");
130 qt_config->setValue("use_hw_renderer", Settings::values.use_hw_renderer); 135 qt_config->setValue("use_hw_renderer", Settings::values.use_hw_renderer);
131 qt_config->setValue("use_shader_jit", Settings::values.use_shader_jit); 136 qt_config->setValue("use_shader_jit", Settings::values.use_shader_jit);
137 qt_config->setValue("use_scaled_resolution", Settings::values.use_scaled_resolution);
132 138
133 // Cast to double because Qt's written float values are not human-readable 139 // Cast to double because Qt's written float values are not human-readable
134 qt_config->setValue("bg_red", (double)Settings::values.bg_red); 140 qt_config->setValue("bg_red", (double)Settings::values.bg_red);
@@ -136,6 +142,10 @@ void Config::SaveValues() {
136 qt_config->setValue("bg_blue", (double)Settings::values.bg_blue); 142 qt_config->setValue("bg_blue", (double)Settings::values.bg_blue);
137 qt_config->endGroup(); 143 qt_config->endGroup();
138 144
145 qt_config->beginGroup("Audio");
146 qt_config->setValue("output_engine", QString::fromStdString(Settings::values.sink_id));
147 qt_config->endGroup();
148
139 qt_config->beginGroup("Data Storage"); 149 qt_config->beginGroup("Data Storage");
140 qt_config->setValue("use_virtual_sd", Settings::values.use_virtual_sd); 150 qt_config->setValue("use_virtual_sd", Settings::values.use_virtual_sd);
141 qt_config->endGroup(); 151 qt_config->endGroup();
diff --git a/src/citra_qt/configure_general.cpp b/src/citra_qt/configure_general.cpp
index a27d0d26c..62648e665 100644
--- a/src/citra_qt/configure_general.cpp
+++ b/src/citra_qt/configure_general.cpp
@@ -25,6 +25,7 @@ void ConfigureGeneral::setConfiguration() {
25 ui->region_combobox->setCurrentIndex(Settings::values.region_value); 25 ui->region_combobox->setCurrentIndex(Settings::values.region_value);
26 ui->toogle_hw_renderer->setChecked(Settings::values.use_hw_renderer); 26 ui->toogle_hw_renderer->setChecked(Settings::values.use_hw_renderer);
27 ui->toogle_shader_jit->setChecked(Settings::values.use_shader_jit); 27 ui->toogle_shader_jit->setChecked(Settings::values.use_shader_jit);
28 ui->toogle_scaled_resolution->setChecked(Settings::values.use_scaled_resolution);
28} 29}
29 30
30void ConfigureGeneral::applyConfiguration() { 31void ConfigureGeneral::applyConfiguration() {
@@ -33,5 +34,6 @@ void ConfigureGeneral::applyConfiguration() {
33 Settings::values.region_value = ui->region_combobox->currentIndex(); 34 Settings::values.region_value = ui->region_combobox->currentIndex();
34 Settings::values.use_hw_renderer = ui->toogle_hw_renderer->isChecked(); 35 Settings::values.use_hw_renderer = ui->toogle_hw_renderer->isChecked();
35 Settings::values.use_shader_jit = ui->toogle_shader_jit->isChecked(); 36 Settings::values.use_shader_jit = ui->toogle_shader_jit->isChecked();
37 Settings::values.use_scaled_resolution = ui->toogle_scaled_resolution->isChecked();
36 Settings::Apply(); 38 Settings::Apply();
37} 39}
diff --git a/src/citra_qt/configure_general.ui b/src/citra_qt/configure_general.ui
index 47184c5c6..5eb309793 100644
--- a/src/citra_qt/configure_general.ui
+++ b/src/citra_qt/configure_general.ui
@@ -128,6 +128,13 @@
128 </property> 128 </property>
129 </widget> 129 </widget>
130 </item> 130 </item>
131 <item>
132 <widget class="QCheckBox" name="toogle_scaled_resolution">
133 <property name="text">
134 <string>Enable scaled resolution</string>
135 </property>
136 </widget>
137 </item>
131 </layout> 138 </layout>
132 </item> 139 </item>
133 </layout> 140 </layout>
diff --git a/src/citra_qt/debugger/graphics_breakpoints.cpp b/src/citra_qt/debugger/graphics_breakpoints.cpp
index 819ec7707..fe66918a8 100644
--- a/src/citra_qt/debugger/graphics_breakpoints.cpp
+++ b/src/citra_qt/debugger/graphics_breakpoints.cpp
@@ -44,7 +44,7 @@ QVariant BreakPointModel::data(const QModelIndex& index, int role) const
44 { Pica::DebugContext::Event::PicaCommandProcessed, tr("Pica command processed") }, 44 { Pica::DebugContext::Event::PicaCommandProcessed, tr("Pica command processed") },
45 { Pica::DebugContext::Event::IncomingPrimitiveBatch, tr("Incoming primitive batch") }, 45 { Pica::DebugContext::Event::IncomingPrimitiveBatch, tr("Incoming primitive batch") },
46 { Pica::DebugContext::Event::FinishedPrimitiveBatch, tr("Finished primitive batch") }, 46 { Pica::DebugContext::Event::FinishedPrimitiveBatch, tr("Finished primitive batch") },
47 { Pica::DebugContext::Event::VertexLoaded, tr("Vertex loaded") }, 47 { Pica::DebugContext::Event::VertexShaderInvocation, tr("Vertex shader invocation") },
48 { Pica::DebugContext::Event::IncomingDisplayTransfer, tr("Incoming display transfer") }, 48 { Pica::DebugContext::Event::IncomingDisplayTransfer, tr("Incoming display transfer") },
49 { Pica::DebugContext::Event::GSPCommandProcessed, tr("GSP command processed") }, 49 { Pica::DebugContext::Event::GSPCommandProcessed, tr("GSP command processed") },
50 { Pica::DebugContext::Event::BufferSwapped, tr("Buffers swapped") } 50 { Pica::DebugContext::Event::BufferSwapped, tr("Buffers swapped") }
@@ -75,7 +75,7 @@ QVariant BreakPointModel::data(const QModelIndex& index, int role) const
75 case Role_IsEnabled: 75 case Role_IsEnabled:
76 { 76 {
77 auto context = context_weak.lock(); 77 auto context = context_weak.lock();
78 return context && context->breakpoints[event].enabled; 78 return context && context->breakpoints[(int)event].enabled;
79 } 79 }
80 80
81 default: 81 default:
@@ -110,7 +110,7 @@ bool BreakPointModel::setData(const QModelIndex& index, const QVariant& value, i
110 if (!context) 110 if (!context)
111 return false; 111 return false;
112 112
113 context->breakpoints[event].enabled = value == Qt::Checked; 113 context->breakpoints[(int)event].enabled = value == Qt::Checked;
114 QModelIndex changed_index = createIndex(index.row(), 0); 114 QModelIndex changed_index = createIndex(index.row(), 0);
115 emit dataChanged(changed_index, changed_index); 115 emit dataChanged(changed_index, changed_index);
116 return true; 116 return true;
diff --git a/src/citra_qt/debugger/graphics_framebuffer.cpp b/src/citra_qt/debugger/graphics_framebuffer.cpp
index c30e75933..68cff78b2 100644
--- a/src/citra_qt/debugger/graphics_framebuffer.cpp
+++ b/src/citra_qt/debugger/graphics_framebuffer.cpp
@@ -346,5 +346,11 @@ u32 GraphicsFramebufferWidget::BytesPerPixel(GraphicsFramebufferWidget::Format f
346 case Format::RGBA4: 346 case Format::RGBA4:
347 case Format::D16: 347 case Format::D16:
348 return 2; 348 return 2;
349 default:
350 UNREACHABLE_MSG("GraphicsFramebufferWidget::BytesPerPixel: this "
351 "should not be reached as this function should "
352 "be given a format which is in "
353 "GraphicsFramebufferWidget::Format. Instead got %i",
354 static_cast<int>(format));
349 } 355 }
350} 356}
diff --git a/src/citra_qt/debugger/graphics_tracing.cpp b/src/citra_qt/debugger/graphics_tracing.cpp
index e06498744..1402f8e79 100644
--- a/src/citra_qt/debugger/graphics_tracing.cpp
+++ b/src/citra_qt/debugger/graphics_tracing.cpp
@@ -2,6 +2,9 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
6#include <array>
7#include <iterator>
5#include <memory> 8#include <memory>
6 9
7#include <boost/range/algorithm/copy.hpp> 10#include <boost/range/algorithm/copy.hpp>
@@ -18,6 +21,7 @@
18 21
19#include "core/hw/gpu.h" 22#include "core/hw/gpu.h"
20#include "core/hw/lcd.h" 23#include "core/hw/lcd.h"
24#include "core/tracer/recorder.h"
21 25
22#include "nihstro/float24.h" 26#include "nihstro/float24.h"
23 27
diff --git a/src/citra_qt/debugger/graphics_vertex_shader.cpp b/src/citra_qt/debugger/graphics_vertex_shader.cpp
index d648d4640..854f6ff16 100644
--- a/src/citra_qt/debugger/graphics_vertex_shader.cpp
+++ b/src/citra_qt/debugger/graphics_vertex_shader.cpp
@@ -365,7 +365,7 @@ GraphicsVertexShaderWidget::GraphicsVertexShaderWidget(std::shared_ptr< Pica::De
365 input_data[i]->setValidator(new QDoubleValidator(input_data[i])); 365 input_data[i]->setValidator(new QDoubleValidator(input_data[i]));
366 } 366 }
367 367
368 breakpoint_warning = new QLabel(tr("(data only available at VertexLoaded breakpoints)")); 368 breakpoint_warning = new QLabel(tr("(data only available at vertex shader invocation breakpoints)"));
369 369
370 // TODO: Add some button for jumping to the shader entry point 370 // TODO: Add some button for jumping to the shader entry point
371 371
@@ -454,7 +454,7 @@ GraphicsVertexShaderWidget::GraphicsVertexShaderWidget(std::shared_ptr< Pica::De
454 454
455void GraphicsVertexShaderWidget::OnBreakPointHit(Pica::DebugContext::Event event, void* data) { 455void GraphicsVertexShaderWidget::OnBreakPointHit(Pica::DebugContext::Event event, void* data) {
456 auto input = static_cast<Pica::Shader::InputVertex*>(data); 456 auto input = static_cast<Pica::Shader::InputVertex*>(data);
457 if (event == Pica::DebugContext::Event::VertexLoaded) { 457 if (event == Pica::DebugContext::Event::VertexShaderInvocation) {
458 Reload(true, data); 458 Reload(true, data);
459 } else { 459 } else {
460 // No vertex data is retrievable => invalidate currently stored vertex data 460 // No vertex data is retrievable => invalidate currently stored vertex data
@@ -515,7 +515,7 @@ void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_d
515 } 515 }
516 516
517 // Initialize debug info text for current cycle count 517 // Initialize debug info text for current cycle count
518 cycle_index->setMaximum(debug_data.records.size() - 1); 518 cycle_index->setMaximum(static_cast<int>(debug_data.records.size() - 1));
519 OnCycleIndexChanged(cycle_index->value()); 519 OnCycleIndexChanged(cycle_index->value());
520 520
521 model->endResetModel(); 521 model->endResetModel();
diff --git a/src/citra_qt/debugger/profiler.cpp b/src/citra_qt/debugger/profiler.cpp
index 4f6ba0e1f..7bb010f77 100644
--- a/src/citra_qt/debugger/profiler.cpp
+++ b/src/citra_qt/debugger/profiler.cpp
@@ -9,13 +9,16 @@
9#include "citra_qt/debugger/profiler.h" 9#include "citra_qt/debugger/profiler.h"
10#include "citra_qt/util/util.h" 10#include "citra_qt/util/util.h"
11 11
12#include "common/common_types.h"
12#include "common/microprofile.h" 13#include "common/microprofile.h"
13#include "common/profiler_reporting.h" 14#include "common/profiler_reporting.h"
14 15
15// Include the implementation of the UI in this file. This isn't in microprofile.cpp because the 16// Include the implementation of the UI in this file. This isn't in microprofile.cpp because the
16// non-Qt frontends don't need it (and don't implement the UI drawing hooks either). 17// non-Qt frontends don't need it (and don't implement the UI drawing hooks either).
18#if MICROPROFILE_ENABLED
17#define MICROPROFILEUI_IMPL 1 19#define MICROPROFILEUI_IMPL 1
18#include "common/microprofileui.h" 20#include "common/microprofileui.h"
21#endif
19 22
20using namespace Common::Profiling; 23using namespace Common::Profiling;
21 24
@@ -34,21 +37,9 @@ static QVariant GetDataForColumn(int col, const AggregatedDuration& duration)
34 } 37 }
35} 38}
36 39
37static const TimingCategoryInfo* GetCategoryInfo(int id)
38{
39 const auto& categories = GetProfilingManager().GetTimingCategoriesInfo();
40 if ((size_t)id >= categories.size()) {
41 return nullptr;
42 } else {
43 return &categories[id];
44 }
45}
46
47ProfilerModel::ProfilerModel(QObject* parent) : QAbstractItemModel(parent) 40ProfilerModel::ProfilerModel(QObject* parent) : QAbstractItemModel(parent)
48{ 41{
49 updateProfilingInfo(); 42 updateProfilingInfo();
50 const auto& categories = GetProfilingManager().GetTimingCategoriesInfo();
51 results.time_per_category.resize(categories.size());
52} 43}
53 44
54QVariant ProfilerModel::headerData(int section, Qt::Orientation orientation, int role) const 45QVariant ProfilerModel::headerData(int section, Qt::Orientation orientation, int role) const
@@ -85,7 +76,7 @@ int ProfilerModel::rowCount(const QModelIndex& parent) const
85 if (parent.isValid()) { 76 if (parent.isValid()) {
86 return 0; 77 return 0;
87 } else { 78 } else {
88 return static_cast<int>(results.time_per_category.size() + 2); 79 return 2;
89 } 80 }
90} 81}
91 82
@@ -104,17 +95,6 @@ QVariant ProfilerModel::data(const QModelIndex& index, int role) const
104 } else { 95 } else {
105 return GetDataForColumn(index.column(), results.interframe_time); 96 return GetDataForColumn(index.column(), results.interframe_time);
106 } 97 }
107 } else {
108 if (index.column() == 0) {
109 const TimingCategoryInfo* info = GetCategoryInfo(index.row() - 2);
110 return info != nullptr ? QString(info->name) : QVariant();
111 } else {
112 if (index.row() - 2 < (int)results.time_per_category.size()) {
113 return GetDataForColumn(index.column(), results.time_per_category[index.row() - 2]);
114 } else {
115 return QVariant();
116 }
117 }
118 } 98 }
119 } 99 }
120 100
@@ -148,6 +128,8 @@ void ProfilerWidget::setProfilingInfoUpdateEnabled(bool enable)
148 } 128 }
149} 129}
150 130
131#if MICROPROFILE_ENABLED
132
151class MicroProfileWidget : public QWidget { 133class MicroProfileWidget : public QWidget {
152public: 134public:
153 MicroProfileWidget(QWidget* parent = nullptr); 135 MicroProfileWidget(QWidget* parent = nullptr);
@@ -171,6 +153,8 @@ private:
171 QTimer update_timer; 153 QTimer update_timer;
172}; 154};
173 155
156#endif
157
174MicroProfileDialog::MicroProfileDialog(QWidget* parent) 158MicroProfileDialog::MicroProfileDialog(QWidget* parent)
175 : QWidget(parent, Qt::Dialog) 159 : QWidget(parent, Qt::Dialog)
176{ 160{
@@ -180,6 +164,8 @@ MicroProfileDialog::MicroProfileDialog(QWidget* parent)
180 // Remove the "?" button from the titlebar and enable the maximize button 164 // Remove the "?" button from the titlebar and enable the maximize button
181 setWindowFlags(windowFlags() & ~Qt::WindowContextHelpButtonHint | Qt::WindowMaximizeButtonHint); 165 setWindowFlags(windowFlags() & ~Qt::WindowContextHelpButtonHint | Qt::WindowMaximizeButtonHint);
182 166
167#if MICROPROFILE_ENABLED
168
183 MicroProfileWidget* widget = new MicroProfileWidget(this); 169 MicroProfileWidget* widget = new MicroProfileWidget(this);
184 170
185 QLayout* layout = new QVBoxLayout(this); 171 QLayout* layout = new QVBoxLayout(this);
@@ -191,6 +177,7 @@ MicroProfileDialog::MicroProfileDialog(QWidget* parent)
191 setFocusProxy(widget); 177 setFocusProxy(widget);
192 widget->setFocusPolicy(Qt::StrongFocus); 178 widget->setFocusPolicy(Qt::StrongFocus);
193 widget->setFocus(); 179 widget->setFocus();
180#endif
194} 181}
195 182
196QAction* MicroProfileDialog::toggleViewAction() { 183QAction* MicroProfileDialog::toggleViewAction() {
@@ -218,6 +205,9 @@ void MicroProfileDialog::hideEvent(QHideEvent* ev) {
218 QWidget::hideEvent(ev); 205 QWidget::hideEvent(ev);
219} 206}
220 207
208
209#if MICROPROFILE_ENABLED
210
221/// There's no way to pass a user pointer to MicroProfile, so this variable is used to make the 211/// There's no way to pass a user pointer to MicroProfile, so this variable is used to make the
222/// QPainter available inside the drawing callbacks. 212/// QPainter available inside the drawing callbacks.
223static QPainter* mp_painter = nullptr; 213static QPainter* mp_painter = nullptr;
@@ -337,3 +327,4 @@ void MicroProfileDrawLine2D(u32 vertices_length, float* vertices, u32 hex_color)
337 mp_painter->drawPolyline(point_buf.data(), vertices_length); 327 mp_painter->drawPolyline(point_buf.data(), vertices_length);
338 point_buf.clear(); 328 point_buf.clear();
339} 329}
330#endif
diff --git a/src/citra_qt/debugger/profiler.h b/src/citra_qt/debugger/profiler.h
index 036054740..3b38ed8ec 100644
--- a/src/citra_qt/debugger/profiler.h
+++ b/src/citra_qt/debugger/profiler.h
@@ -7,8 +7,10 @@
7#include <QAbstractItemModel> 7#include <QAbstractItemModel>
8#include <QDockWidget> 8#include <QDockWidget>
9#include <QTimer> 9#include <QTimer>
10
10#include "ui_profiler.h" 11#include "ui_profiler.h"
11 12
13#include "common/microprofile.h"
12#include "common/profiler_reporting.h" 14#include "common/profiler_reporting.h"
13 15
14class ProfilerModel : public QAbstractItemModel 16class ProfilerModel : public QAbstractItemModel
@@ -49,6 +51,7 @@ private:
49 QTimer update_timer; 51 QTimer update_timer;
50}; 52};
51 53
54
52class MicroProfileDialog : public QWidget { 55class MicroProfileDialog : public QWidget {
53 Q_OBJECT 56 Q_OBJECT
54 57
diff --git a/src/citra_qt/game_list.cpp b/src/citra_qt/game_list.cpp
index d14532102..d4ac9c96e 100644
--- a/src/citra_qt/game_list.cpp
+++ b/src/citra_qt/game_list.cpp
@@ -34,8 +34,8 @@ GameList::GameList(QWidget* parent)
34 tree_view->setUniformRowHeights(true); 34 tree_view->setUniformRowHeights(true);
35 35
36 item_model->insertColumns(0, COLUMN_COUNT); 36 item_model->insertColumns(0, COLUMN_COUNT);
37 item_model->setHeaderData(COLUMN_FILE_TYPE, Qt::Horizontal, "File type");
38 item_model->setHeaderData(COLUMN_NAME, Qt::Horizontal, "Name"); 37 item_model->setHeaderData(COLUMN_NAME, Qt::Horizontal, "Name");
38 item_model->setHeaderData(COLUMN_FILE_TYPE, Qt::Horizontal, "File type");
39 item_model->setHeaderData(COLUMN_SIZE, Qt::Horizontal, "Size"); 39 item_model->setHeaderData(COLUMN_SIZE, Qt::Horizontal, "Size");
40 40
41 connect(tree_view, SIGNAL(activated(const QModelIndex&)), this, SLOT(ValidateEntry(const QModelIndex&))); 41 connect(tree_view, SIGNAL(activated(const QModelIndex&)), this, SLOT(ValidateEntry(const QModelIndex&)));
@@ -109,7 +109,11 @@ void GameList::SaveInterfaceLayout()
109void GameList::LoadInterfaceLayout() 109void GameList::LoadInterfaceLayout()
110{ 110{
111 auto header = tree_view->header(); 111 auto header = tree_view->header();
112 header->restoreState(UISettings::values.gamelist_header_state); 112 if (!header->restoreState(UISettings::values.gamelist_header_state)) {
113 // We are using the name column to display icons and titles
114 // so make it as large as possible as default.
115 header->resizeSection(COLUMN_NAME, header->width());
116 }
113 117
114 item_model->sort(header->sortIndicatorSection(), header->sortIndicatorOrder()); 118 item_model->sort(header->sortIndicatorSection(), header->sortIndicatorOrder());
115} 119}
@@ -143,9 +147,15 @@ void GameListWorker::AddFstEntriesToGameList(const std::string& dir_path, bool d
143 LOG_WARNING(Frontend, "Filetype and extension of file %s do not match.", physical_name.c_str()); 147 LOG_WARNING(Frontend, "Filetype and extension of file %s do not match.", physical_name.c_str());
144 } 148 }
145 149
150 std::vector<u8> smdh;
151 std::unique_ptr<Loader::AppLoader> loader = Loader::GetLoader(FileUtil::IOFile(physical_name, "rb"), filetype, filename_filename, physical_name);
152
153 if (loader)
154 loader->ReadIcon(smdh);
155
146 emit EntryReady({ 156 emit EntryReady({
157 new GameListItemPath(QString::fromStdString(physical_name), smdh),
147 new GameListItem(QString::fromStdString(Loader::GetFileTypeString(filetype))), 158 new GameListItem(QString::fromStdString(Loader::GetFileTypeString(filetype))),
148 new GameListItemPath(QString::fromStdString(physical_name)),
149 new GameListItemSize(FileUtil::GetSize(physical_name)), 159 new GameListItemSize(FileUtil::GetSize(physical_name)),
150 }); 160 });
151 } 161 }
diff --git a/src/citra_qt/game_list.h b/src/citra_qt/game_list.h
index 48febdc60..198674f04 100644
--- a/src/citra_qt/game_list.h
+++ b/src/citra_qt/game_list.h
@@ -20,8 +20,8 @@ class GameList : public QWidget {
20 20
21public: 21public:
22 enum { 22 enum {
23 COLUMN_FILE_TYPE,
24 COLUMN_NAME, 23 COLUMN_NAME,
24 COLUMN_FILE_TYPE,
25 COLUMN_SIZE, 25 COLUMN_SIZE,
26 COLUMN_COUNT, // Number of columns 26 COLUMN_COUNT, // Number of columns
27 }; 27 };
diff --git a/src/citra_qt/game_list_p.h b/src/citra_qt/game_list_p.h
index 820012bce..284f5da81 100644
--- a/src/citra_qt/game_list_p.h
+++ b/src/citra_qt/game_list_p.h
@@ -6,13 +6,85 @@
6 6
7#include <atomic> 7#include <atomic>
8 8
9#include <QImage>
9#include <QRunnable> 10#include <QRunnable>
10#include <QStandardItem> 11#include <QStandardItem>
11#include <QString> 12#include <QString>
12 13
13#include "citra_qt/util/util.h" 14#include "citra_qt/util/util.h"
14#include "common/string_util.h" 15#include "common/string_util.h"
16#include "common/color.h"
15 17
18#include "core/loader/loader.h"
19
20#include "video_core/utils.h"
21
22/**
23 * Tests if data is a valid SMDH by its length and magic number.
24 * @param smdh_data data buffer to test
25 * @return bool test result
26 */
27static bool IsValidSMDH(const std::vector<u8>& smdh_data) {
28 if (smdh_data.size() < sizeof(Loader::SMDH))
29 return false;
30
31 u32 magic;
32 memcpy(&magic, smdh_data.data(), 4);
33
34 return Loader::MakeMagic('S', 'M', 'D', 'H') == magic;
35}
36
37/**
38 * Gets game icon from SMDH
39 * @param sdmh SMDH data
40 * @param large If true, returns large icon (48x48), otherwise returns small icon (24x24)
41 * @return QPixmap game icon
42 */
43static QPixmap GetIconFromSMDH(const Loader::SMDH& smdh, bool large) {
44 u32 size;
45 const u8* icon_data;
46
47 if (large) {
48 size = 48;
49 icon_data = smdh.large_icon.data();
50 } else {
51 size = 24;
52 icon_data = smdh.small_icon.data();
53 }
54
55 QImage icon(size, size, QImage::Format::Format_RGB888);
56 for (u32 x = 0; x < size; ++x) {
57 for (u32 y = 0; y < size; ++y) {
58 u32 coarse_y = y & ~7;
59 auto v = Color::DecodeRGB565(
60 icon_data + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * size * 2);
61 icon.setPixel(x, y, qRgb(v.r(), v.g(), v.b()));
62 }
63 }
64 return QPixmap::fromImage(icon);
65}
66
67/**
68 * Gets the default icon (for games without valid SMDH)
69 * @param large If true, returns large icon (48x48), otherwise returns small icon (24x24)
70 * @return QPixmap default icon
71 */
72static QPixmap GetDefaultIcon(bool large) {
73 int size = large ? 48 : 24;
74 QPixmap icon(size, size);
75 icon.fill(Qt::transparent);
76 return icon;
77}
78
79/**
80 * Gets the short game title fromn SMDH
81 * @param sdmh SMDH data
82 * @param language title language
83 * @return QString short title
84 */
85static QString GetShortTitleFromSMDH(const Loader::SMDH& smdh, Loader::SMDH::TitleLanguage language) {
86 return QString::fromUtf16(smdh.titles[static_cast<int>(language)].short_title.data());
87}
16 88
17class GameListItem : public QStandardItem { 89class GameListItem : public QStandardItem {
18 90
@@ -27,29 +99,43 @@ public:
27 * A specialization of GameListItem for path values. 99 * A specialization of GameListItem for path values.
28 * This class ensures that for every full path value it holds, a correct string representation 100 * This class ensures that for every full path value it holds, a correct string representation
29 * of just the filename (with no extension) will be displayed to the user. 101 * of just the filename (with no extension) will be displayed to the user.
102 * If this class recieves valid SMDH data, it will also display game icons and titles.
30 */ 103 */
31class GameListItemPath : public GameListItem { 104class GameListItemPath : public GameListItem {
32 105
33public: 106public:
34 static const int FullPathRole = Qt::UserRole + 1; 107 static const int FullPathRole = Qt::UserRole + 1;
108 static const int TitleRole = Qt::UserRole + 2;
35 109
36 GameListItemPath(): GameListItem() {} 110 GameListItemPath(): GameListItem() {}
37 GameListItemPath(const QString& game_path): GameListItem() 111 GameListItemPath(const QString& game_path, const std::vector<u8>& smdh_data): GameListItem()
38 { 112 {
39 setData(game_path, FullPathRole); 113 setData(game_path, FullPathRole);
114
115 if (!IsValidSMDH(smdh_data)) {
116 // SMDH is not valid, set a default icon
117 setData(GetDefaultIcon(true), Qt::DecorationRole);
118 return;
119 }
120
121 Loader::SMDH smdh;
122 memcpy(&smdh, smdh_data.data(), sizeof(Loader::SMDH));
123
124 // Get icon from SMDH
125 setData(GetIconFromSMDH(smdh, true), Qt::DecorationRole);
126
127 // Get title form SMDH
128 setData(GetShortTitleFromSMDH(smdh, Loader::SMDH::TitleLanguage::English), TitleRole);
40 } 129 }
41 130
42 void setData(const QVariant& value, int role) override 131 QVariant data(int role) const override {
43 { 132 if (role == Qt::DisplayRole) {
44 // By specializing setData for FullPathRole, we can ensure that the two string
45 // representations of the data are always accurate and in the correct format.
46 if (role == FullPathRole) {
47 std::string filename; 133 std::string filename;
48 Common::SplitPath(value.toString().toStdString(), nullptr, &filename, nullptr); 134 Common::SplitPath(data(FullPathRole).toString().toStdString(), nullptr, &filename, nullptr);
49 GameListItem::setData(QString::fromStdString(filename), Qt::DisplayRole); 135 QString title = data(TitleRole).toString();
50 GameListItem::setData(value, FullPathRole); 136 return QString::fromStdString(filename) + (title.isEmpty() ? "" : "\n " + title);
51 } else { 137 } else {
52 GameListItem::setData(value, role); 138 return GameListItem::data(role);
53 } 139 }
54 } 140 }
55}; 141};
diff --git a/src/citra_qt/main.cpp b/src/citra_qt/main.cpp
index 2ca1e51f6..a85c94a4b 100644
--- a/src/citra_qt/main.cpp
+++ b/src/citra_qt/main.cpp
@@ -6,6 +6,9 @@
6#include <memory> 6#include <memory>
7#include <thread> 7#include <thread>
8 8
9#include <glad/glad.h>
10
11#define QT_NO_OPENGL
9#include <QDesktopWidget> 12#include <QDesktopWidget>
10#include <QtGui> 13#include <QtGui>
11#include <QFileDialog> 14#include <QFileDialog>
@@ -69,8 +72,10 @@ GMainWindow::GMainWindow() : config(new Config()), emu_thread(nullptr)
69 addDockWidget(Qt::BottomDockWidgetArea, profilerWidget); 72 addDockWidget(Qt::BottomDockWidgetArea, profilerWidget);
70 profilerWidget->hide(); 73 profilerWidget->hide();
71 74
75#if MICROPROFILE_ENABLED
72 microProfileDialog = new MicroProfileDialog(this); 76 microProfileDialog = new MicroProfileDialog(this);
73 microProfileDialog->hide(); 77 microProfileDialog->hide();
78#endif
74 79
75 disasmWidget = new DisassemblerWidget(this, emu_thread.get()); 80 disasmWidget = new DisassemblerWidget(this, emu_thread.get());
76 addDockWidget(Qt::BottomDockWidgetArea, disasmWidget); 81 addDockWidget(Qt::BottomDockWidgetArea, disasmWidget);
@@ -110,7 +115,9 @@ GMainWindow::GMainWindow() : config(new Config()), emu_thread(nullptr)
110 115
111 QMenu* debug_menu = ui.menu_View->addMenu(tr("Debugging")); 116 QMenu* debug_menu = ui.menu_View->addMenu(tr("Debugging"));
112 debug_menu->addAction(profilerWidget->toggleViewAction()); 117 debug_menu->addAction(profilerWidget->toggleViewAction());
118#if MICROPROFILE_ENABLED
113 debug_menu->addAction(microProfileDialog->toggleViewAction()); 119 debug_menu->addAction(microProfileDialog->toggleViewAction());
120#endif
114 debug_menu->addAction(disasmWidget->toggleViewAction()); 121 debug_menu->addAction(disasmWidget->toggleViewAction());
115 debug_menu->addAction(registersWidget->toggleViewAction()); 122 debug_menu->addAction(registersWidget->toggleViewAction());
116 debug_menu->addAction(callstackWidget->toggleViewAction()); 123 debug_menu->addAction(callstackWidget->toggleViewAction());
@@ -136,8 +143,10 @@ GMainWindow::GMainWindow() : config(new Config()), emu_thread(nullptr)
136 restoreGeometry(UISettings::values.geometry); 143 restoreGeometry(UISettings::values.geometry);
137 restoreState(UISettings::values.state); 144 restoreState(UISettings::values.state);
138 render_window->restoreGeometry(UISettings::values.renderwindow_geometry); 145 render_window->restoreGeometry(UISettings::values.renderwindow_geometry);
146#if MICROPROFILE_ENABLED
139 microProfileDialog->restoreGeometry(UISettings::values.microprofile_geometry); 147 microProfileDialog->restoreGeometry(UISettings::values.microprofile_geometry);
140 microProfileDialog->setVisible(UISettings::values.microprofile_visible); 148 microProfileDialog->setVisible(UISettings::values.microprofile_visible);
149#endif
141 150
142 game_list->LoadInterfaceLayout(); 151 game_list->LoadInterfaceLayout();
143 152
@@ -234,6 +243,14 @@ bool GMainWindow::InitializeSystem() {
234 if (emu_thread != nullptr) 243 if (emu_thread != nullptr)
235 ShutdownGame(); 244 ShutdownGame();
236 245
246 render_window->MakeCurrent();
247 if (!gladLoadGL()) {
248 QMessageBox::critical(this, tr("Error while starting Citra!"),
249 tr("Failed to initialize the video core!\n\n"
250 "Please ensure that your GPU supports OpenGL 3.3 and that you have the latest graphics driver."));
251 return false;
252 }
253
237 // Initialize the core emulation 254 // Initialize the core emulation
238 System::Result system_result = System::Init(render_window); 255 System::Result system_result = System::Init(render_window);
239 if (System::Result::Success != system_result) { 256 if (System::Result::Success != system_result) {
@@ -511,9 +528,10 @@ void GMainWindow::closeEvent(QCloseEvent* event) {
511 UISettings::values.geometry = saveGeometry(); 528 UISettings::values.geometry = saveGeometry();
512 UISettings::values.state = saveState(); 529 UISettings::values.state = saveState();
513 UISettings::values.renderwindow_geometry = render_window->saveGeometry(); 530 UISettings::values.renderwindow_geometry = render_window->saveGeometry();
531#if MICROPROFILE_ENABLED
514 UISettings::values.microprofile_geometry = microProfileDialog->saveGeometry(); 532 UISettings::values.microprofile_geometry = microProfileDialog->saveGeometry();
515 UISettings::values.microprofile_visible = microProfileDialog->isVisible(); 533 UISettings::values.microprofile_visible = microProfileDialog->isVisible();
516 534#endif
517 UISettings::values.single_window_mode = ui.action_Single_Window_Mode->isChecked(); 535 UISettings::values.single_window_mode = ui.action_Single_Window_Mode->isChecked();
518 UISettings::values.display_titlebar = ui.actionDisplay_widget_title_bars->isChecked(); 536 UISettings::values.display_titlebar = ui.actionDisplay_widget_title_bars->isChecked();
519 UISettings::values.first_start = false; 537 UISettings::values.first_start = false;
diff --git a/src/citra_qt/util/util.cpp b/src/citra_qt/util/util.cpp
index 8734a8efd..2f9beb5cc 100644
--- a/src/citra_qt/util/util.cpp
+++ b/src/citra_qt/util/util.cpp
@@ -19,7 +19,7 @@ QString ReadableByteSize(qulonglong size) {
19 static const std::array<const char*, 6> units = { "B", "KiB", "MiB", "GiB", "TiB", "PiB" }; 19 static const std::array<const char*, 6> units = { "B", "KiB", "MiB", "GiB", "TiB", "PiB" };
20 if (size == 0) 20 if (size == 0)
21 return "0"; 21 return "0";
22 int digit_groups = std::min<int>((int)(std::log10(size) / std::log10(1024)), units.size()); 22 int digit_groups = std::min<int>(static_cast<int>(std::log10(size) / std::log10(1024)), static_cast<int>(units.size()));
23 return QString("%L1 %2").arg(size / std::pow(1024, digit_groups), 0, 'f', 1) 23 return QString("%L1 %2").arg(size / std::pow(1024, digit_groups), 0, 'f', 1)
24 .arg(units[digit_groups]); 24 .arg(units[digit_groups]);
25} 25}
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index c839ce173..aa6eee2a3 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -47,7 +47,6 @@ set(HEADERS
47 microprofile.h 47 microprofile.h
48 microprofileui.h 48 microprofileui.h
49 platform.h 49 platform.h
50 profiler.h
51 profiler_reporting.h 50 profiler_reporting.h
52 scm_rev.h 51 scm_rev.h
53 scope_exit.h 52 scope_exit.h
diff --git a/src/common/assert.h b/src/common/assert.h
index 6849778b7..cd9b819a9 100644
--- a/src/common/assert.h
+++ b/src/common/assert.h
@@ -39,6 +39,7 @@ static void assert_noinline_call(const Fn& fn) {
39 }); } while (0) 39 }); } while (0)
40 40
41#define UNREACHABLE() ASSERT_MSG(false, "Unreachable code!") 41#define UNREACHABLE() ASSERT_MSG(false, "Unreachable code!")
42#define UNREACHABLE_MSG(...) ASSERT_MSG(false, __VA_ARGS__)
42 43
43#ifdef _DEBUG 44#ifdef _DEBUG
44#define DEBUG_ASSERT(_a_) ASSERT(_a_) 45#define DEBUG_ASSERT(_a_) ASSERT(_a_)
@@ -49,3 +50,4 @@ static void assert_noinline_call(const Fn& fn) {
49#endif 50#endif
50 51
51#define UNIMPLEMENTED() DEBUG_ASSERT_MSG(false, "Unimplemented code!") 52#define UNIMPLEMENTED() DEBUG_ASSERT_MSG(false, "Unimplemented code!")
53#define UNIMPLEMENTED_MSG(_a_, ...) ASSERT_MSG(false, _a_, __VA_ARGS__) \ No newline at end of file
diff --git a/src/common/bit_field.h b/src/common/bit_field.h
index 371eb17a1..4748999ed 100644
--- a/src/common/bit_field.h
+++ b/src/common/bit_field.h
@@ -186,5 +186,5 @@ private:
186#pragma pack() 186#pragma pack()
187 187
188#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER) 188#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER)
189static_assert(std::is_trivially_copyable<BitField<0, 1, u32>>::value, "BitField must be trivially copyable"); 189static_assert(std::is_trivially_copyable<BitField<0, 1, unsigned>>::value, "BitField must be trivially copyable");
190#endif 190#endif
diff --git a/src/common/bit_set.h b/src/common/bit_set.h
index 85f91e786..7f5de8df2 100644
--- a/src/common/bit_set.h
+++ b/src/common/bit_set.h
@@ -7,6 +7,7 @@
7#include <intrin.h> 7#include <intrin.h>
8#endif 8#endif
9#include <initializer_list> 9#include <initializer_list>
10#include <new>
10#include <type_traits> 11#include <type_traits>
11#include "common/common_types.h" 12#include "common/common_types.h"
12 13
@@ -186,4 +187,4 @@ public:
186typedef Common::BitSet<u8> BitSet8; 187typedef Common::BitSet<u8> BitSet8;
187typedef Common::BitSet<u16> BitSet16; 188typedef Common::BitSet<u16> BitSet16;
188typedef Common::BitSet<u32> BitSet32; 189typedef Common::BitSet<u32> BitSet32;
189typedef Common::BitSet<u64> BitSet64; \ No newline at end of file 190typedef Common::BitSet<u64> BitSet64;
diff --git a/src/common/code_block.h b/src/common/code_block.h
index 9ef7296d3..2fa4a0090 100644
--- a/src/common/code_block.h
+++ b/src/common/code_block.h
@@ -4,8 +4,10 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "common_types.h" 7#include <cstddef>
8#include "memory_util.h" 8
9#include "common/common_types.h"
10#include "common/memory_util.h"
9 11
10// Everything that needs to generate code should inherit from this. 12// Everything that needs to generate code should inherit from this.
11// You get memory management for free, plus, you can use all emitter functions without 13// You get memory management for free, plus, you can use all emitter functions without
diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h
index aa6aff7b9..ab3515683 100644
--- a/src/common/common_funcs.h
+++ b/src/common/common_funcs.h
@@ -4,6 +4,10 @@
4 4
5#pragma once 5#pragma once
6 6
7#if !defined(ARCHITECTURE_x86_64) && !defined(_M_ARM)
8#include <cstdlib> // for exit
9#endif
10
7#include "common_types.h" 11#include "common_types.h"
8 12
9#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) 13#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
diff --git a/src/common/file_util.cpp b/src/common/file_util.cpp
index 53700c865..6e2867658 100644
--- a/src/common/file_util.cpp
+++ b/src/common/file_util.cpp
@@ -69,9 +69,10 @@ static void StripTailDirSlashes(std::string &fname)
69{ 69{
70 if (fname.length() > 1) 70 if (fname.length() > 1)
71 { 71 {
72 size_t i = fname.length() - 1; 72 size_t i = fname.length();
73 while (fname[i] == DIR_SEP_CHR) 73 while (i > 0 && fname[i - 1] == DIR_SEP_CHR)
74 fname[i--] = '\0'; 74 --i;
75 fname.resize(i);
75 } 76 }
76 return; 77 return;
77} 78}
@@ -85,6 +86,10 @@ bool Exists(const std::string &filename)
85 StripTailDirSlashes(copy); 86 StripTailDirSlashes(copy);
86 87
87#ifdef _WIN32 88#ifdef _WIN32
89 // Windows needs a slash to identify a driver root
90 if (copy.size() != 0 && copy.back() == ':')
91 copy += DIR_SEP_CHR;
92
88 int result = _wstat64(Common::UTF8ToUTF16W(copy).c_str(), &file_info); 93 int result = _wstat64(Common::UTF8ToUTF16W(copy).c_str(), &file_info);
89#else 94#else
90 int result = stat64(copy.c_str(), &file_info); 95 int result = stat64(copy.c_str(), &file_info);
@@ -102,6 +107,10 @@ bool IsDirectory(const std::string &filename)
102 StripTailDirSlashes(copy); 107 StripTailDirSlashes(copy);
103 108
104#ifdef _WIN32 109#ifdef _WIN32
110 // Windows needs a slash to identify a driver root
111 if (copy.size() != 0 && copy.back() == ':')
112 copy += DIR_SEP_CHR;
113
105 int result = _wstat64(Common::UTF8ToUTF16W(copy).c_str(), &file_info); 114 int result = _wstat64(Common::UTF8ToUTF16W(copy).c_str(), &file_info);
106#else 115#else
107 int result = stat64(copy.c_str(), &file_info); 116 int result = stat64(copy.c_str(), &file_info);
diff --git a/src/common/file_util.h b/src/common/file_util.h
index b54a9fb72..c6a8694ce 100644
--- a/src/common/file_util.h
+++ b/src/common/file_util.h
@@ -7,9 +7,9 @@
7#include <array> 7#include <array>
8#include <fstream> 8#include <fstream>
9#include <functional> 9#include <functional>
10#include <cstddef>
11#include <cstdio> 10#include <cstdio>
12#include <string> 11#include <string>
12#include <type_traits>
13#include <vector> 13#include <vector>
14 14
15#include "common/common_types.h" 15#include "common/common_types.h"
@@ -192,7 +192,9 @@ public:
192 size_t ReadArray(T* data, size_t length) 192 size_t ReadArray(T* data, size_t length)
193 { 193 {
194 static_assert(std::is_standard_layout<T>(), "Given array does not consist of standard layout objects"); 194 static_assert(std::is_standard_layout<T>(), "Given array does not consist of standard layout objects");
195#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER)
195 static_assert(std::is_trivially_copyable<T>(), "Given array does not consist of trivially copyable objects"); 196 static_assert(std::is_trivially_copyable<T>(), "Given array does not consist of trivially copyable objects");
197#endif
196 198
197 if (!IsOpen()) { 199 if (!IsOpen()) {
198 m_good = false; 200 m_good = false;
@@ -210,7 +212,9 @@ public:
210 size_t WriteArray(const T* data, size_t length) 212 size_t WriteArray(const T* data, size_t length)
211 { 213 {
212 static_assert(std::is_standard_layout<T>(), "Given array does not consist of standard layout objects"); 214 static_assert(std::is_standard_layout<T>(), "Given array does not consist of standard layout objects");
215#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER)
213 static_assert(std::is_trivially_copyable<T>(), "Given array does not consist of trivially copyable objects"); 216 static_assert(std::is_trivially_copyable<T>(), "Given array does not consist of trivially copyable objects");
217#endif
214 218
215 if (!IsOpen()) { 219 if (!IsOpen()) {
216 m_good = false; 220 m_good = false;
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index 3d39f94d5..d7008fc66 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -65,6 +65,7 @@ namespace Log {
65 SUB(Render, OpenGL) \ 65 SUB(Render, OpenGL) \
66 CLS(Audio) \ 66 CLS(Audio) \
67 SUB(Audio, DSP) \ 67 SUB(Audio, DSP) \
68 SUB(Audio, Sink) \
68 CLS(Loader) 69 CLS(Loader)
69 70
70// GetClassName is a macro defined by Windows.h, grrr... 71// GetClassName is a macro defined by Windows.h, grrr...
diff --git a/src/common/logging/log.h b/src/common/logging/log.h
index 521362317..c6910b1c7 100644
--- a/src/common/logging/log.h
+++ b/src/common/logging/log.h
@@ -78,8 +78,9 @@ enum class Class : ClassType {
78 Render, ///< Emulator video output and hardware acceleration 78 Render, ///< Emulator video output and hardware acceleration
79 Render_Software, ///< Software renderer backend 79 Render_Software, ///< Software renderer backend
80 Render_OpenGL, ///< OpenGL backend 80 Render_OpenGL, ///< OpenGL backend
81 Audio, ///< Emulator audio output 81 Audio, ///< Audio emulation
82 Audio_DSP, ///< The HLE implementation of the DSP 82 Audio_DSP, ///< The HLE implementation of the DSP
83 Audio_Sink, ///< Emulator audio output backend
83 Loader, ///< ROM loader 84 Loader, ///< ROM loader
84 85
85 Count ///< Total number of logging classes 86 Count ///< Total number of logging classes
diff --git a/src/common/microprofile.h b/src/common/microprofile.h
index d3b6cb97c..ef312c6e1 100644
--- a/src/common/microprofile.h
+++ b/src/common/microprofile.h
@@ -4,6 +4,10 @@
4 4
5#pragma once 5#pragma once
6 6
7// Uncomment this to disable microprofile. This will get you cleaner profiles when using
8// external sampling profilers like "Very Sleepy", and will improve performance somewhat.
9// #define MICROPROFILE_ENABLED 0
10
7// Customized Citra settings. 11// Customized Citra settings.
8// This file wraps the MicroProfile header so that these are consistent everywhere. 12// This file wraps the MicroProfile header so that these are consistent everywhere.
9#define MICROPROFILE_WEBSERVER 0 13#define MICROPROFILE_WEBSERVER 0
diff --git a/src/common/microprofileui.h b/src/common/microprofileui.h
index 97c369bd9..41abe6b75 100644
--- a/src/common/microprofileui.h
+++ b/src/common/microprofileui.h
@@ -13,4 +13,7 @@
13#define MICROPROFILE_HELP_ALT "Right-Click" 13#define MICROPROFILE_HELP_ALT "Right-Click"
14#define MICROPROFILE_HELP_MOD "Ctrl" 14#define MICROPROFILE_HELP_MOD "Ctrl"
15 15
16// This isn't included by microprofileui.h :(
17#include <cstdlib> // For std::abs
18
16#include <microprofileui.h> 19#include <microprofileui.h>
diff --git a/src/common/profiler.cpp b/src/common/profiler.cpp
index 7792edd2f..49eb3f40c 100644
--- a/src/common/profiler.cpp
+++ b/src/common/profiler.cpp
@@ -7,71 +7,16 @@
7#include <vector> 7#include <vector>
8 8
9#include "common/assert.h" 9#include "common/assert.h"
10#include "common/profiler.h"
11#include "common/profiler_reporting.h" 10#include "common/profiler_reporting.h"
12#include "common/synchronized_wrapper.h" 11#include "common/synchronized_wrapper.h"
13 12
14#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013.
15 #define WIN32_LEAN_AND_MEAN
16 #include <Windows.h> // For QueryPerformanceCounter/Frequency
17#endif
18
19namespace Common { 13namespace Common {
20namespace Profiling { 14namespace Profiling {
21 15
22#if ENABLE_PROFILING
23thread_local Timer* Timer::current_timer = nullptr;
24#endif
25
26#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013
27QPCClock::time_point QPCClock::now() {
28 static LARGE_INTEGER freq;
29 // Use this dummy local static to ensure this gets initialized once.
30 static BOOL dummy = QueryPerformanceFrequency(&freq);
31
32 LARGE_INTEGER ticks;
33 QueryPerformanceCounter(&ticks);
34
35 // This is prone to overflow when multiplying, which is why I'm using micro instead of nano. The
36 // correct way to approach this would be to just return ticks as a time_point and then subtract
37 // and do this conversion when creating a duration from two time_points, however, as far as I
38 // could tell the C++ requirements for these types are incompatible with this approach.
39 return time_point(duration(ticks.QuadPart * std::micro::den / freq.QuadPart));
40}
41#endif
42
43TimingCategory::TimingCategory(const char* name, TimingCategory* parent)
44 : accumulated_duration(0) {
45
46 ProfilingManager& manager = GetProfilingManager();
47 category_id = manager.RegisterTimingCategory(this, name);
48 if (parent != nullptr)
49 manager.SetTimingCategoryParent(category_id, parent->category_id);
50}
51
52ProfilingManager::ProfilingManager() 16ProfilingManager::ProfilingManager()
53 : last_frame_end(Clock::now()), this_frame_start(Clock::now()) { 17 : last_frame_end(Clock::now()), this_frame_start(Clock::now()) {
54} 18}
55 19
56unsigned int ProfilingManager::RegisterTimingCategory(TimingCategory* category, const char* name) {
57 TimingCategoryInfo info;
58 info.category = category;
59 info.name = name;
60 info.parent = TimingCategoryInfo::NO_PARENT;
61
62 unsigned int id = (unsigned int)timing_categories.size();
63 timing_categories.push_back(std::move(info));
64
65 return id;
66}
67
68void ProfilingManager::SetTimingCategoryParent(unsigned int category, unsigned int parent) {
69 ASSERT(category < timing_categories.size());
70 ASSERT(parent < timing_categories.size());
71
72 timing_categories[category].parent = parent;
73}
74
75void ProfilingManager::BeginFrame() { 20void ProfilingManager::BeginFrame() {
76 this_frame_start = Clock::now(); 21 this_frame_start = Clock::now();
77} 22}
@@ -82,11 +27,6 @@ void ProfilingManager::FinishFrame() {
82 results.interframe_time = now - last_frame_end; 27 results.interframe_time = now - last_frame_end;
83 results.frame_time = now - this_frame_start; 28 results.frame_time = now - this_frame_start;
84 29
85 results.time_per_category.resize(timing_categories.size());
86 for (size_t i = 0; i < timing_categories.size(); ++i) {
87 results.time_per_category[i] = timing_categories[i].category->GetAccumulatedTime();
88 }
89
90 last_frame_end = now; 30 last_frame_end = now;
91} 31}
92 32
@@ -100,26 +40,9 @@ void TimingResultsAggregator::Clear() {
100 window_size = cursor = 0; 40 window_size = cursor = 0;
101} 41}
102 42
103void TimingResultsAggregator::SetNumberOfCategories(size_t n) {
104 size_t old_size = times_per_category.size();
105 if (n == old_size)
106 return;
107
108 times_per_category.resize(n);
109
110 for (size_t i = old_size; i < n; ++i) {
111 times_per_category[i].resize(max_window_size, Duration::zero());
112 }
113}
114
115void TimingResultsAggregator::AddFrame(const ProfilingFrameResult& frame_result) { 43void TimingResultsAggregator::AddFrame(const ProfilingFrameResult& frame_result) {
116 SetNumberOfCategories(frame_result.time_per_category.size());
117
118 interframe_times[cursor] = frame_result.interframe_time; 44 interframe_times[cursor] = frame_result.interframe_time;
119 frame_times[cursor] = frame_result.frame_time; 45 frame_times[cursor] = frame_result.frame_time;
120 for (size_t i = 0; i < frame_result.time_per_category.size(); ++i) {
121 times_per_category[i][cursor] = frame_result.time_per_category[i];
122 }
123 46
124 ++cursor; 47 ++cursor;
125 if (cursor == max_window_size) 48 if (cursor == max_window_size)
@@ -162,11 +85,6 @@ AggregatedFrameResult TimingResultsAggregator::GetAggregatedResults() const {
162 result.fps = 0.0f; 85 result.fps = 0.0f;
163 } 86 }
164 87
165 result.time_per_category.resize(times_per_category.size());
166 for (size_t i = 0; i < times_per_category.size(); ++i) {
167 result.time_per_category[i] = AggregateField(times_per_category[i], window_size);
168 }
169
170 return result; 88 return result;
171} 89}
172 90
diff --git a/src/common/profiler.h b/src/common/profiler.h
deleted file mode 100644
index 3e967b4bc..000000000
--- a/src/common/profiler.h
+++ /dev/null
@@ -1,152 +0,0 @@
1// Copyright 2015 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <atomic>
8#include <chrono>
9
10#include "common/assert.h"
11#include "common/thread.h"
12
13namespace Common {
14namespace Profiling {
15
16// If this is defined to 0, it turns all Timers into no-ops.
17#ifndef ENABLE_PROFILING
18#define ENABLE_PROFILING 1
19#endif
20
21#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013
22// MSVC up to 2013 doesn't use QueryPerformanceCounter for high_resolution_clock, so it has bad
23// precision. We manually implement a clock based on QPC to get good results.
24
25struct QPCClock {
26 using duration = std::chrono::microseconds;
27 using time_point = std::chrono::time_point<QPCClock>;
28 using rep = duration::rep;
29 using period = duration::period;
30 static const bool is_steady = false;
31
32 static time_point now();
33};
34
35using Clock = QPCClock;
36#else
37using Clock = std::chrono::high_resolution_clock;
38#endif
39
40using Duration = Clock::duration;
41
42/**
43 * Represents a timing category that measured time can be accounted towards. Should be declared as a
44 * global variable and passed to Timers.
45 */
46class TimingCategory final {
47public:
48 TimingCategory(const char* name, TimingCategory* parent = nullptr);
49
50 unsigned int GetCategoryId() const {
51 return category_id;
52 }
53
54 /// Adds some time to this category. Can safely be called from multiple threads at the same time.
55 void AddTime(Duration amount) {
56 std::atomic_fetch_add_explicit(
57 &accumulated_duration, amount.count(),
58 std::memory_order_relaxed);
59 }
60
61 /**
62 * Atomically retrieves the accumulated measured time for this category and resets the counter
63 * to zero. Can be safely called concurrently with AddTime.
64 */
65 Duration GetAccumulatedTime() {
66 return Duration(std::atomic_exchange_explicit(
67 &accumulated_duration, (Duration::rep)0,
68 std::memory_order_relaxed));
69 }
70
71private:
72 unsigned int category_id;
73 std::atomic<Duration::rep> accumulated_duration;
74};
75
76/**
77 * Measures time elapsed between a call to Start and a call to Stop and attributes it to the given
78 * TimingCategory. Start/Stop can be called multiple times on the same timer, but each call must be
79 * appropriately paired.
80 *
81 * When a Timer is started, it automatically pauses a previously running timer on the same thread,
82 * which is resumed when it is stopped. As such, no special action needs to be taken to avoid
83 * double-accounting of time on two categories.
84 */
85class Timer {
86public:
87 Timer(TimingCategory& category) : category(category) {
88 }
89
90 void Start() {
91#if ENABLE_PROFILING
92 ASSERT(!running);
93 previous_timer = current_timer;
94 current_timer = this;
95 if (previous_timer != nullptr)
96 previous_timer->StopTiming();
97
98 StartTiming();
99#endif
100 }
101
102 void Stop() {
103#if ENABLE_PROFILING
104 ASSERT(running);
105 StopTiming();
106
107 if (previous_timer != nullptr)
108 previous_timer->StartTiming();
109 current_timer = previous_timer;
110#endif
111 }
112
113private:
114#if ENABLE_PROFILING
115 void StartTiming() {
116 start = Clock::now();
117 running = true;
118 }
119
120 void StopTiming() {
121 auto duration = Clock::now() - start;
122 running = false;
123 category.AddTime(std::chrono::duration_cast<Duration>(duration));
124 }
125
126 Clock::time_point start;
127 bool running = false;
128
129 Timer* previous_timer;
130 static thread_local Timer* current_timer;
131#endif
132
133 TimingCategory& category;
134};
135
136/**
137 * A Timer that automatically starts timing when created and stops at the end of the scope. Should
138 * be used in the majority of cases.
139 */
140class ScopeTimer : public Timer {
141public:
142 ScopeTimer(TimingCategory& category) : Timer(category) {
143 Start();
144 }
145
146 ~ScopeTimer() {
147 Stop();
148 }
149};
150
151} // namespace Profiling
152} // namespace Common
diff --git a/src/common/profiler_reporting.h b/src/common/profiler_reporting.h
index df98e05b7..fa1ac883f 100644
--- a/src/common/profiler_reporting.h
+++ b/src/common/profiler_reporting.h
@@ -4,22 +4,17 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <chrono>
7#include <cstddef> 8#include <cstddef>
8#include <vector> 9#include <vector>
9 10
10#include "common/profiler.h"
11#include "common/synchronized_wrapper.h" 11#include "common/synchronized_wrapper.h"
12 12
13namespace Common { 13namespace Common {
14namespace Profiling { 14namespace Profiling {
15 15
16struct TimingCategoryInfo { 16using Clock = std::chrono::high_resolution_clock;
17 static const unsigned int NO_PARENT = -1; 17using Duration = Clock::duration;
18
19 TimingCategory* category;
20 const char* name;
21 unsigned int parent;
22};
23 18
24struct ProfilingFrameResult { 19struct ProfilingFrameResult {
25 /// Time since the last delivered frame 20 /// Time since the last delivered frame
@@ -27,22 +22,12 @@ struct ProfilingFrameResult {
27 22
28 /// Time spent processing a frame, excluding VSync 23 /// Time spent processing a frame, excluding VSync
29 Duration frame_time; 24 Duration frame_time;
30
31 /// Total amount of time spent inside each category in this frame. Indexed by the category id
32 std::vector<Duration> time_per_category;
33}; 25};
34 26
35class ProfilingManager final { 27class ProfilingManager final {
36public: 28public:
37 ProfilingManager(); 29 ProfilingManager();
38 30
39 unsigned int RegisterTimingCategory(TimingCategory* category, const char* name);
40 void SetTimingCategoryParent(unsigned int category, unsigned int parent);
41
42 const std::vector<TimingCategoryInfo>& GetTimingCategoriesInfo() const {
43 return timing_categories;
44 }
45
46 /// This should be called after swapping screen buffers. 31 /// This should be called after swapping screen buffers.
47 void BeginFrame(); 32 void BeginFrame();
48 /// This should be called before swapping screen buffers. 33 /// This should be called before swapping screen buffers.
@@ -54,7 +39,6 @@ public:
54 } 39 }
55 40
56private: 41private:
57 std::vector<TimingCategoryInfo> timing_categories;
58 Clock::time_point last_frame_end; 42 Clock::time_point last_frame_end;
59 Clock::time_point this_frame_start; 43 Clock::time_point this_frame_start;
60 44
@@ -73,9 +57,6 @@ struct AggregatedFrameResult {
73 AggregatedDuration frame_time; 57 AggregatedDuration frame_time;
74 58
75 float fps; 59 float fps;
76
77 /// Total amount of time spent inside each category in this frame. Indexed by the category id
78 std::vector<AggregatedDuration> time_per_category;
79}; 60};
80 61
81class TimingResultsAggregator final { 62class TimingResultsAggregator final {
@@ -83,7 +64,6 @@ public:
83 TimingResultsAggregator(size_t window_size); 64 TimingResultsAggregator(size_t window_size);
84 65
85 void Clear(); 66 void Clear();
86 void SetNumberOfCategories(size_t n);
87 67
88 void AddFrame(const ProfilingFrameResult& frame_result); 68 void AddFrame(const ProfilingFrameResult& frame_result);
89 69
@@ -95,7 +75,6 @@ public:
95 75
96 std::vector<Duration> interframe_times; 76 std::vector<Duration> interframe_times;
97 std::vector<Duration> frame_times; 77 std::vector<Duration> frame_times;
98 std::vector<std::vector<Duration>> times_per_category;
99}; 78};
100 79
101ProfilingManager& GetProfilingManager(); 80ProfilingManager& GetProfilingManager();
diff --git a/src/common/x64/emitter.h b/src/common/x64/emitter.h
index a33724146..60a77dfe1 100644
--- a/src/common/x64/emitter.h
+++ b/src/common/x64/emitter.h
@@ -17,6 +17,8 @@
17 17
18#pragma once 18#pragma once
19 19
20#include <cstddef>
21
20#include "common/assert.h" 22#include "common/assert.h"
21#include "common/bit_set.h" 23#include "common/bit_set.h"
22#include "common/common_types.h" 24#include "common/common_types.h"
diff --git a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
index 647784208..8d4b26815 100644
--- a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
+++ b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
@@ -10,7 +10,6 @@
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "common/logging/log.h" 11#include "common/logging/log.h"
12#include "common/microprofile.h" 12#include "common/microprofile.h"
13#include "common/profiler.h"
14 13
15#include "core/memory.h" 14#include "core/memory.h"
16#include "core/hle/svc.h" 15#include "core/hle/svc.h"
@@ -25,9 +24,6 @@
25 24
26#include "core/gdbstub/gdbstub.h" 25#include "core/gdbstub/gdbstub.h"
27 26
28Common::Profiling::TimingCategory profile_execute("DynCom::Execute");
29Common::Profiling::TimingCategory profile_decode("DynCom::Decode");
30
31enum { 27enum {
32 COND = (1 << 0), 28 COND = (1 << 0),
33 NON_BRANCH = (1 << 1), 29 NON_BRANCH = (1 << 1),
@@ -3496,7 +3492,6 @@ static unsigned int InterpreterTranslateInstruction(const ARMul_State* cpu, cons
3496} 3492}
3497 3493
3498static int InterpreterTranslateBlock(ARMul_State* cpu, int& bb_start, u32 addr) { 3494static int InterpreterTranslateBlock(ARMul_State* cpu, int& bb_start, u32 addr) {
3499 Common::Profiling::ScopeTimer timer_decode(profile_decode);
3500 MICROPROFILE_SCOPE(DynCom_Decode); 3495 MICROPROFILE_SCOPE(DynCom_Decode);
3501 3496
3502 // Decode instruction, get index 3497 // Decode instruction, get index
@@ -3530,7 +3525,6 @@ static int InterpreterTranslateBlock(ARMul_State* cpu, int& bb_start, u32 addr)
3530} 3525}
3531 3526
3532static int InterpreterTranslateSingle(ARMul_State* cpu, int& bb_start, u32 addr) { 3527static int InterpreterTranslateSingle(ARMul_State* cpu, int& bb_start, u32 addr) {
3533 Common::Profiling::ScopeTimer timer_decode(profile_decode);
3534 MICROPROFILE_SCOPE(DynCom_Decode); 3528 MICROPROFILE_SCOPE(DynCom_Decode);
3535 3529
3536 ARM_INST_PTR inst_base = nullptr; 3530 ARM_INST_PTR inst_base = nullptr;
@@ -3565,7 +3559,6 @@ static int clz(unsigned int x) {
3565MICROPROFILE_DEFINE(DynCom_Execute, "DynCom", "Execute", MP_RGB(255, 0, 0)); 3559MICROPROFILE_DEFINE(DynCom_Execute, "DynCom", "Execute", MP_RGB(255, 0, 0));
3566 3560
3567unsigned InterpreterMainLoop(ARMul_State* cpu) { 3561unsigned InterpreterMainLoop(ARMul_State* cpu) {
3568 Common::Profiling::ScopeTimer timer_execute(profile_execute);
3569 MICROPROFILE_SCOPE(DynCom_Execute); 3562 MICROPROFILE_SCOPE(DynCom_Execute);
3570 3563
3571 GDBStub::BreakpointAddress breakpoint_data; 3564 GDBStub::BreakpointAddress breakpoint_data;
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 3bb843aab..cabab744a 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -51,7 +51,7 @@ void RunLoop(int tight_loop) {
51 } 51 }
52 52
53 HW::Update(); 53 HW::Update();
54 if (HLE::g_reschedule) { 54 if (HLE::IsReschedulePending()) {
55 Kernel::Reschedule(); 55 Kernel::Reschedule();
56 } 56 }
57} 57}
diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp
index c1a7ec5bf..1360ee845 100644
--- a/src/core/gdbstub/gdbstub.cpp
+++ b/src/core/gdbstub/gdbstub.cpp
@@ -374,7 +374,7 @@ static void SendReply(const char* reply) {
374 374
375 memset(command_buffer, 0, sizeof(command_buffer)); 375 memset(command_buffer, 0, sizeof(command_buffer));
376 376
377 command_length = strlen(reply); 377 command_length = static_cast<u32>(strlen(reply));
378 if (command_length + 4 > sizeof(command_buffer)) { 378 if (command_length + 4 > sizeof(command_buffer)) {
379 LOG_ERROR(Debug_GDBStub, "command_buffer overflow in SendReply"); 379 LOG_ERROR(Debug_GDBStub, "command_buffer overflow in SendReply");
380 return; 380 return;
@@ -515,7 +515,7 @@ static bool IsDataAvailable() {
515 return false; 515 return false;
516 } 516 }
517 517
518 return FD_ISSET(gdbserver_socket, &fd_socket); 518 return FD_ISSET(gdbserver_socket, &fd_socket) != 0;
519} 519}
520 520
521/// Send requested register to gdb client. 521/// Send requested register to gdb client.
@@ -529,7 +529,7 @@ static void ReadRegister() {
529 id |= HexCharToValue(command_buffer[2]); 529 id |= HexCharToValue(command_buffer[2]);
530 } 530 }
531 531
532 if (id >= R0_REGISTER && id <= R15_REGISTER) { 532 if (id <= R15_REGISTER) {
533 IntToGdbHex(reply, Core::g_app_core->GetReg(id)); 533 IntToGdbHex(reply, Core::g_app_core->GetReg(id));
534 } else if (id == CPSR_REGISTER) { 534 } else if (id == CPSR_REGISTER) {
535 IntToGdbHex(reply, Core::g_app_core->GetCPSR()); 535 IntToGdbHex(reply, Core::g_app_core->GetCPSR());
@@ -584,7 +584,7 @@ static void WriteRegister() {
584 id |= HexCharToValue(command_buffer[2]); 584 id |= HexCharToValue(command_buffer[2]);
585 } 585 }
586 586
587 if (id >= R0_REGISTER && id <= R15_REGISTER) { 587 if (id <= R15_REGISTER) {
588 Core::g_app_core->SetReg(id, GdbHexToInt(buffer_ptr)); 588 Core::g_app_core->SetReg(id, GdbHexToInt(buffer_ptr));
589 } else if (id == CPSR_REGISTER) { 589 } else if (id == CPSR_REGISTER) {
590 Core::g_app_core->SetCPSR(GdbHexToInt(buffer_ptr)); 590 Core::g_app_core->SetCPSR(GdbHexToInt(buffer_ptr));
@@ -633,10 +633,10 @@ static void ReadMemory() {
633 633
634 auto start_offset = command_buffer+1; 634 auto start_offset = command_buffer+1;
635 auto addr_pos = std::find(start_offset, command_buffer+command_length, ','); 635 auto addr_pos = std::find(start_offset, command_buffer+command_length, ',');
636 PAddr addr = HexToInt(start_offset, addr_pos - start_offset); 636 PAddr addr = HexToInt(start_offset, static_cast<u32>(addr_pos - start_offset));
637 637
638 start_offset = addr_pos+1; 638 start_offset = addr_pos+1;
639 u32 len = HexToInt(start_offset, (command_buffer + command_length) - start_offset); 639 u32 len = HexToInt(start_offset, static_cast<u32>((command_buffer + command_length) - start_offset));
640 640
641 LOG_DEBUG(Debug_GDBStub, "gdb: addr: %08x len: %08x\n", addr, len); 641 LOG_DEBUG(Debug_GDBStub, "gdb: addr: %08x len: %08x\n", addr, len);
642 642
@@ -658,11 +658,11 @@ static void ReadMemory() {
658static void WriteMemory() { 658static void WriteMemory() {
659 auto start_offset = command_buffer+1; 659 auto start_offset = command_buffer+1;
660 auto addr_pos = std::find(start_offset, command_buffer+command_length, ','); 660 auto addr_pos = std::find(start_offset, command_buffer+command_length, ',');
661 PAddr addr = HexToInt(start_offset, addr_pos - start_offset); 661 PAddr addr = HexToInt(start_offset, static_cast<u32>(addr_pos - start_offset));
662 662
663 start_offset = addr_pos+1; 663 start_offset = addr_pos+1;
664 auto len_pos = std::find(start_offset, command_buffer+command_length, ':'); 664 auto len_pos = std::find(start_offset, command_buffer+command_length, ':');
665 u32 len = HexToInt(start_offset, len_pos - start_offset); 665 u32 len = HexToInt(start_offset, static_cast<u32>(len_pos - start_offset));
666 666
667 u8* dst = Memory::GetPointer(addr); 667 u8* dst = Memory::GetPointer(addr);
668 if (!dst) { 668 if (!dst) {
@@ -752,10 +752,10 @@ static void AddBreakpoint() {
752 752
753 auto start_offset = command_buffer+3; 753 auto start_offset = command_buffer+3;
754 auto addr_pos = std::find(start_offset, command_buffer+command_length, ','); 754 auto addr_pos = std::find(start_offset, command_buffer+command_length, ',');
755 PAddr addr = HexToInt(start_offset, addr_pos - start_offset); 755 PAddr addr = HexToInt(start_offset, static_cast<u32>(addr_pos - start_offset));
756 756
757 start_offset = addr_pos+1; 757 start_offset = addr_pos+1;
758 u32 len = HexToInt(start_offset, (command_buffer + command_length) - start_offset); 758 u32 len = HexToInt(start_offset, static_cast<u32>((command_buffer + command_length) - start_offset));
759 759
760 if (type == BreakpointType::Access) { 760 if (type == BreakpointType::Access) {
761 // Access is made up of Read and Write types, so add both breakpoints 761 // Access is made up of Read and Write types, so add both breakpoints
@@ -800,10 +800,10 @@ static void RemoveBreakpoint() {
800 800
801 auto start_offset = command_buffer+3; 801 auto start_offset = command_buffer+3;
802 auto addr_pos = std::find(start_offset, command_buffer+command_length, ','); 802 auto addr_pos = std::find(start_offset, command_buffer+command_length, ',');
803 PAddr addr = HexToInt(start_offset, addr_pos - start_offset); 803 PAddr addr = HexToInt(start_offset, static_cast<u32>(addr_pos - start_offset));
804 804
805 start_offset = addr_pos+1; 805 start_offset = addr_pos+1;
806 u32 len = HexToInt(start_offset, (command_buffer + command_length) - start_offset); 806 u32 len = HexToInt(start_offset, static_cast<u32>((command_buffer + command_length) - start_offset));
807 807
808 if (type == BreakpointType::Access) { 808 if (type == BreakpointType::Access) {
809 // Access is made up of Read and Write types, so add both breakpoints 809 // Access is made up of Read and Write types, so add both breakpoints
diff --git a/src/core/hle/applets/mii_selector.cpp b/src/core/hle/applets/mii_selector.cpp
index 708d2f630..b4456ca90 100644
--- a/src/core/hle/applets/mii_selector.cpp
+++ b/src/core/hle/applets/mii_selector.cpp
@@ -21,13 +21,6 @@
21namespace HLE { 21namespace HLE {
22namespace Applets { 22namespace Applets {
23 23
24MiiSelector::MiiSelector(Service::APT::AppletId id) : Applet(id), started(false) {
25 // Create the SharedMemory that will hold the framebuffer data
26 // TODO(Subv): What size should we use here?
27 using Kernel::MemoryPermission;
28 framebuffer_memory = Kernel::SharedMemory::Create(0x1000, MemoryPermission::ReadWrite, MemoryPermission::ReadWrite, "MiiSelector Memory");
29}
30
31ResultCode MiiSelector::ReceiveParameter(const Service::APT::MessageParameter& parameter) { 24ResultCode MiiSelector::ReceiveParameter(const Service::APT::MessageParameter& parameter) {
32 if (parameter.signal != static_cast<u32>(Service::APT::SignalType::LibAppJustStarted)) { 25 if (parameter.signal != static_cast<u32>(Service::APT::SignalType::LibAppJustStarted)) {
33 LOG_ERROR(Service_APT, "unsupported signal %u", parameter.signal); 26 LOG_ERROR(Service_APT, "unsupported signal %u", parameter.signal);
@@ -36,8 +29,18 @@ ResultCode MiiSelector::ReceiveParameter(const Service::APT::MessageParameter& p
36 return ResultCode(-1); 29 return ResultCode(-1);
37 } 30 }
38 31
32 // The LibAppJustStarted message contains a buffer with the size of the framebuffer shared memory.
33 // Create the SharedMemory that will hold the framebuffer data
34 Service::APT::CaptureBufferInfo capture_info;
35 ASSERT(sizeof(capture_info) == parameter.buffer_size);
36
37 memcpy(&capture_info, parameter.data, sizeof(capture_info));
38 using Kernel::MemoryPermission;
39 framebuffer_memory = Kernel::SharedMemory::Create(capture_info.size, MemoryPermission::ReadWrite,
40 MemoryPermission::ReadWrite, "MiiSelector Memory");
41
42 // Send the response message with the newly created SharedMemory
39 Service::APT::MessageParameter result; 43 Service::APT::MessageParameter result;
40 // The buffer passed in parameter contains the data returned by GSPGPU::ImportDisplayCaptureInfo
41 result.signal = static_cast<u32>(Service::APT::SignalType::LibAppFinished); 44 result.signal = static_cast<u32>(Service::APT::SignalType::LibAppFinished);
42 result.data = nullptr; 45 result.data = nullptr;
43 result.buffer_size = 0; 46 result.buffer_size = 0;
@@ -55,6 +58,11 @@ ResultCode MiiSelector::StartImpl(const Service::APT::AppletStartupParameter& pa
55 // TODO(Subv): Set the expected fields in the response buffer before resending it to the application. 58 // TODO(Subv): Set the expected fields in the response buffer before resending it to the application.
56 // TODO(Subv): Reverse the parameter format for the Mii Selector 59 // TODO(Subv): Reverse the parameter format for the Mii Selector
57 60
61 if(parameter.buffer_size >= sizeof(u32)) {
62 // TODO: defaults return no error, but garbage in other unknown fields
63 memset(parameter.data, 0, sizeof(u32));
64 }
65
58 // Let the application know that we're closing 66 // Let the application know that we're closing
59 Service::APT::MessageParameter message; 67 Service::APT::MessageParameter message;
60 message.buffer_size = parameter.buffer_size; 68 message.buffer_size = parameter.buffer_size;
diff --git a/src/core/hle/applets/mii_selector.h b/src/core/hle/applets/mii_selector.h
index 6a3e7c8eb..be6b04642 100644
--- a/src/core/hle/applets/mii_selector.h
+++ b/src/core/hle/applets/mii_selector.h
@@ -16,17 +16,61 @@
16namespace HLE { 16namespace HLE {
17namespace Applets { 17namespace Applets {
18 18
19struct MiiConfig {
20 u8 unk_000;
21 u8 unk_001;
22 u8 unk_002;
23 u8 unk_003;
24 u8 unk_004;
25 INSERT_PADDING_BYTES(3);
26 u16 unk_008;
27 INSERT_PADDING_BYTES(0x8C - 0xA);
28 u8 unk_08C;
29 INSERT_PADDING_BYTES(3);
30 u16 unk_090;
31 INSERT_PADDING_BYTES(2);
32 u32 unk_094;
33 u16 unk_098;
34 u8 unk_09A[0x64];
35 u8 unk_0FE;
36 u8 unk_0FF;
37 u32 unk_100;
38};
39
40static_assert(sizeof(MiiConfig) == 0x104, "MiiConfig structure has incorrect size");
41#define ASSERT_REG_POSITION(field_name, position) static_assert(offsetof(MiiConfig, field_name) == position, "Field "#field_name" has invalid position")
42ASSERT_REG_POSITION(unk_008, 0x08);
43ASSERT_REG_POSITION(unk_08C, 0x8C);
44ASSERT_REG_POSITION(unk_090, 0x90);
45ASSERT_REG_POSITION(unk_094, 0x94);
46ASSERT_REG_POSITION(unk_0FE, 0xFE);
47#undef ASSERT_REG_POSITION
48
49struct MiiResult {
50 u32 result_code;
51 u8 unk_04;
52 INSERT_PADDING_BYTES(7);
53 u8 unk_0C[0x60];
54 u8 unk_6C[0x16];
55 INSERT_PADDING_BYTES(2);
56};
57static_assert(sizeof(MiiResult) == 0x84, "MiiResult structure has incorrect size");
58#define ASSERT_REG_POSITION(field_name, position) static_assert(offsetof(MiiResult, field_name) == position, "Field "#field_name" has invalid position")
59ASSERT_REG_POSITION(unk_0C, 0x0C);
60ASSERT_REG_POSITION(unk_6C, 0x6C);
61#undef ASSERT_REG_POSITION
62
19class MiiSelector final : public Applet { 63class MiiSelector final : public Applet {
20public: 64public:
21 MiiSelector(Service::APT::AppletId id); 65 MiiSelector(Service::APT::AppletId id) : Applet(id), started(false) { }
22 66
23 ResultCode ReceiveParameter(const Service::APT::MessageParameter& parameter) override; 67 ResultCode ReceiveParameter(const Service::APT::MessageParameter& parameter) override;
24 ResultCode StartImpl(const Service::APT::AppletStartupParameter& parameter) override; 68 ResultCode StartImpl(const Service::APT::AppletStartupParameter& parameter) override;
25 void Update() override; 69 void Update() override;
26 bool IsRunning() const override { return started; } 70 bool IsRunning() const override { return started; }
27 71
28 /// TODO(Subv): Find out what this is actually used for. 72 /// This SharedMemory will be created when we receive the LibAppJustStarted message.
29 /// It is believed that the application stores the current screen image here. 73 /// It holds the framebuffer info retrieved by the application with GSPGPU::ImportDisplayCaptureInfo
30 Kernel::SharedPtr<Kernel::SharedMemory> framebuffer_memory; 74 Kernel::SharedPtr<Kernel::SharedMemory> framebuffer_memory;
31 75
32 /// Whether this applet is currently running instead of the host application or not. 76 /// Whether this applet is currently running instead of the host application or not.
diff --git a/src/core/hle/applets/swkbd.cpp b/src/core/hle/applets/swkbd.cpp
index 1db6b5a17..87238aa1c 100644
--- a/src/core/hle/applets/swkbd.cpp
+++ b/src/core/hle/applets/swkbd.cpp
@@ -24,13 +24,6 @@
24namespace HLE { 24namespace HLE {
25namespace Applets { 25namespace Applets {
26 26
27SoftwareKeyboard::SoftwareKeyboard(Service::APT::AppletId id) : Applet(id), started(false) {
28 // Create the SharedMemory that will hold the framebuffer data
29 // TODO(Subv): What size should we use here?
30 using Kernel::MemoryPermission;
31 framebuffer_memory = Kernel::SharedMemory::Create(0x1000, MemoryPermission::ReadWrite, MemoryPermission::ReadWrite, "SoftwareKeyboard Memory");
32}
33
34ResultCode SoftwareKeyboard::ReceiveParameter(Service::APT::MessageParameter const& parameter) { 27ResultCode SoftwareKeyboard::ReceiveParameter(Service::APT::MessageParameter const& parameter) {
35 if (parameter.signal != static_cast<u32>(Service::APT::SignalType::LibAppJustStarted)) { 28 if (parameter.signal != static_cast<u32>(Service::APT::SignalType::LibAppJustStarted)) {
36 LOG_ERROR(Service_APT, "unsupported signal %u", parameter.signal); 29 LOG_ERROR(Service_APT, "unsupported signal %u", parameter.signal);
@@ -39,8 +32,19 @@ ResultCode SoftwareKeyboard::ReceiveParameter(Service::APT::MessageParameter con
39 return ResultCode(-1); 32 return ResultCode(-1);
40 } 33 }
41 34
35 // The LibAppJustStarted message contains a buffer with the size of the framebuffer shared memory.
36 // Create the SharedMemory that will hold the framebuffer data
37 Service::APT::CaptureBufferInfo capture_info;
38 ASSERT(sizeof(capture_info) == parameter.buffer_size);
39
40 memcpy(&capture_info, parameter.data, sizeof(capture_info));
41
42 using Kernel::MemoryPermission;
43 framebuffer_memory = Kernel::SharedMemory::Create(capture_info.size, MemoryPermission::ReadWrite,
44 MemoryPermission::ReadWrite, "SoftwareKeyboard Memory");
45
46 // Send the response message with the newly created SharedMemory
42 Service::APT::MessageParameter result; 47 Service::APT::MessageParameter result;
43 // The buffer passed in parameter contains the data returned by GSPGPU::ImportDisplayCaptureInfo
44 result.signal = static_cast<u32>(Service::APT::SignalType::LibAppFinished); 48 result.signal = static_cast<u32>(Service::APT::SignalType::LibAppFinished);
45 result.data = nullptr; 49 result.data = nullptr;
46 result.buffer_size = 0; 50 result.buffer_size = 0;
diff --git a/src/core/hle/applets/swkbd.h b/src/core/hle/applets/swkbd.h
index cb95b8d90..cf26a8fb7 100644
--- a/src/core/hle/applets/swkbd.h
+++ b/src/core/hle/applets/swkbd.h
@@ -53,8 +53,7 @@ static_assert(sizeof(SoftwareKeyboardConfig) == 0x400, "Software Keyboard Config
53 53
54class SoftwareKeyboard final : public Applet { 54class SoftwareKeyboard final : public Applet {
55public: 55public:
56 SoftwareKeyboard(Service::APT::AppletId id); 56 SoftwareKeyboard(Service::APT::AppletId id) : Applet(id), started(false) { }
57 ~SoftwareKeyboard() {}
58 57
59 ResultCode ReceiveParameter(const Service::APT::MessageParameter& parameter) override; 58 ResultCode ReceiveParameter(const Service::APT::MessageParameter& parameter) override;
60 ResultCode StartImpl(const Service::APT::AppletStartupParameter& parameter) override; 59 ResultCode StartImpl(const Service::APT::AppletStartupParameter& parameter) override;
@@ -72,8 +71,8 @@ public:
72 */ 71 */
73 void Finalize(); 72 void Finalize();
74 73
75 /// TODO(Subv): Find out what this is actually used for. 74 /// This SharedMemory will be created when we receive the LibAppJustStarted message.
76 /// It is believed that the application stores the current screen image here. 75 /// It holds the framebuffer info retrieved by the application with GSPGPU::ImportDisplayCaptureInfo
77 Kernel::SharedPtr<Kernel::SharedMemory> framebuffer_memory; 76 Kernel::SharedPtr<Kernel::SharedMemory> framebuffer_memory;
78 77
79 /// SharedMemory where the output text will be stored 78 /// SharedMemory where the output text will be stored
diff --git a/src/core/hle/hle.cpp b/src/core/hle/hle.cpp
index e545de3b5..5c5373517 100644
--- a/src/core/hle/hle.cpp
+++ b/src/core/hle/hle.cpp
@@ -12,9 +12,13 @@
12 12
13//////////////////////////////////////////////////////////////////////////////////////////////////// 13////////////////////////////////////////////////////////////////////////////////////////////////////
14 14
15namespace HLE { 15namespace {
16
17bool reschedule; ///< If true, immediately reschedules the CPU to a new thread
16 18
17bool g_reschedule; ///< If true, immediately reschedules the CPU to a new thread 19}
20
21namespace HLE {
18 22
19void Reschedule(const char *reason) { 23void Reschedule(const char *reason) {
20 DEBUG_ASSERT_MSG(reason != nullptr && strlen(reason) < 256, "Reschedule: Invalid or too long reason."); 24 DEBUG_ASSERT_MSG(reason != nullptr && strlen(reason) < 256, "Reschedule: Invalid or too long reason.");
@@ -27,13 +31,21 @@ void Reschedule(const char *reason) {
27 31
28 Core::g_app_core->PrepareReschedule(); 32 Core::g_app_core->PrepareReschedule();
29 33
30 g_reschedule = true; 34 reschedule = true;
35}
36
37bool IsReschedulePending() {
38 return reschedule;
39}
40
41void DoneRescheduling() {
42 reschedule = false;
31} 43}
32 44
33void Init() { 45void Init() {
34 Service::Init(); 46 Service::Init();
35 47
36 g_reschedule = false; 48 reschedule = false;
37 49
38 LOG_DEBUG(Kernel, "initialized OK"); 50 LOG_DEBUG(Kernel, "initialized OK");
39} 51}
diff --git a/src/core/hle/hle.h b/src/core/hle/hle.h
index e0b97797c..69ac0ade6 100644
--- a/src/core/hle/hle.h
+++ b/src/core/hle/hle.h
@@ -13,9 +13,9 @@ const Handle INVALID_HANDLE = 0;
13 13
14namespace HLE { 14namespace HLE {
15 15
16extern bool g_reschedule; ///< If true, immediately reschedules the CPU to a new thread
17
18void Reschedule(const char *reason); 16void Reschedule(const char *reason);
17bool IsReschedulePending();
18void DoneRescheduling();
19 19
20void Init(); 20void Init();
21void Shutdown(); 21void Shutdown();
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index bf32f653d..6dc95d0f1 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -483,7 +483,8 @@ void Reschedule() {
483 483
484 Thread* cur = GetCurrentThread(); 484 Thread* cur = GetCurrentThread();
485 Thread* next = PopNextReadyThread(); 485 Thread* next = PopNextReadyThread();
486 HLE::g_reschedule = false; 486
487 HLE::DoneRescheduling();
487 488
488 // Don't bother switching to the same thread 489 // Don't bother switching to the same thread
489 if (next == cur) 490 if (next == cur)
diff --git a/src/core/hle/result.h b/src/core/hle/result.h
index 2d22652d9..3fc1ab4ee 100644
--- a/src/core/hle/result.h
+++ b/src/core/hle/result.h
@@ -5,7 +5,6 @@
5#pragma once 5#pragma once
6 6
7#include <new> 7#include <new>
8#include <type_traits>
9#include <utility> 8#include <utility>
10 9
11#include "common/assert.h" 10#include "common/assert.h"
@@ -18,6 +17,7 @@
18/// Detailed description of the error. This listing is likely incomplete. 17/// Detailed description of the error. This listing is likely incomplete.
19enum class ErrorDescription : u32 { 18enum class ErrorDescription : u32 {
20 Success = 0, 19 Success = 0,
20 OS_InvalidBufferDescriptor = 48,
21 WrongAddress = 53, 21 WrongAddress = 53,
22 FS_NotFound = 120, 22 FS_NotFound = 120,
23 FS_AlreadyExists = 190, 23 FS_AlreadyExists = 190,
diff --git a/src/core/hle/service/ac_u.cpp b/src/core/hle/service/ac_u.cpp
index d67325506..5241dd3e7 100644
--- a/src/core/hle/service/ac_u.cpp
+++ b/src/core/hle/service/ac_u.cpp
@@ -3,6 +3,8 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/logging/log.h" 5#include "common/logging/log.h"
6
7#include "core/hle/kernel/event.h"
6#include "core/hle/service/ac_u.h" 8#include "core/hle/service/ac_u.h"
7 9
8//////////////////////////////////////////////////////////////////////////////////////////////////// 10////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -11,6 +13,28 @@
11namespace AC_U { 13namespace AC_U {
12 14
13/** 15/**
16 * AC_U::CloseAsync service function
17 * Inputs:
18 * 1 : Always 0x20
19 * 3 : Always 0
20 * 4 : Event handle, should be signaled when AC connection is closed
21 * Outputs:
22 * 1 : Result of function, 0 on success, otherwise error code
23 */
24static void CloseAsync(Service::Interface* self) {
25 u32* cmd_buff = Kernel::GetCommandBuffer();
26
27 auto evt = Kernel::g_handle_table.Get<Kernel::Event>(cmd_buff[4]);
28
29 if (evt) {
30 evt->name = "AC_U:close_event";
31 evt->Signal();
32 }
33 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
34
35 LOG_WARNING(Service_AC, "(STUBBED) called");
36}
37/**
14 * AC_U::GetWifiStatus service function 38 * AC_U::GetWifiStatus service function
15 * Outputs: 39 * Outputs:
16 * 1 : Result of function, 0 on success, otherwise error code 40 * 1 : Result of function, 0 on success, otherwise error code
@@ -47,7 +71,7 @@ const Interface::FunctionInfo FunctionTable[] = {
47 {0x00010000, nullptr, "CreateDefaultConfig"}, 71 {0x00010000, nullptr, "CreateDefaultConfig"},
48 {0x00040006, nullptr, "ConnectAsync"}, 72 {0x00040006, nullptr, "ConnectAsync"},
49 {0x00050002, nullptr, "GetConnectResult"}, 73 {0x00050002, nullptr, "GetConnectResult"},
50 {0x00080004, nullptr, "CloseAsync"}, 74 {0x00080004, CloseAsync, "CloseAsync"},
51 {0x00090002, nullptr, "GetCloseResult"}, 75 {0x00090002, nullptr, "GetCloseResult"},
52 {0x000A0000, nullptr, "GetLastErrorCode"}, 76 {0x000A0000, nullptr, "GetLastErrorCode"},
53 {0x000D0000, GetWifiStatus, "GetWifiStatus"}, 77 {0x000D0000, GetWifiStatus, "GetWifiStatus"},
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index 9591522e5..3f71e7f2b 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -43,7 +43,7 @@ void FindContentInfos(Service::Interface* self) {
43 am_content_count[media_type] = cmd_buff[4]; 43 am_content_count[media_type] = cmd_buff[4];
44 44
45 cmd_buff[1] = RESULT_SUCCESS.raw; 45 cmd_buff[1] = RESULT_SUCCESS.raw;
46 LOG_WARNING(Service_AM, "(STUBBED) media_type=%u, title_id=0x%016lx, content_cound=%u, content_ids_pointer=0x%08x, content_info_pointer=0x%08x", 46 LOG_WARNING(Service_AM, "(STUBBED) media_type=%u, title_id=0x%016llx, content_cound=%u, content_ids_pointer=0x%08x, content_info_pointer=0x%08x",
47 media_type, title_id, am_content_count[media_type], content_ids_pointer, content_info_pointer); 47 media_type, title_id, am_content_count[media_type], content_ids_pointer, content_info_pointer);
48} 48}
49 49
diff --git a/src/core/hle/service/apt/apt.h b/src/core/hle/service/apt/apt.h
index 668b4a66f..1a1034fcc 100644
--- a/src/core/hle/service/apt/apt.h
+++ b/src/core/hle/service/apt/apt.h
@@ -5,6 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include "common/common_types.h" 7#include "common/common_types.h"
8#include "common/swap.h"
8 9
9#include "core/hle/kernel/kernel.h" 10#include "core/hle/kernel/kernel.h"
10 11
@@ -31,6 +32,20 @@ struct AppletStartupParameter {
31 u8* data = nullptr; 32 u8* data = nullptr;
32}; 33};
33 34
35/// Used by the application to pass information about the current framebuffer to applets.
36struct CaptureBufferInfo {
37 u32_le size;
38 u8 is_3d;
39 INSERT_PADDING_BYTES(0x3); // Padding for alignment
40 u32_le top_screen_left_offset;
41 u32_le top_screen_right_offset;
42 u32_le top_screen_format;
43 u32_le bottom_screen_left_offset;
44 u32_le bottom_screen_right_offset;
45 u32_le bottom_screen_format;
46};
47static_assert(sizeof(CaptureBufferInfo) == 0x20, "CaptureBufferInfo struct has incorrect size");
48
34/// Signals used by APT functions 49/// Signals used by APT functions
35enum class SignalType : u32 { 50enum class SignalType : u32 {
36 None = 0x0, 51 None = 0x0,
diff --git a/src/core/hle/service/cfg/cfg.cpp b/src/core/hle/service/cfg/cfg.cpp
index 525432957..b9322c55d 100644
--- a/src/core/hle/service/cfg/cfg.cpp
+++ b/src/core/hle/service/cfg/cfg.cpp
@@ -389,6 +389,10 @@ ResultCode FormatConfig() {
389 res = CreateConfigInfoBlk(0x000F0004, sizeof(CONSOLE_MODEL), 0xC, &CONSOLE_MODEL); 389 res = CreateConfigInfoBlk(0x000F0004, sizeof(CONSOLE_MODEL), 0xC, &CONSOLE_MODEL);
390 if (!res.IsSuccess()) return res; 390 if (!res.IsSuccess()) return res;
391 391
392 // 0x00170000 - Unknown
393 res = CreateConfigInfoBlk(0x00170000, 0x4, 0xE, zero_buffer);
394 if (!res.IsSuccess()) return res;
395
392 // Save the buffer to the file 396 // Save the buffer to the file
393 res = UpdateConfigNANDSavegame(); 397 res = UpdateConfigNANDSavegame();
394 if (!res.IsSuccess()) 398 if (!res.IsSuccess())
diff --git a/src/core/hle/service/cfg/cfg.h b/src/core/hle/service/cfg/cfg.h
index 606ab99cf..c01806836 100644
--- a/src/core/hle/service/cfg/cfg.h
+++ b/src/core/hle/service/cfg/cfg.h
@@ -98,19 +98,6 @@ void GetCountryCodeString(Service::Interface* self);
98void GetCountryCodeID(Service::Interface* self); 98void GetCountryCodeID(Service::Interface* self);
99 99
100/** 100/**
101 * CFG::GetConfigInfoBlk2 service function
102 * Inputs:
103 * 0 : 0x00010082
104 * 1 : Size
105 * 2 : Block ID
106 * 3 : Descriptor for the output buffer
107 * 4 : Output buffer pointer
108 * Outputs:
109 * 1 : Result of function, 0 on success, otherwise error code
110 */
111void GetConfigInfoBlk2(Service::Interface* self);
112
113/**
114 * CFG::SecureInfoGetRegion service function 101 * CFG::SecureInfoGetRegion service function
115 * Inputs: 102 * Inputs:
116 * 1 : None 103 * 1 : None
diff --git a/src/core/hle/service/dsp_dsp.cpp b/src/core/hle/service/dsp_dsp.cpp
index 08e437125..274fc751a 100644
--- a/src/core/hle/service/dsp_dsp.cpp
+++ b/src/core/hle/service/dsp_dsp.cpp
@@ -2,6 +2,7 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
5#include <cinttypes> 6#include <cinttypes>
6 7
7#include "audio_core/hle/pipe.h" 8#include "audio_core/hle/pipe.h"
@@ -12,37 +13,80 @@
12#include "core/hle/kernel/event.h" 13#include "core/hle/kernel/event.h"
13#include "core/hle/service/dsp_dsp.h" 14#include "core/hle/service/dsp_dsp.h"
14 15
16using DspPipe = DSP::HLE::DspPipe;
17
15//////////////////////////////////////////////////////////////////////////////////////////////////// 18////////////////////////////////////////////////////////////////////////////////////////////////////
16// Namespace DSP_DSP 19// Namespace DSP_DSP
17 20
18namespace DSP_DSP { 21namespace DSP_DSP {
19 22
20static u32 read_pipe_count;
21static Kernel::SharedPtr<Kernel::Event> semaphore_event; 23static Kernel::SharedPtr<Kernel::Event> semaphore_event;
22 24
23struct PairHash { 25/// There are three types of interrupts
24 template <typename T, typename U> 26enum class InterruptType {
25 std::size_t operator()(const std::pair<T, U> &x) const { 27 Zero, One, Pipe
26 // TODO(yuriks): Replace with better hash combining function. 28};
27 return std::hash<T>()(x.first) ^ std::hash<U>()(x.second); 29constexpr size_t NUM_INTERRUPT_TYPE = 3;
30
31class InterruptEvents final {
32public:
33 void Signal(InterruptType type, DspPipe pipe) {
34 Kernel::SharedPtr<Kernel::Event>& event = Get(type, pipe);
35 if (event) {
36 event->Signal();
37 }
28 } 38 }
39
40 Kernel::SharedPtr<Kernel::Event>& Get(InterruptType type, DspPipe dsp_pipe) {
41 switch (type) {
42 case InterruptType::Zero:
43 return zero;
44 case InterruptType::One:
45 return one;
46 case InterruptType::Pipe: {
47 const size_t pipe_index = static_cast<size_t>(dsp_pipe);
48 ASSERT(pipe_index < DSP::HLE::NUM_DSP_PIPE);
49 return pipe[pipe_index];
50 }
51 }
52
53 UNREACHABLE_MSG("Invalid interrupt type = %zu", static_cast<size_t>(type));
54 }
55
56 bool HasTooManyEventsRegistered() const {
57 // Actual service implementation only has 6 'slots' for interrupts.
58 constexpr size_t max_number_of_interrupt_events = 6;
59
60 size_t number = std::count_if(pipe.begin(), pipe.end(), [](const auto& evt) {
61 return evt != nullptr;
62 });
63
64 if (zero != nullptr)
65 number++;
66 if (one != nullptr)
67 number++;
68
69 return number >= max_number_of_interrupt_events;
70 }
71
72private:
73 /// Currently unknown purpose
74 Kernel::SharedPtr<Kernel::Event> zero = nullptr;
75 /// Currently unknown purpose
76 Kernel::SharedPtr<Kernel::Event> one = nullptr;
77 /// Each DSP pipe has an associated interrupt
78 std::array<Kernel::SharedPtr<Kernel::Event>, DSP::HLE::NUM_DSP_PIPE> pipe = {{}};
29}; 79};
30 80
31/// Map of (audio interrupt number, channel number) to Kernel::Events. See: RegisterInterruptEvents 81static InterruptEvents interrupt_events;
32static std::unordered_map<std::pair<u32, u32>, Kernel::SharedPtr<Kernel::Event>, PairHash> interrupt_events;
33 82
34// DSP Interrupts: 83// DSP Interrupts:
35// Interrupt #2 occurs every frame tick. Userland programs normally have a thread that's waiting 84// The audio-pipe interrupt occurs every frame tick. Userland programs normally have a thread
36// for an interrupt event. Immediately after this interrupt event, userland normally updates the 85// that's waiting for an interrupt event. Immediately after this interrupt event, userland
37// state in the next region and increments the relevant frame counter by two. 86// normally updates the state in the next region and increments the relevant frame counter by
38void SignalAllInterrupts() { 87// two.
39 // HACK: The other interrupts have currently unknown purpose, we trigger them each tick in any case. 88void SignalPipeInterrupt(DspPipe pipe) {
40 for (auto& interrupt_event : interrupt_events) 89 interrupt_events.Signal(InterruptType::Pipe, pipe);
41 interrupt_event.second->Signal();
42}
43
44void SignalInterrupt(u32 interrupt, u32 channel) {
45 interrupt_events[std::make_pair(interrupt, channel)]->Signal();
46} 90}
47 91
48/** 92/**
@@ -58,7 +102,10 @@ static void ConvertProcessAddressFromDspDram(Service::Interface* self) {
58 102
59 u32 addr = cmd_buff[1]; 103 u32 addr = cmd_buff[1];
60 104
105 cmd_buff[0] = IPC::MakeHeader(0xC, 2, 0);
61 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 106 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
107
108 // TODO(merry): There is a per-region offset missing in this calculation (that seems to be always zero).
62 cmd_buff[2] = (addr << 1) + (Memory::DSP_RAM_VADDR + 0x40000); 109 cmd_buff[2] = (addr << 1) + (Memory::DSP_RAM_VADDR + 0x40000);
63 110
64 LOG_DEBUG(Service_DSP, "addr=0x%08X", addr); 111 LOG_DEBUG(Service_DSP, "addr=0x%08X", addr);
@@ -113,7 +160,9 @@ static void LoadComponent(Service::Interface* self) {
113static void GetSemaphoreEventHandle(Service::Interface* self) { 160static void GetSemaphoreEventHandle(Service::Interface* self) {
114 u32* cmd_buff = Kernel::GetCommandBuffer(); 161 u32* cmd_buff = Kernel::GetCommandBuffer();
115 162
163 cmd_buff[0] = IPC::MakeHeader(0x16, 1, 2);
116 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 164 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
165 // cmd_buff[2] not set
117 cmd_buff[3] = Kernel::g_handle_table.Create(semaphore_event).MoveFrom(); // Event handle 166 cmd_buff[3] = Kernel::g_handle_table.Create(semaphore_event).MoveFrom(); // Event handle
118 167
119 LOG_WARNING(Service_DSP, "(STUBBED) called"); 168 LOG_WARNING(Service_DSP, "(STUBBED) called");
@@ -138,8 +187,7 @@ static void FlushDataCache(Service::Interface* self) {
138 u32 size = cmd_buff[2]; 187 u32 size = cmd_buff[2];
139 u32 process = cmd_buff[4]; 188 u32 process = cmd_buff[4];
140 189
141 // TODO(purpasmart96): Verify return header on HW 190 cmd_buff[0] = IPC::MakeHeader(0x13, 1, 0);
142
143 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 191 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
144 192
145 LOG_TRACE(Service_DSP, "called address=0x%08X, size=0x%X, process=0x%08X", address, size, process); 193 LOG_TRACE(Service_DSP, "called address=0x%08X, size=0x%X, process=0x%08X", address, size, process);
@@ -148,8 +196,8 @@ static void FlushDataCache(Service::Interface* self) {
148/** 196/**
149 * DSP_DSP::RegisterInterruptEvents service function 197 * DSP_DSP::RegisterInterruptEvents service function
150 * Inputs: 198 * Inputs:
151 * 1 : Interrupt Number 199 * 1 : Interrupt Type
152 * 2 : Channel Number 200 * 2 : Pipe Number
153 * 4 : Interrupt event handle 201 * 4 : Interrupt event handle
154 * Outputs: 202 * Outputs:
155 * 1 : Result of function, 0 on success, otherwise error code 203 * 1 : Result of function, 0 on success, otherwise error code
@@ -157,23 +205,40 @@ static void FlushDataCache(Service::Interface* self) {
157static void RegisterInterruptEvents(Service::Interface* self) { 205static void RegisterInterruptEvents(Service::Interface* self) {
158 u32* cmd_buff = Kernel::GetCommandBuffer(); 206 u32* cmd_buff = Kernel::GetCommandBuffer();
159 207
160 u32 interrupt = cmd_buff[1]; 208 u32 type_index = cmd_buff[1];
161 u32 channel = cmd_buff[2]; 209 u32 pipe_index = cmd_buff[2];
162 u32 event_handle = cmd_buff[4]; 210 u32 event_handle = cmd_buff[4];
163 211
212 ASSERT_MSG(type_index < NUM_INTERRUPT_TYPE && pipe_index < DSP::HLE::NUM_DSP_PIPE,
213 "Invalid type or pipe: type = %u, pipe = %u", type_index, pipe_index);
214
215 InterruptType type = static_cast<InterruptType>(cmd_buff[1]);
216 DspPipe pipe = static_cast<DspPipe>(cmd_buff[2]);
217
218 cmd_buff[0] = IPC::MakeHeader(0x15, 1, 0);
219
164 if (event_handle) { 220 if (event_handle) {
165 auto evt = Kernel::g_handle_table.Get<Kernel::Event>(cmd_buff[4]); 221 auto evt = Kernel::g_handle_table.Get<Kernel::Event>(cmd_buff[4]);
166 if (evt) { 222
167 interrupt_events[std::make_pair(interrupt, channel)] = evt; 223 if (!evt) {
168 cmd_buff[1] = RESULT_SUCCESS.raw; 224 LOG_INFO(Service_DSP, "Invalid event handle! type=%u, pipe=%u, event_handle=0x%08X", type_index, pipe_index, event_handle);
169 LOG_INFO(Service_DSP, "Registered interrupt=%u, channel=%u, event_handle=0x%08X", interrupt, channel, event_handle); 225 ASSERT(false); // TODO: This should really be handled at an IPC translation layer.
170 } else { 226 }
171 LOG_CRITICAL(Service_DSP, "Invalid event handle! interrupt=%u, channel=%u, event_handle=0x%08X", interrupt, channel, event_handle); 227
172 ASSERT(false); // This should really be handled at a IPC translation layer. 228 if (interrupt_events.HasTooManyEventsRegistered()) {
229 LOG_INFO(Service_DSP, "Ran out of space to register interrupts (Attempted to register type=%u, pipe=%u, event_handle=0x%08X)",
230 type_index, pipe_index, event_handle);
231 cmd_buff[1] = ResultCode(ErrorDescription::InvalidResultValue, ErrorModule::DSP, ErrorSummary::OutOfResource, ErrorLevel::Status).raw;
232 return;
173 } 233 }
234
235 interrupt_events.Get(type, pipe) = evt;
236 LOG_INFO(Service_DSP, "Registered type=%u, pipe=%u, event_handle=0x%08X", type_index, pipe_index, event_handle);
237 cmd_buff[1] = RESULT_SUCCESS.raw;
174 } else { 238 } else {
175 interrupt_events.erase(std::make_pair(interrupt, channel)); 239 interrupt_events.Get(type, pipe) = nullptr;
176 LOG_INFO(Service_DSP, "Unregistered interrupt=%u, channel=%u, event_handle=0x%08X", interrupt, channel, event_handle); 240 LOG_INFO(Service_DSP, "Unregistered interrupt=%u, channel=%u, event_handle=0x%08X", type_index, pipe_index, event_handle);
241 cmd_buff[1] = RESULT_SUCCESS.raw;
177 } 242 }
178} 243}
179 244
@@ -187,6 +252,7 @@ static void RegisterInterruptEvents(Service::Interface* self) {
187static void SetSemaphore(Service::Interface* self) { 252static void SetSemaphore(Service::Interface* self) {
188 u32* cmd_buff = Kernel::GetCommandBuffer(); 253 u32* cmd_buff = Kernel::GetCommandBuffer();
189 254
255 cmd_buff[0] = IPC::MakeHeader(0x7, 1, 0);
190 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 256 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
191 257
192 LOG_WARNING(Service_DSP, "(STUBBED) called"); 258 LOG_WARNING(Service_DSP, "(STUBBED) called");
@@ -195,7 +261,7 @@ static void SetSemaphore(Service::Interface* self) {
195/** 261/**
196 * DSP_DSP::WriteProcessPipe service function 262 * DSP_DSP::WriteProcessPipe service function
197 * Inputs: 263 * Inputs:
198 * 1 : Channel 264 * 1 : Pipe Number
199 * 2 : Size 265 * 2 : Size
200 * 3 : (size << 14) | 0x402 266 * 3 : (size << 14) | 0x402
201 * 4 : Buffer 267 * 4 : Buffer
@@ -206,24 +272,32 @@ static void SetSemaphore(Service::Interface* self) {
206static void WriteProcessPipe(Service::Interface* self) { 272static void WriteProcessPipe(Service::Interface* self) {
207 u32* cmd_buff = Kernel::GetCommandBuffer(); 273 u32* cmd_buff = Kernel::GetCommandBuffer();
208 274
209 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(cmd_buff[1]); 275 u32 pipe_index = cmd_buff[1];
210 u32 size = cmd_buff[2]; 276 u32 size = cmd_buff[2];
211 u32 buffer = cmd_buff[4]; 277 u32 buffer = cmd_buff[4];
212 278
213 ASSERT_MSG(IPC::StaticBufferDesc(size, 1) == cmd_buff[3], "IPC static buffer descriptor failed validation (0x%X). pipe=%u, size=0x%X, buffer=0x%08X", cmd_buff[3], pipe, size, buffer); 279 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(pipe_index);
214 ASSERT_MSG(Memory::GetPointer(buffer) != nullptr, "Invalid Buffer: pipe=%u, size=0x%X, buffer=0x%08X", pipe, size, buffer);
215 280
216 std::vector<u8> message(size); 281 if (IPC::StaticBufferDesc(size, 1) != cmd_buff[3]) {
282 LOG_ERROR(Service_DSP, "IPC static buffer descriptor failed validation (0x%X). pipe=%u, size=0x%X, buffer=0x%08X", cmd_buff[3], pipe_index, size, buffer);
283 cmd_buff[0] = IPC::MakeHeader(0, 1, 0);
284 cmd_buff[1] = ResultCode(ErrorDescription::OS_InvalidBufferDescriptor, ErrorModule::OS, ErrorSummary::WrongArgument, ErrorLevel::Permanent).raw;
285 return;
286 }
287
288 ASSERT_MSG(Memory::GetPointer(buffer) != nullptr, "Invalid Buffer: pipe=%u, size=0x%X, buffer=0x%08X", pipe_index, size, buffer);
217 289
218 for (size_t i = 0; i < size; i++) { 290 std::vector<u8> message(size);
291 for (u32 i = 0; i < size; i++) {
219 message[i] = Memory::Read8(buffer + i); 292 message[i] = Memory::Read8(buffer + i);
220 } 293 }
221 294
222 DSP::HLE::PipeWrite(pipe, message); 295 DSP::HLE::PipeWrite(pipe, message);
223 296
297 cmd_buff[0] = IPC::MakeHeader(0xD, 1, 0);
224 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 298 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
225 299
226 LOG_DEBUG(Service_DSP, "pipe=%u, size=0x%X, buffer=0x%08X", pipe, size, buffer); 300 LOG_DEBUG(Service_DSP, "pipe=%u, size=0x%X, buffer=0x%08X", pipe_index, size, buffer);
227} 301}
228 302
229/** 303/**
@@ -243,13 +317,16 @@ static void WriteProcessPipe(Service::Interface* self) {
243static void ReadPipeIfPossible(Service::Interface* self) { 317static void ReadPipeIfPossible(Service::Interface* self) {
244 u32* cmd_buff = Kernel::GetCommandBuffer(); 318 u32* cmd_buff = Kernel::GetCommandBuffer();
245 319
246 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(cmd_buff[1]); 320 u32 pipe_index = cmd_buff[1];
247 u32 unknown = cmd_buff[2]; 321 u32 unknown = cmd_buff[2];
248 u32 size = cmd_buff[3] & 0xFFFF; // Lower 16 bits are size 322 u32 size = cmd_buff[3] & 0xFFFF; // Lower 16 bits are size
249 VAddr addr = cmd_buff[0x41]; 323 VAddr addr = cmd_buff[0x41];
250 324
251 ASSERT_MSG(Memory::GetPointer(addr) != nullptr, "Invalid addr: pipe=0x%08X, unknown=0x%08X, size=0x%X, buffer=0x%08X", pipe, unknown, size, addr); 325 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(pipe_index);
326
327 ASSERT_MSG(Memory::GetPointer(addr) != nullptr, "Invalid addr: pipe=%u, unknown=0x%08X, size=0x%X, buffer=0x%08X", pipe_index, unknown, size, addr);
252 328
329 cmd_buff[0] = IPC::MakeHeader(0x10, 1, 2);
253 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 330 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
254 if (DSP::HLE::GetPipeReadableSize(pipe) >= size) { 331 if (DSP::HLE::GetPipeReadableSize(pipe) >= size) {
255 std::vector<u8> response = DSP::HLE::PipeRead(pipe, size); 332 std::vector<u8> response = DSP::HLE::PipeRead(pipe, size);
@@ -260,8 +337,10 @@ static void ReadPipeIfPossible(Service::Interface* self) {
260 } else { 337 } else {
261 cmd_buff[2] = 0; // Return no data 338 cmd_buff[2] = 0; // Return no data
262 } 339 }
340 cmd_buff[3] = IPC::StaticBufferDesc(size, 0);
341 cmd_buff[4] = addr;
263 342
264 LOG_DEBUG(Service_DSP, "pipe=0x%08X, unknown=0x%08X, size=0x%X, buffer=0x%08X, return cmd_buff[2]=0x%08X", pipe, unknown, size, addr, cmd_buff[2]); 343 LOG_DEBUG(Service_DSP, "pipe=%u, unknown=0x%08X, size=0x%X, buffer=0x%08X, return cmd_buff[2]=0x%08X", pipe_index, unknown, size, addr, cmd_buff[2]);
265} 344}
266 345
267/** 346/**
@@ -278,26 +357,31 @@ static void ReadPipeIfPossible(Service::Interface* self) {
278static void ReadPipe(Service::Interface* self) { 357static void ReadPipe(Service::Interface* self) {
279 u32* cmd_buff = Kernel::GetCommandBuffer(); 358 u32* cmd_buff = Kernel::GetCommandBuffer();
280 359
281 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(cmd_buff[1]); 360 u32 pipe_index = cmd_buff[1];
282 u32 unknown = cmd_buff[2]; 361 u32 unknown = cmd_buff[2];
283 u32 size = cmd_buff[3] & 0xFFFF; // Lower 16 bits are size 362 u32 size = cmd_buff[3] & 0xFFFF; // Lower 16 bits are size
284 VAddr addr = cmd_buff[0x41]; 363 VAddr addr = cmd_buff[0x41];
285 364
286 ASSERT_MSG(Memory::GetPointer(addr) != nullptr, "Invalid addr: pipe=0x%08X, unknown=0x%08X, size=0x%X, buffer=0x%08X", pipe, unknown, size, addr); 365 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(pipe_index);
366
367 ASSERT_MSG(Memory::GetPointer(addr) != nullptr, "Invalid addr: pipe=%u, unknown=0x%08X, size=0x%X, buffer=0x%08X", pipe_index, unknown, size, addr);
287 368
288 if (DSP::HLE::GetPipeReadableSize(pipe) >= size) { 369 if (DSP::HLE::GetPipeReadableSize(pipe) >= size) {
289 std::vector<u8> response = DSP::HLE::PipeRead(pipe, size); 370 std::vector<u8> response = DSP::HLE::PipeRead(pipe, size);
290 371
291 Memory::WriteBlock(addr, response.data(), response.size()); 372 Memory::WriteBlock(addr, response.data(), response.size());
292 373
374 cmd_buff[0] = IPC::MakeHeader(0xE, 2, 2);
293 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 375 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
294 cmd_buff[2] = static_cast<u32>(response.size()); 376 cmd_buff[2] = static_cast<u32>(response.size());
377 cmd_buff[3] = IPC::StaticBufferDesc(size, 0);
378 cmd_buff[4] = addr;
295 } else { 379 } else {
296 // No more data is in pipe. Hardware hangs in this case; this should never happen. 380 // No more data is in pipe. Hardware hangs in this case; this should never happen.
297 UNREACHABLE(); 381 UNREACHABLE();
298 } 382 }
299 383
300 LOG_DEBUG(Service_DSP, "pipe=0x%08X, unknown=0x%08X, size=0x%X, buffer=0x%08X, return cmd_buff[2]=0x%08X", pipe, unknown, size, addr, cmd_buff[2]); 384 LOG_DEBUG(Service_DSP, "pipe=%u, unknown=0x%08X, size=0x%X, buffer=0x%08X, return cmd_buff[2]=0x%08X", pipe_index, unknown, size, addr, cmd_buff[2]);
301} 385}
302 386
303/** 387/**
@@ -312,13 +396,16 @@ static void ReadPipe(Service::Interface* self) {
312static void GetPipeReadableSize(Service::Interface* self) { 396static void GetPipeReadableSize(Service::Interface* self) {
313 u32* cmd_buff = Kernel::GetCommandBuffer(); 397 u32* cmd_buff = Kernel::GetCommandBuffer();
314 398
315 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(cmd_buff[1]); 399 u32 pipe_index = cmd_buff[1];
316 u32 unknown = cmd_buff[2]; 400 u32 unknown = cmd_buff[2];
317 401
402 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(pipe_index);
403
404 cmd_buff[0] = IPC::MakeHeader(0xF, 2, 0);
318 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 405 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
319 cmd_buff[2] = DSP::HLE::GetPipeReadableSize(pipe); 406 cmd_buff[2] = static_cast<u32>(DSP::HLE::GetPipeReadableSize(pipe));
320 407
321 LOG_DEBUG(Service_DSP, "pipe=0x%08X, unknown=0x%08X, return cmd_buff[2]=0x%08X", pipe, unknown, cmd_buff[2]); 408 LOG_DEBUG(Service_DSP, "pipe=%u, unknown=0x%08X, return cmd_buff[2]=0x%08X", pipe_index, unknown, cmd_buff[2]);
322} 409}
323 410
324/** 411/**
@@ -333,6 +420,7 @@ static void SetSemaphoreMask(Service::Interface* self) {
333 420
334 u32 mask = cmd_buff[1]; 421 u32 mask = cmd_buff[1];
335 422
423 cmd_buff[0] = IPC::MakeHeader(0x17, 1, 0);
336 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 424 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
337 425
338 LOG_WARNING(Service_DSP, "(STUBBED) called mask=0x%08X", mask); 426 LOG_WARNING(Service_DSP, "(STUBBED) called mask=0x%08X", mask);
@@ -350,6 +438,7 @@ static void SetSemaphoreMask(Service::Interface* self) {
350static void GetHeadphoneStatus(Service::Interface* self) { 438static void GetHeadphoneStatus(Service::Interface* self) {
351 u32* cmd_buff = Kernel::GetCommandBuffer(); 439 u32* cmd_buff = Kernel::GetCommandBuffer();
352 440
441 cmd_buff[0] = IPC::MakeHeader(0x1F, 2, 0);
353 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 442 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
354 cmd_buff[2] = 0; // Not using headphones? 443 cmd_buff[2] = 0; // Not using headphones?
355 444
@@ -376,6 +465,7 @@ static void RecvData(Service::Interface* self) {
376 465
377 // Application reads this after requesting DSP shutdown, to verify the DSP has indeed shutdown or slept. 466 // Application reads this after requesting DSP shutdown, to verify the DSP has indeed shutdown or slept.
378 467
468 cmd_buff[0] = IPC::MakeHeader(0x1, 2, 0);
379 cmd_buff[1] = RESULT_SUCCESS.raw; 469 cmd_buff[1] = RESULT_SUCCESS.raw;
380 switch (DSP::HLE::GetDspState()) { 470 switch (DSP::HLE::GetDspState()) {
381 case DSP::HLE::DspState::On: 471 case DSP::HLE::DspState::On:
@@ -411,6 +501,7 @@ static void RecvDataIsReady(Service::Interface* self) {
411 501
412 ASSERT_MSG(register_number == 0, "Unknown register_number %u", register_number); 502 ASSERT_MSG(register_number == 0, "Unknown register_number %u", register_number);
413 503
504 cmd_buff[0] = IPC::MakeHeader(0x2, 2, 0);
414 cmd_buff[1] = RESULT_SUCCESS.raw; 505 cmd_buff[1] = RESULT_SUCCESS.raw;
415 cmd_buff[2] = 1; // Ready to read 506 cmd_buff[2] = 1; // Ready to read
416 507
@@ -458,14 +549,14 @@ const Interface::FunctionInfo FunctionTable[] = {
458 549
459Interface::Interface() { 550Interface::Interface() {
460 semaphore_event = Kernel::Event::Create(Kernel::ResetType::OneShot, "DSP_DSP::semaphore_event"); 551 semaphore_event = Kernel::Event::Create(Kernel::ResetType::OneShot, "DSP_DSP::semaphore_event");
461 read_pipe_count = 0; 552 interrupt_events = {};
462 553
463 Register(FunctionTable); 554 Register(FunctionTable);
464} 555}
465 556
466Interface::~Interface() { 557Interface::~Interface() {
467 semaphore_event = nullptr; 558 semaphore_event = nullptr;
468 interrupt_events.clear(); 559 interrupt_events = {};
469} 560}
470 561
471} // namespace 562} // namespace
diff --git a/src/core/hle/service/dsp_dsp.h b/src/core/hle/service/dsp_dsp.h
index 32b89e9bb..22f6687cc 100644
--- a/src/core/hle/service/dsp_dsp.h
+++ b/src/core/hle/service/dsp_dsp.h
@@ -8,6 +8,12 @@
8 8
9#include "core/hle/service/service.h" 9#include "core/hle/service/service.h"
10 10
11namespace DSP {
12namespace HLE {
13enum class DspPipe;
14}
15}
16
11//////////////////////////////////////////////////////////////////////////////////////////////////// 17////////////////////////////////////////////////////////////////////////////////////////////////////
12// Namespace DSP_DSP 18// Namespace DSP_DSP
13 19
@@ -23,15 +29,10 @@ public:
23 } 29 }
24}; 30};
25 31
26/// Signal all audio related interrupts.
27void SignalAllInterrupts();
28
29/** 32/**
30 * Signal a specific audio related interrupt based on interrupt id and channel id. 33 * Signal a specific DSP related interrupt of type == InterruptType::Pipe, pipe == pipe.
31 * @param interrupt_id The interrupt id 34 * @param pipe The DSP pipe for which to signal an interrupt for.
32 * @param channel_id The channel id
33 * The significance of various values of interrupt_id and channel_id is not yet known.
34 */ 35 */
35void SignalInterrupt(u32 interrupt_id, u32 channel_id); 36void SignalPipeInterrupt(DSP::HLE::DspPipe pipe);
36 37
37} // namespace 38} // namespace DSP_DSP
diff --git a/src/core/hle/service/fs/archive.cpp b/src/core/hle/service/fs/archive.cpp
index e9588cb72..cc51ede0c 100644
--- a/src/core/hle/service/fs/archive.cpp
+++ b/src/core/hle/service/fs/archive.cpp
@@ -114,6 +114,7 @@ ResultVal<bool> File::SyncRequest() {
114 return read.Code(); 114 return read.Code();
115 } 115 }
116 cmd_buff[2] = static_cast<u32>(*read); 116 cmd_buff[2] = static_cast<u32>(*read);
117 Memory::RasterizerFlushAndInvalidateRegion(Memory::VirtualToPhysicalAddress(address), length);
117 break; 118 break;
118 } 119 }
119 120
diff --git a/src/core/hle/service/fs/fs_user.cpp b/src/core/hle/service/fs/fs_user.cpp
index 3ec7ceb30..7df7da5a4 100644
--- a/src/core/hle/service/fs/fs_user.cpp
+++ b/src/core/hle/service/fs/fs_user.cpp
@@ -250,7 +250,7 @@ static void CreateFile(Service::Interface* self) {
250 250
251 FileSys::Path file_path(filename_type, filename_size, filename_ptr); 251 FileSys::Path file_path(filename_type, filename_size, filename_ptr);
252 252
253 LOG_DEBUG(Service_FS, "type=%d size=%llu data=%s", filename_type, filename_size, file_path.DebugStr().c_str()); 253 LOG_DEBUG(Service_FS, "type=%d size=%llu data=%s", filename_type, file_size, file_path.DebugStr().c_str());
254 254
255 cmd_buff[1] = CreateFileInArchive(archive_handle, file_path, file_size).raw; 255 cmd_buff[1] = CreateFileInArchive(archive_handle, file_path, file_size).raw;
256} 256}
diff --git a/src/core/hle/service/gsp_gpu.cpp b/src/core/hle/service/gsp_gpu.cpp
index 0c655395e..b4c146e08 100644
--- a/src/core/hle/service/gsp_gpu.cpp
+++ b/src/core/hle/service/gsp_gpu.cpp
@@ -15,8 +15,6 @@
15 15
16#include "video_core/gpu_debugger.h" 16#include "video_core/gpu_debugger.h"
17#include "video_core/debug_utils/debug_utils.h" 17#include "video_core/debug_utils/debug_utils.h"
18#include "video_core/renderer_base.h"
19#include "video_core/video_core.h"
20 18
21#include "gsp_gpu.h" 19#include "gsp_gpu.h"
22 20
@@ -45,6 +43,8 @@ Kernel::SharedPtr<Kernel::SharedMemory> g_shared_memory;
45/// Thread index into interrupt relay queue 43/// Thread index into interrupt relay queue
46u32 g_thread_id = 0; 44u32 g_thread_id = 0;
47 45
46static bool gpu_right_acquired = false;
47
48/// Gets a pointer to a thread command buffer in GSP shared memory 48/// Gets a pointer to a thread command buffer in GSP shared memory
49static inline u8* GetCommandBuffer(u32 thread_id) { 49static inline u8* GetCommandBuffer(u32 thread_id) {
50 return g_shared_memory->GetPointer(0x800 + (thread_id * sizeof(CommandBuffer))); 50 return g_shared_memory->GetPointer(0x800 + (thread_id * sizeof(CommandBuffer)));
@@ -291,8 +291,6 @@ static void FlushDataCache(Service::Interface* self) {
291 u32 size = cmd_buff[2]; 291 u32 size = cmd_buff[2];
292 u32 process = cmd_buff[4]; 292 u32 process = cmd_buff[4];
293 293
294 VideoCore::g_renderer->Rasterizer()->InvalidateRegion(Memory::VirtualToPhysicalAddress(address), size);
295
296 // TODO(purpasmart96): Verify return header on HW 294 // TODO(purpasmart96): Verify return header on HW
297 295
298 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 296 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
@@ -374,6 +372,9 @@ static void UnregisterInterruptRelayQueue(Service::Interface* self) {
374 * @todo This probably does not belong in the GSP module, instead move to video_core 372 * @todo This probably does not belong in the GSP module, instead move to video_core
375 */ 373 */
376void SignalInterrupt(InterruptId interrupt_id) { 374void SignalInterrupt(InterruptId interrupt_id) {
375 if (!gpu_right_acquired) {
376 return;
377 }
377 if (nullptr == g_interrupt_event) { 378 if (nullptr == g_interrupt_event) {
378 LOG_WARNING(Service_GSP, "cannot synchronize until GSP event has been created!"); 379 LOG_WARNING(Service_GSP, "cannot synchronize until GSP event has been created!");
379 return; 380 return;
@@ -408,6 +409,8 @@ void SignalInterrupt(InterruptId interrupt_id) {
408 g_interrupt_event->Signal(); 409 g_interrupt_event->Signal();
409} 410}
410 411
412MICROPROFILE_DEFINE(GPU_GSP_DMA, "GPU", "GSP DMA", MP_RGB(100, 0, 255));
413
411/// Executes the next GSP command 414/// Executes the next GSP command
412static void ExecuteCommand(const Command& command, u32 thread_id) { 415static void ExecuteCommand(const Command& command, u32 thread_id) {
413 // Utility function to convert register ID to address 416 // Utility function to convert register ID to address
@@ -419,18 +422,21 @@ static void ExecuteCommand(const Command& command, u32 thread_id) {
419 422
420 // GX request DMA - typically used for copying memory from GSP heap to VRAM 423 // GX request DMA - typically used for copying memory from GSP heap to VRAM
421 case CommandId::REQUEST_DMA: 424 case CommandId::REQUEST_DMA:
422 VideoCore::g_renderer->Rasterizer()->FlushRegion(Memory::VirtualToPhysicalAddress(command.dma_request.source_address), 425 {
423 command.dma_request.size); 426 MICROPROFILE_SCOPE(GPU_GSP_DMA);
427
428 // TODO: Consider attempting rasterizer-accelerated surface blit if that usage is ever possible/likely
429 Memory::RasterizerFlushRegion(Memory::VirtualToPhysicalAddress(command.dma_request.source_address),
430 command.dma_request.size);
431 Memory::RasterizerFlushAndInvalidateRegion(Memory::VirtualToPhysicalAddress(command.dma_request.dest_address),
432 command.dma_request.size);
424 433
425 memcpy(Memory::GetPointer(command.dma_request.dest_address), 434 memcpy(Memory::GetPointer(command.dma_request.dest_address),
426 Memory::GetPointer(command.dma_request.source_address), 435 Memory::GetPointer(command.dma_request.source_address),
427 command.dma_request.size); 436 command.dma_request.size);
428 SignalInterrupt(InterruptId::DMA); 437 SignalInterrupt(InterruptId::DMA);
429
430 VideoCore::g_renderer->Rasterizer()->InvalidateRegion(Memory::VirtualToPhysicalAddress(command.dma_request.dest_address),
431 command.dma_request.size);
432 break; 438 break;
433 439 }
434 // TODO: This will need some rework in the future. (why?) 440 // TODO: This will need some rework in the future. (why?)
435 case CommandId::SUBMIT_GPU_CMDLIST: 441 case CommandId::SUBMIT_GPU_CMDLIST:
436 { 442 {
@@ -517,13 +523,8 @@ static void ExecuteCommand(const Command& command, u32 thread_id) {
517 523
518 case CommandId::CACHE_FLUSH: 524 case CommandId::CACHE_FLUSH:
519 { 525 {
520 for (auto& region : command.cache_flush.regions) { 526 // NOTE: Rasterizer flushing handled elsewhere in CPU read/write and other GPU handlers
521 if (region.size == 0) 527 // Use command.cache_flush.regions to implement this handler
522 break;
523
524 VideoCore::g_renderer->Rasterizer()->InvalidateRegion(
525 Memory::VirtualToPhysicalAddress(region.address), region.size);
526 }
527 break; 528 break;
528 } 529 }
529 530
@@ -628,6 +629,35 @@ static void ImportDisplayCaptureInfo(Service::Interface* self) {
628 LOG_WARNING(Service_GSP, "called"); 629 LOG_WARNING(Service_GSP, "called");
629} 630}
630 631
632/**
633 * GSP_GPU::AcquireRight service function
634 * Outputs:
635 * 1: Result code
636 */
637static void AcquireRight(Service::Interface* self) {
638 u32* cmd_buff = Kernel::GetCommandBuffer();
639
640 gpu_right_acquired = true;
641
642 cmd_buff[1] = RESULT_SUCCESS.raw;
643
644 LOG_WARNING(Service_GSP, "called");
645}
646
647/**
648 * GSP_GPU::ReleaseRight service function
649 * Outputs:
650 * 1: Result code
651 */
652static void ReleaseRight(Service::Interface* self) {
653 u32* cmd_buff = Kernel::GetCommandBuffer();
654
655 gpu_right_acquired = false;
656
657 cmd_buff[1] = RESULT_SUCCESS.raw;
658
659 LOG_WARNING(Service_GSP, "called");
660}
631 661
632const Interface::FunctionInfo FunctionTable[] = { 662const Interface::FunctionInfo FunctionTable[] = {
633 {0x00010082, WriteHWRegs, "WriteHWRegs"}, 663 {0x00010082, WriteHWRegs, "WriteHWRegs"},
@@ -651,8 +681,8 @@ const Interface::FunctionInfo FunctionTable[] = {
651 {0x00130042, RegisterInterruptRelayQueue, "RegisterInterruptRelayQueue"}, 681 {0x00130042, RegisterInterruptRelayQueue, "RegisterInterruptRelayQueue"},
652 {0x00140000, UnregisterInterruptRelayQueue, "UnregisterInterruptRelayQueue"}, 682 {0x00140000, UnregisterInterruptRelayQueue, "UnregisterInterruptRelayQueue"},
653 {0x00150002, nullptr, "TryAcquireRight"}, 683 {0x00150002, nullptr, "TryAcquireRight"},
654 {0x00160042, nullptr, "AcquireRight"}, 684 {0x00160042, AcquireRight, "AcquireRight"},
655 {0x00170000, nullptr, "ReleaseRight"}, 685 {0x00170000, ReleaseRight, "ReleaseRight"},
656 {0x00180000, ImportDisplayCaptureInfo, "ImportDisplayCaptureInfo"}, 686 {0x00180000, ImportDisplayCaptureInfo, "ImportDisplayCaptureInfo"},
657 {0x00190000, nullptr, "SaveVramSysArea"}, 687 {0x00190000, nullptr, "SaveVramSysArea"},
658 {0x001A0000, nullptr, "RestoreVramSysArea"}, 688 {0x001A0000, nullptr, "RestoreVramSysArea"},
@@ -673,11 +703,13 @@ Interface::Interface() {
673 g_shared_memory = nullptr; 703 g_shared_memory = nullptr;
674 704
675 g_thread_id = 0; 705 g_thread_id = 0;
706 gpu_right_acquired = false;
676} 707}
677 708
678Interface::~Interface() { 709Interface::~Interface() {
679 g_interrupt_event = nullptr; 710 g_interrupt_event = nullptr;
680 g_shared_memory = nullptr; 711 g_shared_memory = nullptr;
712 gpu_right_acquired = false;
681} 713}
682 714
683} // namespace 715} // namespace
diff --git a/src/core/hle/service/gsp_gpu.h b/src/core/hle/service/gsp_gpu.h
index 55a993bb8..3b4b678a3 100644
--- a/src/core/hle/service/gsp_gpu.h
+++ b/src/core/hle/service/gsp_gpu.h
@@ -10,6 +10,7 @@
10#include "common/bit_field.h" 10#include "common/bit_field.h"
11#include "common/common_types.h" 11#include "common/common_types.h"
12 12
13#include "core/hle/result.h"
13#include "core/hle/service/service.h" 14#include "core/hle/service/service.h"
14 15
15//////////////////////////////////////////////////////////////////////////////////////////////////// 16////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/core/hle/service/ndm/ndm.cpp b/src/core/hle/service/ndm/ndm.cpp
index 47076a7b8..bc9c3413d 100644
--- a/src/core/hle/service/ndm/ndm.cpp
+++ b/src/core/hle/service/ndm/ndm.cpp
@@ -11,28 +11,217 @@
11namespace Service { 11namespace Service {
12namespace NDM { 12namespace NDM {
13 13
14void SuspendDaemons(Service::Interface* self) { 14enum : u32 {
15 DEFAULT_RETRY_INTERVAL = 10,
16 DEFAULT_SCAN_INTERVAL = 30
17};
18
19static DaemonMask daemon_bit_mask = DaemonMask::Default;
20static DaemonMask default_daemon_bit_mask = DaemonMask::Default;
21static std::array<DaemonStatus, 4> daemon_status = { DaemonStatus::Idle, DaemonStatus::Idle, DaemonStatus::Idle, DaemonStatus::Idle };
22static ExclusiveState exclusive_state = ExclusiveState::None;
23static u32 scan_interval = DEFAULT_SCAN_INTERVAL;
24static u32 retry_interval = DEFAULT_RETRY_INTERVAL;
25static bool daemon_lock_enabled = false;
26
27void EnterExclusiveState(Service::Interface* self) {
28 u32* cmd_buff = Kernel::GetCommandBuffer();
29 exclusive_state = static_cast<ExclusiveState>(cmd_buff[1]);
30
31 cmd_buff[0] = IPC::MakeHeader(0x1, 1, 0);
32 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
33 LOG_WARNING(Service_NDM, "(STUBBED) exclusive_state=0x%08X ", exclusive_state);
34}
35
36void LeaveExclusiveState(Service::Interface* self) {
37 u32* cmd_buff = Kernel::GetCommandBuffer();
38 exclusive_state = ExclusiveState::None;
39
40 cmd_buff[0] = IPC::MakeHeader(0x2, 1, 0);
41 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
42 LOG_WARNING(Service_NDM, "(STUBBED) exclusive_state=0x%08X ", exclusive_state);
43}
44
45void QueryExclusiveMode(Service::Interface* self) {
15 u32* cmd_buff = Kernel::GetCommandBuffer(); 46 u32* cmd_buff = Kernel::GetCommandBuffer();
16 47
17 LOG_WARNING(Service_NDM, "(STUBBED) bit_mask=0x%08X ", cmd_buff[1]); 48 cmd_buff[0] = IPC::MakeHeader(0x3, 2, 0);
49 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
50 cmd_buff[2] = static_cast<u32>(exclusive_state);
51 LOG_WARNING(Service_NDM, "(STUBBED) exclusive_state=0x%08X ", exclusive_state);
52}
53
54void LockState(Service::Interface* self) {
55 u32* cmd_buff = Kernel::GetCommandBuffer();
56 daemon_lock_enabled = true;
57
58 cmd_buff[0] = IPC::MakeHeader(0x4, 1, 0);
59 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
60 LOG_WARNING(Service_NDM, "(STUBBED) daemon_lock_enabled=0x%08X ", daemon_lock_enabled);
61}
62
63void UnlockState(Service::Interface* self) {
64 u32* cmd_buff = Kernel::GetCommandBuffer();
65 daemon_lock_enabled = false;
18 66
67 cmd_buff[0] = IPC::MakeHeader(0x5, 1, 0);
19 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 68 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
69 LOG_WARNING(Service_NDM, "(STUBBED) daemon_lock_enabled=0x%08X ", daemon_lock_enabled);
70}
71
72void SuspendDaemons(Service::Interface* self) {
73 u32* cmd_buff = Kernel::GetCommandBuffer();
74 u32 bit_mask = cmd_buff[1] & 0xF;
75 daemon_bit_mask = static_cast<DaemonMask>(static_cast<u32>(default_daemon_bit_mask) & ~bit_mask);
76 for (size_t index = 0; index < daemon_status.size(); ++index) {
77 if (bit_mask & (1 << index)) {
78 daemon_status[index] = DaemonStatus::Suspended;
79 }
80 }
81
82 cmd_buff[0] = IPC::MakeHeader(0x6, 1, 0);
83 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
84 LOG_WARNING(Service_NDM, "(STUBBED) daemon_bit_mask=0x%08X ", daemon_bit_mask);
20} 85}
21 86
22void ResumeDaemons(Service::Interface* self) { 87void ResumeDaemons(Service::Interface* self) {
23 u32* cmd_buff = Kernel::GetCommandBuffer(); 88 u32* cmd_buff = Kernel::GetCommandBuffer();
89 u32 bit_mask = cmd_buff[1] & 0xF;
90 daemon_bit_mask = static_cast<DaemonMask>(static_cast<u32>(daemon_bit_mask) | bit_mask);
91 for (size_t index = 0; index < daemon_status.size(); ++index) {
92 if (bit_mask & (1 << index)) {
93 daemon_status[index] = DaemonStatus::Idle;
94 }
95 }
96
97 cmd_buff[0] = IPC::MakeHeader(0x7, 1, 0);
98 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
99 LOG_WARNING(Service_NDM, "(STUBBED) daemon_bit_mask=0x%08X ", daemon_bit_mask);
100}
101
102void SuspendScheduler(Service::Interface* self) {
103 u32* cmd_buff = Kernel::GetCommandBuffer();
104
105 cmd_buff[0] = IPC::MakeHeader(0x8, 1, 0);
106 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
107 LOG_WARNING(Service_NDM, "(STUBBED) called");
108}
109
110void ResumeScheduler(Service::Interface* self) {
111 u32* cmd_buff = Kernel::GetCommandBuffer();
112
113 cmd_buff[0] = IPC::MakeHeader(0x9, 1, 0);
114 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
115 LOG_WARNING(Service_NDM, "(STUBBED) called");
116}
117
118void QueryStatus(Service::Interface* self) {
119 u32* cmd_buff = Kernel::GetCommandBuffer();
120 u32 daemon = cmd_buff[1] & 0xF;
24 121
25 LOG_WARNING(Service_NDM, "(STUBBED) bit_mask=0x%08X ", cmd_buff[1]); 122 cmd_buff[0] = IPC::MakeHeader(0xD, 2, 0);
123 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
124 cmd_buff[2] = static_cast<u32>(daemon_status.at(daemon));
125 LOG_WARNING(Service_NDM, "(STUBBED) daemon=0x%08X, daemon_status=0x%08X", daemon, cmd_buff[2]);
126}
127
128void GetDaemonDisableCount(Service::Interface* self) {
129 u32* cmd_buff = Kernel::GetCommandBuffer();
130 u32 daemon = cmd_buff[1] & 0xF;
131
132 cmd_buff[0] = IPC::MakeHeader(0xE, 3, 0);
133 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
134 cmd_buff[2] = 0;
135 cmd_buff[3] = 0;
136 LOG_WARNING(Service_NDM, "(STUBBED) daemon=0x%08X", daemon);
137}
138
139void GetSchedulerDisableCount(Service::Interface* self) {
140 u32* cmd_buff = Kernel::GetCommandBuffer();
141
142 cmd_buff[0] = IPC::MakeHeader(0xF, 3, 0);
143 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
144 cmd_buff[2] = 0;
145 cmd_buff[3] = 0;
146 LOG_WARNING(Service_NDM, "(STUBBED) called");
147}
148
149void SetScanInterval(Service::Interface* self) {
150 u32* cmd_buff = Kernel::GetCommandBuffer();
151 scan_interval = cmd_buff[1];
26 152
153 cmd_buff[0] = IPC::MakeHeader(0x10, 1, 0);
27 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 154 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
155 LOG_WARNING(Service_NDM, "(STUBBED) scan_interval=0x%08X ", scan_interval);
156}
157
158void GetScanInterval(Service::Interface* self) {
159 u32* cmd_buff = Kernel::GetCommandBuffer();
160
161 cmd_buff[0] = IPC::MakeHeader(0x11, 2, 0);
162 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
163 cmd_buff[2] = scan_interval;
164 LOG_WARNING(Service_NDM, "(STUBBED) scan_interval=0x%08X ", scan_interval);
165}
166
167void SetRetryInterval(Service::Interface* self) {
168 u32* cmd_buff = Kernel::GetCommandBuffer();
169 retry_interval = cmd_buff[1];
170
171 cmd_buff[0] = IPC::MakeHeader(0x12, 1, 0);
172 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
173 LOG_WARNING(Service_NDM, "(STUBBED) retry_interval=0x%08X ", retry_interval);
174}
175
176void GetRetryInterval(Service::Interface* self) {
177 u32* cmd_buff = Kernel::GetCommandBuffer();
178
179 cmd_buff[0] = IPC::MakeHeader(0x13, 2, 0);
180 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
181 cmd_buff[2] = retry_interval;
182 LOG_WARNING(Service_NDM, "(STUBBED) retry_interval=0x%08X ", retry_interval);
28} 183}
29 184
30void OverrideDefaultDaemons(Service::Interface* self) { 185void OverrideDefaultDaemons(Service::Interface* self) {
31 u32* cmd_buff = Kernel::GetCommandBuffer(); 186 u32* cmd_buff = Kernel::GetCommandBuffer();
187 u32 bit_mask = cmd_buff[1] & 0xF;
188 default_daemon_bit_mask = static_cast<DaemonMask>(bit_mask);
189 daemon_bit_mask = default_daemon_bit_mask;
190 for (size_t index = 0; index < daemon_status.size(); ++index) {
191 if (bit_mask & (1 << index)) {
192 daemon_status[index] = DaemonStatus::Idle;
193 }
194 }
32 195
33 LOG_WARNING(Service_NDM, "(STUBBED) bit_mask=0x%08X ", cmd_buff[1]); 196 cmd_buff[0] = IPC::MakeHeader(0x14, 1, 0);
197 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
198 LOG_WARNING(Service_NDM, "(STUBBED) default_daemon_bit_mask=0x%08X ", default_daemon_bit_mask);
199}
200
201void ResetDefaultDaemons(Service::Interface* self) {
202 u32* cmd_buff = Kernel::GetCommandBuffer();
203 default_daemon_bit_mask = DaemonMask::Default;
204
205 cmd_buff[0] = IPC::MakeHeader(0x15, 1, 0);
206 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
207 LOG_WARNING(Service_NDM, "(STUBBED) default_daemon_bit_mask=0x%08X ", default_daemon_bit_mask);
208}
209
210void GetDefaultDaemons(Service::Interface* self) {
211 u32* cmd_buff = Kernel::GetCommandBuffer();
212
213 cmd_buff[0] = IPC::MakeHeader(0x16, 2, 0);
214 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
215 cmd_buff[2] = static_cast<u32>(default_daemon_bit_mask);
216 LOG_WARNING(Service_NDM, "(STUBBED) default_daemon_bit_mask=0x%08X ", default_daemon_bit_mask);
217}
218
219void ClearHalfAwakeMacFilter(Service::Interface* self) {
220 u32* cmd_buff = Kernel::GetCommandBuffer();
34 221
222 cmd_buff[0] = IPC::MakeHeader(0x17, 1, 0);
35 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 223 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
224 LOG_WARNING(Service_NDM, "(STUBBED) called");
36} 225}
37 226
38void Init() { 227void Init() {
diff --git a/src/core/hle/service/ndm/ndm.h b/src/core/hle/service/ndm/ndm.h
index 734730f8c..5c2b968dc 100644
--- a/src/core/hle/service/ndm/ndm.h
+++ b/src/core/hle/service/ndm/ndm.h
@@ -12,10 +12,91 @@ class Interface;
12 12
13namespace NDM { 13namespace NDM {
14 14
15enum class Daemon : u32 {
16 Cec = 0,
17 Boss = 1,
18 Nim = 2,
19 Friend = 3
20};
21
22enum class DaemonMask : u32 {
23 None = 0,
24 Cec = (1 << static_cast<u32>(Daemon::Cec)),
25 Boss = (1 << static_cast<u32>(Daemon::Boss)),
26 Nim = (1 << static_cast<u32>(Daemon::Nim)),
27 Friend = (1 << static_cast<u32>(Daemon::Friend)),
28 Default = Cec | Friend,
29 All = Cec | Boss | Nim | Friend
30};
31
32enum class DaemonStatus : u32 {
33 Busy = 0,
34 Idle = 1,
35 Suspending = 2,
36 Suspended = 3
37};
38
39enum class ExclusiveState : u32 {
40 None = 0,
41 Infrastructure = 1,
42 LocalCommunications = 2,
43 Streetpass = 3,
44 StreetpassData = 4,
45};
46
47/**
48 * NDM::EnterExclusiveState service function
49 * Inputs:
50 * 0 : Header code [0x00010042]
51 * 1 : Exclusive State
52 * 2 : 0x20
53 * Outputs:
54 * 1 : Result, 0 on success, otherwise error code
55 */
56void EnterExclusiveState(Service::Interface* self);
57
58/**
59 * NDM::LeaveExclusiveState service function
60 * Inputs:
61 * 0 : Header code [0x00020002]
62 * 1 : 0x20
63 * Outputs:
64 * 1 : Result, 0 on success, otherwise error code
65 */
66void LeaveExclusiveState(Service::Interface* self);
67
68/**
69 * NDM::QueryExclusiveMode service function
70 * Inputs:
71 * 0 : Header code [0x00030000]
72 * Outputs:
73 * 1 : Result, 0 on success, otherwise error code
74 * 2 : Current Exclusive State
75 */
76void QueryExclusiveMode(Service::Interface* self);
77
78/**
79 * NDM::LockState service function
80 * Inputs:
81 * 0 : Header code [0x00040002]
82 * Outputs:
83 * 1 : Result, 0 on success, otherwise error code
84 */
85void LockState(Service::Interface* self);
86
87/**
88 * NDM::UnlockState service function
89 * Inputs:
90 * 0 : Header code [0x00050002]
91 * Outputs:
92 * 1 : Result, 0 on success, otherwise error code
93 */
94void UnlockState(Service::Interface* self);
95
15/** 96/**
16 * SuspendDaemons 97 * NDM::SuspendDaemons service function
17 * Inputs: 98 * Inputs:
18 * 0 : Command header (0x00020082) 99 * 0 : Header code [0x00060040]
19 * 1 : Daemon bit mask 100 * 1 : Daemon bit mask
20 * Outputs: 101 * Outputs:
21 * 1 : Result, 0 on success, otherwise error code 102 * 1 : Result, 0 on success, otherwise error code
@@ -23,9 +104,9 @@ namespace NDM {
23void SuspendDaemons(Service::Interface* self); 104void SuspendDaemons(Service::Interface* self);
24 105
25/** 106/**
26 * ResumeDaemons 107 * NDM::ResumeDaemons service function
27 * Inputs: 108 * Inputs:
28 * 0 : Command header (0x00020082) 109 * 0 : Header code [0x00070040]
29 * 1 : Daemon bit mask 110 * 1 : Daemon bit mask
30 * Outputs: 111 * Outputs:
31 * 1 : Result, 0 on success, otherwise error code 112 * 1 : Result, 0 on success, otherwise error code
@@ -33,15 +114,138 @@ void SuspendDaemons(Service::Interface* self);
33void ResumeDaemons(Service::Interface* self); 114void ResumeDaemons(Service::Interface* self);
34 115
35/** 116/**
36 * OverrideDefaultDaemons 117 * NDM::SuspendScheduler service function
37 * Inputs: 118 * Inputs:
38 * 0 : Command header (0x00020082) 119 * 0 : Header code [0x00080040]
120 * Outputs:
121 * 1 : Result, 0 on success, otherwise error code
122 */
123void SuspendScheduler(Service::Interface* self);
124
125/**
126 * NDM::ResumeScheduler service function
127 * Inputs:
128 * 0 : Header code [0x00090000]
129 * Outputs:
130 * 1 : Result, 0 on success, otherwise error code
131 */
132void ResumeScheduler(Service::Interface* self);
133
134/**
135 * NDM::QueryStatus service function
136 * Inputs:
137 * 0 : Header code [0x000D0040]
138 * 1 : Daemon
139 * Outputs:
140 * 1 : Result, 0 on success, otherwise error code
141 * 2 : Daemon status
142 */
143void QueryStatus(Service::Interface* self);
144
145/**
146 * NDM::GetDaemonDisableCount service function
147 * Inputs:
148 * 0 : Header code [0x000E0040]
149 * 1 : Daemon
150 * Outputs:
151 * 1 : Result, 0 on success, otherwise error code
152 * 2 : Current process disable count
153 * 3 : Total disable count
154 */
155void GetDaemonDisableCount(Service::Interface* self);
156
157/**
158 * NDM::GetSchedulerDisableCount service function
159 * Inputs:
160 * 0 : Header code [0x000F0000]
161 * Outputs:
162 * 1 : Result, 0 on success, otherwise error code
163 * 2 : Current process disable count
164 * 3 : Total disable count
165 */
166void GetSchedulerDisableCount(Service::Interface* self);
167
168/**
169 * NDM::SetScanInterval service function
170 * Inputs:
171 * 0 : Header code [0x00100040]
172 * 1 : Interval (default = 30)
173 * Outputs:
174 * 1 : Result, 0 on success, otherwise error code
175 */
176void SetScanInterval(Service::Interface* self);
177
178/**
179 * NDM::GetScanInterval service function
180 * Inputs:
181 * 0 : Header code [0x00110000]
182 * Outputs:
183 * 1 : Result, 0 on success, otherwise error code
184 * 2 : Interval (default = 30)
185 */
186void GetScanInterval(Service::Interface* self);
187
188/**
189 * NDM::SetRetryInterval service function
190 * Inputs:
191 * 0 : Header code [0x00120040]
192 * 1 : Interval (default = 10)
193 * Outputs:
194 * 1 : Result, 0 on success, otherwise error code
195 */
196void SetRetryInterval(Service::Interface* self);
197
198/**
199 * NDM::GetRetryInterval service function
200 * Inputs:
201 * 0 : Header code [0x00130000]
202 * Outputs:
203 * 1 : Result, 0 on success, otherwise error code
204 * 2 : Interval (default = 10)
205 */
206void GetRetryInterval(Service::Interface* self);
207
208
209/**
210 * NDM::OverrideDefaultDaemons service function
211 * Inputs:
212 * 0 : Header code [0x00140040]
39 * 1 : Daemon bit mask 213 * 1 : Daemon bit mask
40 * Outputs: 214 * Outputs:
41 * 1 : Result, 0 on success, otherwise error code 215 * 1 : Result, 0 on success, otherwise error code
42 */ 216 */
43void OverrideDefaultDaemons(Service::Interface* self); 217void OverrideDefaultDaemons(Service::Interface* self);
44 218
219/**
220 * NDM::ResetDefaultDaemons service function
221 * Inputs:
222 * 0 : Header code [0x00150000]
223 * Outputs:
224 * 1 : Result, 0 on success, otherwise error code
225 */
226void ResetDefaultDaemons(Service::Interface* self);
227
228/**
229 * NDM::GetDefaultDaemons service function
230 * Inputs:
231 * 0 : Header code [0x00160000]
232 * Outputs:
233 * 1 : Result, 0 on success, otherwise error code
234 * 2 : Daemon bit mask
235 * Note:
236 * Gets the current default daemon bit mask. The default value is (DAEMONMASK_CEC | DAEMONMASK_FRIENDS)
237 */
238void GetDefaultDaemons(Service::Interface* self);
239
240/**
241 * NDM::ClearHalfAwakeMacFilter service function
242 * Inputs:
243 * 0 : Header code [0x00170000]
244 * Outputs:
245 * 1 : Result, 0 on success, otherwise error code
246 */
247void ClearHalfAwakeMacFilter(Service::Interface* self);
248
45/// Initialize NDM service 249/// Initialize NDM service
46void Init(); 250void Init();
47 251
diff --git a/src/core/hle/service/ndm/ndm_u.cpp b/src/core/hle/service/ndm/ndm_u.cpp
index bf95cc7aa..3ff0744ee 100644
--- a/src/core/hle/service/ndm/ndm_u.cpp
+++ b/src/core/hle/service/ndm/ndm_u.cpp
@@ -9,29 +9,29 @@ namespace Service {
9namespace NDM { 9namespace NDM {
10 10
11const Interface::FunctionInfo FunctionTable[] = { 11const Interface::FunctionInfo FunctionTable[] = {
12 {0x00010042, nullptr, "EnterExclusiveState"}, 12 {0x00010042, EnterExclusiveState, "EnterExclusiveState"},
13 {0x00020002, nullptr, "LeaveExclusiveState"}, 13 {0x00020002, LeaveExclusiveState, "LeaveExclusiveState"},
14 {0x00030000, nullptr, "QueryExclusiveMode"}, 14 {0x00030000, QueryExclusiveMode, "QueryExclusiveMode"},
15 {0x00040002, nullptr, "LockState"}, 15 {0x00040002, LockState, "LockState"},
16 {0x00050002, nullptr, "UnlockState"}, 16 {0x00050002, UnlockState, "UnlockState"},
17 {0x00060040, SuspendDaemons, "SuspendDaemons"}, 17 {0x00060040, SuspendDaemons, "SuspendDaemons"},
18 {0x00070040, ResumeDaemons, "ResumeDaemons"}, 18 {0x00070040, ResumeDaemons, "ResumeDaemons"},
19 {0x00080040, nullptr, "DisableWifiUsage"}, 19 {0x00080040, SuspendScheduler, "SuspendScheduler"},
20 {0x00090000, nullptr, "EnableWifiUsage"}, 20 {0x00090000, ResumeScheduler, "ResumeScheduler"},
21 {0x000A0000, nullptr, "GetCurrentState"}, 21 {0x000A0000, nullptr, "GetCurrentState"},
22 {0x000B0000, nullptr, "GetTargetState"}, 22 {0x000B0000, nullptr, "GetTargetState"},
23 {0x000C0000, nullptr, "<Stubbed>"}, 23 {0x000C0000, nullptr, "<Stubbed>"},
24 {0x000D0040, nullptr, "QueryStatus"}, 24 {0x000D0040, QueryStatus, "QueryStatus"},
25 {0x000E0040, nullptr, "GetDaemonDisableCount"}, 25 {0x000E0040, GetDaemonDisableCount, "GetDaemonDisableCount"},
26 {0x000F0000, nullptr, "GetSchedulerDisableCount"}, 26 {0x000F0000, GetSchedulerDisableCount,"GetSchedulerDisableCount"},
27 {0x00100040, nullptr, "SetScanInterval"}, 27 {0x00100040, SetScanInterval, "SetScanInterval"},
28 {0x00110000, nullptr, "GetScanInterval"}, 28 {0x00110000, GetScanInterval, "GetScanInterval"},
29 {0x00120040, nullptr, "SetRetryInterval"}, 29 {0x00120040, SetRetryInterval, "SetRetryInterval"},
30 {0x00130000, nullptr, "GetRetryInterval"}, 30 {0x00130000, GetRetryInterval, "GetRetryInterval"},
31 {0x00140040, OverrideDefaultDaemons, "OverrideDefaultDaemons"}, 31 {0x00140040, OverrideDefaultDaemons, "OverrideDefaultDaemons"},
32 {0x00150000, nullptr, "ResetDefaultDaemons"}, 32 {0x00150000, ResetDefaultDaemons, "ResetDefaultDaemons"},
33 {0x00160000, nullptr, "GetDefaultDaemons"}, 33 {0x00160000, GetDefaultDaemons, "GetDefaultDaemons"},
34 {0x00170000, nullptr, "ClearHalfAwakeMacFilter"}, 34 {0x00170000, ClearHalfAwakeMacFilter, "ClearHalfAwakeMacFilter"},
35}; 35};
36 36
37NDM_U_Interface::NDM_U_Interface() { 37NDM_U_Interface::NDM_U_Interface() {
diff --git a/src/core/hle/service/soc_u.cpp b/src/core/hle/service/soc_u.cpp
index ff0af8f12..d3e5d4bca 100644
--- a/src/core/hle/service/soc_u.cpp
+++ b/src/core/hle/service/soc_u.cpp
@@ -151,6 +151,34 @@ static int TranslateError(int error) {
151 return error; 151 return error;
152} 152}
153 153
154/// Holds the translation from system network socket options to 3DS network socket options
155/// Note: -1 = No effect/unavailable
156static const std::unordered_map<int, int> sockopt_map = { {
157 { 0x0004, SO_REUSEADDR },
158 { 0x0080, -1 },
159 { 0x0100, -1 },
160 { 0x1001, SO_SNDBUF },
161 { 0x1002, SO_RCVBUF },
162 { 0x1003, -1 },
163#ifdef _WIN32
164 /// Unsupported in WinSock2
165 { 0x1004, -1 },
166#else
167 { 0x1004, SO_RCVLOWAT },
168#endif
169 { 0x1008, SO_TYPE },
170 { 0x1009, SO_ERROR },
171}};
172
173/// Converts a socket option from 3ds-specific to platform-specific
174static int TranslateSockOpt(int console_opt_name) {
175 auto found = sockopt_map.find(console_opt_name);
176 if (found != sockopt_map.end()) {
177 return found->second;
178 }
179 return console_opt_name;
180}
181
154/// Holds information about a particular socket 182/// Holds information about a particular socket
155struct SocketHolder { 183struct SocketHolder {
156 u32 socket_fd; ///< The socket descriptor 184 u32 socket_fd; ///< The socket descriptor
@@ -568,7 +596,7 @@ static void RecvFrom(Service::Interface* self) {
568 socklen_t src_addr_len = sizeof(src_addr); 596 socklen_t src_addr_len = sizeof(src_addr);
569 int ret = ::recvfrom(socket_handle, (char*)output_buff, len, flags, &src_addr, &src_addr_len); 597 int ret = ::recvfrom(socket_handle, (char*)output_buff, len, flags, &src_addr, &src_addr_len);
570 598
571 if (buffer_parameters.output_src_address_buffer != 0) { 599 if (ret >= 0 && buffer_parameters.output_src_address_buffer != 0 && src_addr_len > 0) {
572 CTRSockAddr* ctr_src_addr = reinterpret_cast<CTRSockAddr*>(Memory::GetPointer(buffer_parameters.output_src_address_buffer)); 600 CTRSockAddr* ctr_src_addr = reinterpret_cast<CTRSockAddr*>(Memory::GetPointer(buffer_parameters.output_src_address_buffer));
573 *ctr_src_addr = CTRSockAddr::FromPlatform(src_addr); 601 *ctr_src_addr = CTRSockAddr::FromPlatform(src_addr);
574 } 602 }
@@ -724,6 +752,72 @@ static void ShutdownSockets(Service::Interface* self) {
724 cmd_buffer[1] = 0; 752 cmd_buffer[1] = 0;
725} 753}
726 754
755static void GetSockOpt(Service::Interface* self) {
756 u32* cmd_buffer = Kernel::GetCommandBuffer();
757 u32 socket_handle = cmd_buffer[1];
758 u32 level = cmd_buffer[2];
759 int optname = TranslateSockOpt(cmd_buffer[3]);
760 socklen_t optlen = (socklen_t)cmd_buffer[4];
761
762 int ret = -1;
763 int err = 0;
764
765 if(optname < 0) {
766#ifdef _WIN32
767 err = WSAEINVAL;
768#else
769 err = EINVAL;
770#endif
771 } else {
772 // 0x100 = static buffer offset (bytes)
773 // + 0x4 = 2nd pointer (u32) position
774 // >> 2 = convert to u32 offset instead of byte offset (cmd_buffer = u32*)
775 char* optval = reinterpret_cast<char *>(Memory::GetPointer(cmd_buffer[0x104 >> 2]));
776
777 ret = ::getsockopt(socket_handle, level, optname, optval, &optlen);
778 err = 0;
779 if (ret == SOCKET_ERROR_VALUE) {
780 err = TranslateError(GET_ERRNO);
781 }
782 }
783
784 cmd_buffer[0] = IPC::MakeHeader(0x11, 4, 2);
785 cmd_buffer[1] = ret;
786 cmd_buffer[2] = err;
787 cmd_buffer[3] = optlen;
788}
789
790static void SetSockOpt(Service::Interface* self) {
791 u32* cmd_buffer = Kernel::GetCommandBuffer();
792 u32 socket_handle = cmd_buffer[1];
793 u32 level = cmd_buffer[2];
794 int optname = TranslateSockOpt(cmd_buffer[3]);
795
796 int ret = -1;
797 int err = 0;
798
799 if(optname < 0) {
800#ifdef _WIN32
801 err = WSAEINVAL;
802#else
803 err = EINVAL;
804#endif
805 } else {
806 socklen_t optlen = static_cast<socklen_t>(cmd_buffer[4]);
807 const char* optval = reinterpret_cast<const char *>(Memory::GetPointer(cmd_buffer[8]));
808
809 ret = static_cast<u32>(::setsockopt(socket_handle, level, optname, optval, optlen));
810 err = 0;
811 if (ret == SOCKET_ERROR_VALUE) {
812 err = TranslateError(GET_ERRNO);
813 }
814 }
815
816 cmd_buffer[0] = IPC::MakeHeader(0x12, 4, 4);
817 cmd_buffer[1] = ret;
818 cmd_buffer[2] = err;
819}
820
727const Interface::FunctionInfo FunctionTable[] = { 821const Interface::FunctionInfo FunctionTable[] = {
728 {0x00010044, InitializeSockets, "InitializeSockets"}, 822 {0x00010044, InitializeSockets, "InitializeSockets"},
729 {0x000200C2, Socket, "Socket"}, 823 {0x000200C2, Socket, "Socket"},
@@ -741,8 +835,8 @@ const Interface::FunctionInfo FunctionTable[] = {
741 {0x000E00C2, nullptr, "GetHostByAddr"}, 835 {0x000E00C2, nullptr, "GetHostByAddr"},
742 {0x000F0106, nullptr, "GetAddrInfo"}, 836 {0x000F0106, nullptr, "GetAddrInfo"},
743 {0x00100102, nullptr, "GetNameInfo"}, 837 {0x00100102, nullptr, "GetNameInfo"},
744 {0x00110102, nullptr, "GetSockOpt"}, 838 {0x00110102, GetSockOpt, "GetSockOpt"},
745 {0x00120104, nullptr, "SetSockOpt"}, 839 {0x00120104, SetSockOpt, "SetSockOpt"},
746 {0x001300C2, Fcntl, "Fcntl"}, 840 {0x001300C2, Fcntl, "Fcntl"},
747 {0x00140084, Poll, "Poll"}, 841 {0x00140084, Poll, "Poll"},
748 {0x00150042, nullptr, "SockAtMark"}, 842 {0x00150042, nullptr, "SockAtMark"},
diff --git a/src/core/hle/service/y2r_u.cpp b/src/core/hle/service/y2r_u.cpp
index 22f373adf..d16578f87 100644
--- a/src/core/hle/service/y2r_u.cpp
+++ b/src/core/hle/service/y2r_u.cpp
@@ -4,6 +4,7 @@
4 4
5#include <cstring> 5#include <cstring>
6 6
7#include "common/common_funcs.h"
7#include "common/common_types.h" 8#include "common/common_types.h"
8#include "common/logging/log.h" 9#include "common/logging/log.h"
9 10
@@ -12,9 +13,6 @@
12#include "core/hle/service/y2r_u.h" 13#include "core/hle/service/y2r_u.h"
13#include "core/hw/y2r.h" 14#include "core/hw/y2r.h"
14 15
15#include "video_core/renderer_base.h"
16#include "video_core/video_core.h"
17
18//////////////////////////////////////////////////////////////////////////////////////////////////// 16////////////////////////////////////////////////////////////////////////////////////////////////////
19// Namespace Y2R_U 17// Namespace Y2R_U
20 18
@@ -28,13 +26,17 @@ struct ConversionParameters {
28 u16 input_line_width; 26 u16 input_line_width;
29 u16 input_lines; 27 u16 input_lines;
30 StandardCoefficient standard_coefficient; 28 StandardCoefficient standard_coefficient;
31 u8 reserved; 29 u8 padding;
32 u16 alpha; 30 u16 alpha;
33}; 31};
34static_assert(sizeof(ConversionParameters) == 12, "ConversionParameters struct has incorrect size"); 32static_assert(sizeof(ConversionParameters) == 12, "ConversionParameters struct has incorrect size");
35 33
36static Kernel::SharedPtr<Kernel::Event> completion_event; 34static Kernel::SharedPtr<Kernel::Event> completion_event;
37static ConversionConfiguration conversion; 35static ConversionConfiguration conversion;
36static DitheringWeightParams dithering_weight_params;
37static u32 temporal_dithering_enabled = 0;
38static u32 transfer_end_interrupt_enabled = 0;
39static u32 spacial_dithering_enabled = 0;
38 40
39static const CoefficientSet standard_coefficients[4] = { 41static const CoefficientSet standard_coefficients[4] = {
40 {{ 0x100, 0x166, 0xB6, 0x58, 0x1C5, -0x166F, 0x10EE, -0x1C5B }}, // ITU_Rec601 42 {{ 0x100, 0x166, 0xB6, 0x58, 0x1C5, -0x166F, 0x10EE, -0x1C5B }}, // ITU_Rec601
@@ -73,7 +75,7 @@ ResultCode ConversionConfiguration::SetInputLines(u16 lines) {
73 75
74ResultCode ConversionConfiguration::SetStandardCoefficient(StandardCoefficient standard_coefficient) { 76ResultCode ConversionConfiguration::SetStandardCoefficient(StandardCoefficient standard_coefficient) {
75 size_t index = static_cast<size_t>(standard_coefficient); 77 size_t index = static_cast<size_t>(standard_coefficient);
76 if (index >= 4) { 78 if (index >= ARRAY_SIZE(standard_coefficients)) {
77 return ResultCode(ErrorDescription::InvalidEnumValue, ErrorModule::CAM, 79 return ResultCode(ErrorDescription::InvalidEnumValue, ErrorModule::CAM,
78 ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E053ED 80 ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E053ED
79 } 81 }
@@ -86,44 +88,183 @@ static void SetInputFormat(Service::Interface* self) {
86 u32* cmd_buff = Kernel::GetCommandBuffer(); 88 u32* cmd_buff = Kernel::GetCommandBuffer();
87 89
88 conversion.input_format = static_cast<InputFormat>(cmd_buff[1]); 90 conversion.input_format = static_cast<InputFormat>(cmd_buff[1]);
91
92 cmd_buff[0] = IPC::MakeHeader(0x1, 1, 0);
93 cmd_buff[1] = RESULT_SUCCESS.raw;
94
89 LOG_DEBUG(Service_Y2R, "called input_format=%hhu", conversion.input_format); 95 LOG_DEBUG(Service_Y2R, "called input_format=%hhu", conversion.input_format);
96}
97
98static void GetInputFormat(Service::Interface* self) {
99 u32* cmd_buff = Kernel::GetCommandBuffer();
90 100
101 cmd_buff[0] = IPC::MakeHeader(0x2, 2, 0);
91 cmd_buff[1] = RESULT_SUCCESS.raw; 102 cmd_buff[1] = RESULT_SUCCESS.raw;
103 cmd_buff[2] = static_cast<u32>(conversion.input_format);
104
105 LOG_DEBUG(Service_Y2R, "called input_format=%hhu", conversion.input_format);
92} 106}
93 107
94static void SetOutputFormat(Service::Interface* self) { 108static void SetOutputFormat(Service::Interface* self) {
95 u32* cmd_buff = Kernel::GetCommandBuffer(); 109 u32* cmd_buff = Kernel::GetCommandBuffer();
96 110
97 conversion.output_format = static_cast<OutputFormat>(cmd_buff[1]); 111 conversion.output_format = static_cast<OutputFormat>(cmd_buff[1]);
112
113 cmd_buff[0] = IPC::MakeHeader(0x3, 1, 0);
114 cmd_buff[1] = RESULT_SUCCESS.raw;
115
98 LOG_DEBUG(Service_Y2R, "called output_format=%hhu", conversion.output_format); 116 LOG_DEBUG(Service_Y2R, "called output_format=%hhu", conversion.output_format);
117}
118
119static void GetOutputFormat(Service::Interface* self) {
120 u32* cmd_buff = Kernel::GetCommandBuffer();
99 121
122 cmd_buff[0] = IPC::MakeHeader(0x4, 2, 0);
100 cmd_buff[1] = RESULT_SUCCESS.raw; 123 cmd_buff[1] = RESULT_SUCCESS.raw;
124 cmd_buff[2] = static_cast<u32>(conversion.output_format);
125
126 LOG_DEBUG(Service_Y2R, "called output_format=%hhu", conversion.output_format);
101} 127}
102 128
103static void SetRotation(Service::Interface* self) { 129static void SetRotation(Service::Interface* self) {
104 u32* cmd_buff = Kernel::GetCommandBuffer(); 130 u32* cmd_buff = Kernel::GetCommandBuffer();
105 131
106 conversion.rotation = static_cast<Rotation>(cmd_buff[1]); 132 conversion.rotation = static_cast<Rotation>(cmd_buff[1]);
133
134 cmd_buff[0] = IPC::MakeHeader(0x5, 1, 0);
135 cmd_buff[1] = RESULT_SUCCESS.raw;
136
107 LOG_DEBUG(Service_Y2R, "called rotation=%hhu", conversion.rotation); 137 LOG_DEBUG(Service_Y2R, "called rotation=%hhu", conversion.rotation);
138}
139
140static void GetRotation(Service::Interface* self) {
141 u32* cmd_buff = Kernel::GetCommandBuffer();
108 142
143 cmd_buff[0] = IPC::MakeHeader(0x6, 2, 0);
109 cmd_buff[1] = RESULT_SUCCESS.raw; 144 cmd_buff[1] = RESULT_SUCCESS.raw;
145 cmd_buff[2] = static_cast<u32>(conversion.rotation);
146
147 LOG_DEBUG(Service_Y2R, "called rotation=%hhu", conversion.rotation);
110} 148}
111 149
112static void SetBlockAlignment(Service::Interface* self) { 150static void SetBlockAlignment(Service::Interface* self) {
113 u32* cmd_buff = Kernel::GetCommandBuffer(); 151 u32* cmd_buff = Kernel::GetCommandBuffer();
114 152
115 conversion.block_alignment = static_cast<BlockAlignment>(cmd_buff[1]); 153 conversion.block_alignment = static_cast<BlockAlignment>(cmd_buff[1]);
116 LOG_DEBUG(Service_Y2R, "called alignment=%hhu", conversion.block_alignment);
117 154
155 cmd_buff[0] = IPC::MakeHeader(0x7, 1, 0);
156 cmd_buff[1] = RESULT_SUCCESS.raw;
157
158 LOG_DEBUG(Service_Y2R, "called block_alignment=%hhu", conversion.block_alignment);
159}
160
161static void GetBlockAlignment(Service::Interface* self) {
162 u32* cmd_buff = Kernel::GetCommandBuffer();
163
164 cmd_buff[0] = IPC::MakeHeader(0x8, 2, 0);
165 cmd_buff[1] = RESULT_SUCCESS.raw;
166 cmd_buff[2] = static_cast<u32>(conversion.block_alignment);
167
168 LOG_DEBUG(Service_Y2R, "called block_alignment=%hhu", conversion.block_alignment);
169}
170
171/**
172 * Y2R_U::SetSpacialDithering service function
173 * Inputs:
174 * 1 : u8, 0 = Disabled, 1 = Enabled
175 * Outputs:
176 * 1 : Result of function, 0 on success, otherwise error code
177 */
178static void SetSpacialDithering(Service::Interface* self) {
179 u32* cmd_buff = Kernel::GetCommandBuffer();
180 spacial_dithering_enabled = cmd_buff[1] & 0xF;
181
182 cmd_buff[0] = IPC::MakeHeader(0x9, 1, 0);
183 cmd_buff[1] = RESULT_SUCCESS.raw;
184
185 LOG_WARNING(Service_Y2R, "(STUBBED) called");
186}
187
188/**
189 * Y2R_U::GetSpacialDithering service function
190 * Outputs:
191 * 1 : Result of function, 0 on success, otherwise error code
192 * 2 : u8, 0 = Disabled, 1 = Enabled
193 */
194static void GetSpacialDithering(Service::Interface* self) {
195 u32* cmd_buff = Kernel::GetCommandBuffer();
196
197 cmd_buff[0] = IPC::MakeHeader(0xA, 2, 0);
198 cmd_buff[1] = RESULT_SUCCESS.raw;
199 cmd_buff[2] = spacial_dithering_enabled;
200
201 LOG_WARNING(Service_Y2R, "(STUBBED) called");
202}
203
204/**
205 * Y2R_U::SetTemporalDithering service function
206 * Inputs:
207 * 1 : u8, 0 = Disabled, 1 = Enabled
208 * Outputs:
209 * 1 : Result of function, 0 on success, otherwise error code
210 */
211static void SetTemporalDithering(Service::Interface* self) {
212 u32* cmd_buff = Kernel::GetCommandBuffer();
213 temporal_dithering_enabled = cmd_buff[1] & 0xF;
214
215 cmd_buff[0] = IPC::MakeHeader(0xB, 1, 0);
118 cmd_buff[1] = RESULT_SUCCESS.raw; 216 cmd_buff[1] = RESULT_SUCCESS.raw;
217
218 LOG_WARNING(Service_Y2R, "(STUBBED) called");
119} 219}
120 220
221/**
222 * Y2R_U::GetTemporalDithering service function
223 * Outputs:
224 * 1 : Result of function, 0 on success, otherwise error code
225 * 2 : u8, 0 = Disabled, 1 = Enabled
226 */
227static void GetTemporalDithering(Service::Interface* self) {
228 u32* cmd_buff = Kernel::GetCommandBuffer();
229
230 cmd_buff[0] = IPC::MakeHeader(0xC, 2, 0);
231 cmd_buff[1] = RESULT_SUCCESS.raw;
232 cmd_buff[2] = temporal_dithering_enabled;
233
234 LOG_WARNING(Service_Y2R, "(STUBBED) called");
235}
236
237/**
238 * Y2R_U::SetTransferEndInterrupt service function
239 * Inputs:
240 * 1 : u8, 0 = Disabled, 1 = Enabled
241 * Outputs:
242 * 1 : Result of function, 0 on success, otherwise error code
243 */
121static void SetTransferEndInterrupt(Service::Interface* self) { 244static void SetTransferEndInterrupt(Service::Interface* self) {
122 u32* cmd_buff = Kernel::GetCommandBuffer(); 245 u32* cmd_buff = Kernel::GetCommandBuffer();
246 transfer_end_interrupt_enabled = cmd_buff[1] & 0xf;
123 247
124 cmd_buff[0] = IPC::MakeHeader(0xD, 1, 0); 248 cmd_buff[0] = IPC::MakeHeader(0xD, 1, 0);
125 cmd_buff[1] = RESULT_SUCCESS.raw; 249 cmd_buff[1] = RESULT_SUCCESS.raw;
126 LOG_DEBUG(Service_Y2R, "(STUBBED) called"); 250
251 LOG_WARNING(Service_Y2R, "(STUBBED) called");
252}
253
254/**
255 * Y2R_U::GetTransferEndInterrupt service function
256 * Outputs:
257 * 1 : Result of function, 0 on success, otherwise error code
258 * 2 : u8, 0 = Disabled, 1 = Enabled
259 */
260static void GetTransferEndInterrupt(Service::Interface* self) {
261 u32* cmd_buff = Kernel::GetCommandBuffer();
262
263 cmd_buff[0] = IPC::MakeHeader(0xE, 2, 0);
264 cmd_buff[1] = RESULT_SUCCESS.raw;
265 cmd_buff[2] = transfer_end_interrupt_enabled;
266
267 LOG_WARNING(Service_Y2R, "(STUBBED) called");
127} 268}
128 269
129/** 270/**
@@ -135,8 +276,10 @@ static void SetTransferEndInterrupt(Service::Interface* self) {
135static void GetTransferEndEvent(Service::Interface* self) { 276static void GetTransferEndEvent(Service::Interface* self) {
136 u32* cmd_buff = Kernel::GetCommandBuffer(); 277 u32* cmd_buff = Kernel::GetCommandBuffer();
137 278
279 cmd_buff[0] = IPC::MakeHeader(0xF, 2, 0);
138 cmd_buff[1] = RESULT_SUCCESS.raw; 280 cmd_buff[1] = RESULT_SUCCESS.raw;
139 cmd_buff[3] = Kernel::g_handle_table.Create(completion_event).MoveFrom(); 281 cmd_buff[3] = Kernel::g_handle_table.Create(completion_event).MoveFrom();
282
140 LOG_DEBUG(Service_Y2R, "called"); 283 LOG_DEBUG(Service_Y2R, "called");
141} 284}
142 285
@@ -147,12 +290,12 @@ static void SetSendingY(Service::Interface* self) {
147 conversion.src_Y.image_size = cmd_buff[2]; 290 conversion.src_Y.image_size = cmd_buff[2];
148 conversion.src_Y.transfer_unit = cmd_buff[3]; 291 conversion.src_Y.transfer_unit = cmd_buff[3];
149 conversion.src_Y.gap = cmd_buff[4]; 292 conversion.src_Y.gap = cmd_buff[4];
150 u32 src_process_handle = cmd_buff[6];
151 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
152 "src_process_handle=0x%08X", conversion.src_Y.image_size,
153 conversion.src_Y.transfer_unit, conversion.src_Y.gap, src_process_handle);
154 293
294 cmd_buff[0] = IPC::MakeHeader(0x10, 1, 0);
155 cmd_buff[1] = RESULT_SUCCESS.raw; 295 cmd_buff[1] = RESULT_SUCCESS.raw;
296
297 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, src_process_handle=0x%08X",
298 conversion.src_Y.image_size, conversion.src_Y.transfer_unit, conversion.src_Y.gap, cmd_buff[6]);
156} 299}
157 300
158static void SetSendingU(Service::Interface* self) { 301static void SetSendingU(Service::Interface* self) {
@@ -162,12 +305,12 @@ static void SetSendingU(Service::Interface* self) {
162 conversion.src_U.image_size = cmd_buff[2]; 305 conversion.src_U.image_size = cmd_buff[2];
163 conversion.src_U.transfer_unit = cmd_buff[3]; 306 conversion.src_U.transfer_unit = cmd_buff[3];
164 conversion.src_U.gap = cmd_buff[4]; 307 conversion.src_U.gap = cmd_buff[4];
165 u32 src_process_handle = cmd_buff[6];
166 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
167 "src_process_handle=0x%08X", conversion.src_U.image_size,
168 conversion.src_U.transfer_unit, conversion.src_U.gap, src_process_handle);
169 308
309 cmd_buff[0] = IPC::MakeHeader(0x11, 1, 0);
170 cmd_buff[1] = RESULT_SUCCESS.raw; 310 cmd_buff[1] = RESULT_SUCCESS.raw;
311
312 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, src_process_handle=0x%08X",
313 conversion.src_U.image_size, conversion.src_U.transfer_unit, conversion.src_U.gap, cmd_buff[6]);
171} 314}
172 315
173static void SetSendingV(Service::Interface* self) { 316static void SetSendingV(Service::Interface* self) {
@@ -177,12 +320,12 @@ static void SetSendingV(Service::Interface* self) {
177 conversion.src_V.image_size = cmd_buff[2]; 320 conversion.src_V.image_size = cmd_buff[2];
178 conversion.src_V.transfer_unit = cmd_buff[3]; 321 conversion.src_V.transfer_unit = cmd_buff[3];
179 conversion.src_V.gap = cmd_buff[4]; 322 conversion.src_V.gap = cmd_buff[4];
180 u32 src_process_handle = cmd_buff[6];
181 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
182 "src_process_handle=0x%08X", conversion.src_V.image_size,
183 conversion.src_V.transfer_unit, conversion.src_V.gap, src_process_handle);
184 323
324 cmd_buff[0] = IPC::MakeHeader(0x12, 1, 0);
185 cmd_buff[1] = RESULT_SUCCESS.raw; 325 cmd_buff[1] = RESULT_SUCCESS.raw;
326
327 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, src_process_handle=0x%08X",
328 conversion.src_V.image_size, conversion.src_V.transfer_unit, conversion.src_V.gap, cmd_buff[6]);
186} 329}
187 330
188static void SetSendingYUYV(Service::Interface* self) { 331static void SetSendingYUYV(Service::Interface* self) {
@@ -192,12 +335,76 @@ static void SetSendingYUYV(Service::Interface* self) {
192 conversion.src_YUYV.image_size = cmd_buff[2]; 335 conversion.src_YUYV.image_size = cmd_buff[2];
193 conversion.src_YUYV.transfer_unit = cmd_buff[3]; 336 conversion.src_YUYV.transfer_unit = cmd_buff[3];
194 conversion.src_YUYV.gap = cmd_buff[4]; 337 conversion.src_YUYV.gap = cmd_buff[4];
195 u32 src_process_handle = cmd_buff[6];
196 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
197 "src_process_handle=0x%08X", conversion.src_YUYV.image_size,
198 conversion.src_YUYV.transfer_unit, conversion.src_YUYV.gap, src_process_handle);
199 338
339 cmd_buff[0] = IPC::MakeHeader(0x13, 1, 0);
340 cmd_buff[1] = RESULT_SUCCESS.raw;
341
342 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, src_process_handle=0x%08X",
343 conversion.src_YUYV.image_size, conversion.src_YUYV.transfer_unit, conversion.src_YUYV.gap, cmd_buff[6]);
344}
345
346/**
347 * Y2R::IsFinishedSendingYuv service function
348 * Output:
349 * 1 : Result of the function, 0 on success, otherwise error code
350 * 2 : u8, 0 = Not Finished, 1 = Finished
351 */
352static void IsFinishedSendingYuv(Service::Interface* self) {
353 u32* cmd_buff = Kernel::GetCommandBuffer();
354
355 cmd_buff[0] = IPC::MakeHeader(0x14, 2, 0);
356 cmd_buff[1] = RESULT_SUCCESS.raw;
357 cmd_buff[2] = 1;
358
359 LOG_WARNING(Service_Y2R, "(STUBBED) called");
360}
361
362/**
363 * Y2R::IsFinishedSendingY service function
364 * Output:
365 * 1 : Result of the function, 0 on success, otherwise error code
366 * 2 : u8, 0 = Not Finished, 1 = Finished
367 */
368static void IsFinishedSendingY(Service::Interface* self) {
369 u32* cmd_buff = Kernel::GetCommandBuffer();
370
371 cmd_buff[0] = IPC::MakeHeader(0x15, 2, 0);
200 cmd_buff[1] = RESULT_SUCCESS.raw; 372 cmd_buff[1] = RESULT_SUCCESS.raw;
373 cmd_buff[2] = 1;
374
375 LOG_WARNING(Service_Y2R, "(STUBBED) called");
376}
377
378/**
379 * Y2R::IsFinishedSendingU service function
380 * Output:
381 * 1 : Result of the function, 0 on success, otherwise error code
382 * 2 : u8, 0 = Not Finished, 1 = Finished
383 */
384static void IsFinishedSendingU(Service::Interface* self) {
385 u32* cmd_buff = Kernel::GetCommandBuffer();
386
387 cmd_buff[0] = IPC::MakeHeader(0x16, 2, 0);
388 cmd_buff[1] = RESULT_SUCCESS.raw;
389 cmd_buff[2] = 1;
390
391 LOG_WARNING(Service_Y2R, "(STUBBED) called");
392}
393
394/**
395 * Y2R::IsFinishedSendingV service function
396 * Output:
397 * 1 : Result of the function, 0 on success, otherwise error code
398 * 2 : u8, 0 = Not Finished, 1 = Finished
399 */
400static void IsFinishedSendingV(Service::Interface* self) {
401 u32* cmd_buff = Kernel::GetCommandBuffer();
402
403 cmd_buff[0] = IPC::MakeHeader(0x17, 2, 0);
404 cmd_buff[1] = RESULT_SUCCESS.raw;
405 cmd_buff[2] = 1;
406
407 LOG_WARNING(Service_Y2R, "(STUBBED) called");
201} 408}
202 409
203static void SetReceiving(Service::Interface* self) { 410static void SetReceiving(Service::Interface* self) {
@@ -207,27 +414,66 @@ static void SetReceiving(Service::Interface* self) {
207 conversion.dst.image_size = cmd_buff[2]; 414 conversion.dst.image_size = cmd_buff[2];
208 conversion.dst.transfer_unit = cmd_buff[3]; 415 conversion.dst.transfer_unit = cmd_buff[3];
209 conversion.dst.gap = cmd_buff[4]; 416 conversion.dst.gap = cmd_buff[4];
210 u32 dst_process_handle = cmd_buff[6];
211 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
212 "dst_process_handle=0x%08X", conversion.dst.image_size,
213 conversion.dst.transfer_unit, conversion.dst.gap,
214 dst_process_handle);
215 417
418 cmd_buff[0] = IPC::MakeHeader(0x18, 1, 0);
216 cmd_buff[1] = RESULT_SUCCESS.raw; 419 cmd_buff[1] = RESULT_SUCCESS.raw;
420
421 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, dst_process_handle=0x%08X",
422 conversion.dst.image_size, conversion.dst.transfer_unit, conversion.dst.gap, cmd_buff[6]);
423}
424
425/**
426 * Y2R::IsFinishedReceiving service function
427 * Output:
428 * 1 : Result of the function, 0 on success, otherwise error code
429 * 2 : u8, 0 = Not Finished, 1 = Finished
430 */
431static void IsFinishedReceiving(Service::Interface* self) {
432 u32* cmd_buff = Kernel::GetCommandBuffer();
433
434 cmd_buff[0] = IPC::MakeHeader(0x19, 2, 0);
435 cmd_buff[1] = RESULT_SUCCESS.raw;
436 cmd_buff[2] = 1;
437
438 LOG_WARNING(Service_Y2R, "(STUBBED) called");
217} 439}
218 440
219static void SetInputLineWidth(Service::Interface* self) { 441static void SetInputLineWidth(Service::Interface* self) {
220 u32* cmd_buff = Kernel::GetCommandBuffer(); 442 u32* cmd_buff = Kernel::GetCommandBuffer();
221 443
222 LOG_DEBUG(Service_Y2R, "called input_line_width=%u", cmd_buff[1]); 444 cmd_buff[0] = IPC::MakeHeader(0x1A, 1, 0);
223 cmd_buff[1] = conversion.SetInputLineWidth(cmd_buff[1]).raw; 445 cmd_buff[1] = conversion.SetInputLineWidth(cmd_buff[1]).raw;
446
447 LOG_DEBUG(Service_Y2R, "called input_line_width=%u", cmd_buff[1]);
448}
449
450static void GetInputLineWidth(Service::Interface* self) {
451 u32* cmd_buff = Kernel::GetCommandBuffer();
452
453 cmd_buff[0] = IPC::MakeHeader(0x1B, 2, 0);
454 cmd_buff[1] = RESULT_SUCCESS.raw;
455 cmd_buff[2] = conversion.input_line_width;
456
457 LOG_DEBUG(Service_Y2R, "called input_line_width=%u", conversion.input_line_width);
224} 458}
225 459
226static void SetInputLines(Service::Interface* self) { 460static void SetInputLines(Service::Interface* self) {
227 u32* cmd_buff = Kernel::GetCommandBuffer(); 461 u32* cmd_buff = Kernel::GetCommandBuffer();
228 462
229 LOG_DEBUG(Service_Y2R, "called input_line_number=%u", cmd_buff[1]); 463 cmd_buff[0] = IPC::MakeHeader(0x1C, 1, 0);
230 cmd_buff[1] = conversion.SetInputLines(cmd_buff[1]).raw; 464 cmd_buff[1] = conversion.SetInputLines(cmd_buff[1]).raw;
465
466 LOG_DEBUG(Service_Y2R, "called input_lines=%u", cmd_buff[1]);
467}
468
469static void GetInputLines(Service::Interface* self) {
470 u32* cmd_buff = Kernel::GetCommandBuffer();
471
472 cmd_buff[0] = IPC::MakeHeader(0x1D, 2, 0);
473 cmd_buff[1] = RESULT_SUCCESS.raw;
474 cmd_buff[2] = static_cast<u32>(conversion.input_lines);
475
476 LOG_DEBUG(Service_Y2R, "called input_lines=%u", conversion.input_lines);
231} 477}
232 478
233static void SetCoefficient(Service::Interface* self) { 479static void SetCoefficient(Service::Interface* self) {
@@ -235,45 +481,111 @@ static void SetCoefficient(Service::Interface* self) {
235 481
236 const u16* coefficients = reinterpret_cast<const u16*>(&cmd_buff[1]); 482 const u16* coefficients = reinterpret_cast<const u16*>(&cmd_buff[1]);
237 std::memcpy(conversion.coefficients.data(), coefficients, sizeof(CoefficientSet)); 483 std::memcpy(conversion.coefficients.data(), coefficients, sizeof(CoefficientSet));
484
485 cmd_buff[0] = IPC::MakeHeader(0x1E, 1, 0);
486 cmd_buff[1] = RESULT_SUCCESS.raw;
487
238 LOG_DEBUG(Service_Y2R, "called coefficients=[%hX, %hX, %hX, %hX, %hX, %hX, %hX, %hX]", 488 LOG_DEBUG(Service_Y2R, "called coefficients=[%hX, %hX, %hX, %hX, %hX, %hX, %hX, %hX]",
239 coefficients[0], coefficients[1], coefficients[2], coefficients[3], 489 coefficients[0], coefficients[1], coefficients[2], coefficients[3],
240 coefficients[4], coefficients[5], coefficients[6], coefficients[7]); 490 coefficients[4], coefficients[5], coefficients[6], coefficients[7]);
491}
241 492
493static void GetCoefficient(Service::Interface* self) {
494 u32* cmd_buff = Kernel::GetCommandBuffer();
495
496 cmd_buff[0] = IPC::MakeHeader(0x1F, 5, 0);
242 cmd_buff[1] = RESULT_SUCCESS.raw; 497 cmd_buff[1] = RESULT_SUCCESS.raw;
498 std::memcpy(&cmd_buff[2], conversion.coefficients.data(), sizeof(CoefficientSet));
499
500 LOG_DEBUG(Service_Y2R, "called");
243} 501}
244 502
245static void SetStandardCoefficient(Service::Interface* self) { 503static void SetStandardCoefficient(Service::Interface* self) {
246 u32* cmd_buff = Kernel::GetCommandBuffer(); 504 u32* cmd_buff = Kernel::GetCommandBuffer();
247 505
248 LOG_DEBUG(Service_Y2R, "called standard_coefficient=%u", cmd_buff[1]); 506 u32 index = cmd_buff[1];
507
508 cmd_buff[0] = IPC::MakeHeader(0x20, 1, 0);
509 cmd_buff[1] = conversion.SetStandardCoefficient((StandardCoefficient)index).raw;
510
511 LOG_DEBUG(Service_Y2R, "called standard_coefficient=%u", index);
512}
513
514static void GetStandardCoefficient(Service::Interface* self) {
515 u32* cmd_buff = Kernel::GetCommandBuffer();
516
517 u32 index = cmd_buff[1];
518
519 if (index < ARRAY_SIZE(standard_coefficients)) {
520 cmd_buff[0] = IPC::MakeHeader(0x21, 5, 0);
521 cmd_buff[1] = RESULT_SUCCESS.raw;
522 std::memcpy(&cmd_buff[2], &standard_coefficients[index], sizeof(CoefficientSet));
249 523
250 cmd_buff[1] = conversion.SetStandardCoefficient((StandardCoefficient)cmd_buff[1]).raw; 524 LOG_DEBUG(Service_Y2R, "called standard_coefficient=%u ", index);
525 } else {
526 cmd_buff[0] = IPC::MakeHeader(0x21, 1, 0);
527 cmd_buff[1] = -1; // TODO(bunnei): Identify the correct error code for this
528
529 LOG_ERROR(Service_Y2R, "called standard_coefficient=%u The argument is invalid!", index);
530 }
251} 531}
252 532
253static void SetAlpha(Service::Interface* self) { 533static void SetAlpha(Service::Interface* self) {
254 u32* cmd_buff = Kernel::GetCommandBuffer(); 534 u32* cmd_buff = Kernel::GetCommandBuffer();
255 535
256 conversion.alpha = cmd_buff[1]; 536 conversion.alpha = cmd_buff[1];
537
538 cmd_buff[0] = IPC::MakeHeader(0x22, 1, 0);
539 cmd_buff[1] = RESULT_SUCCESS.raw;
540
257 LOG_DEBUG(Service_Y2R, "called alpha=%hu", conversion.alpha); 541 LOG_DEBUG(Service_Y2R, "called alpha=%hu", conversion.alpha);
542}
543
544static void GetAlpha(Service::Interface* self) {
545 u32* cmd_buff = Kernel::GetCommandBuffer();
258 546
547 cmd_buff[0] = IPC::MakeHeader(0x23, 2, 0);
259 cmd_buff[1] = RESULT_SUCCESS.raw; 548 cmd_buff[1] = RESULT_SUCCESS.raw;
549 cmd_buff[2] = conversion.alpha;
550
551 LOG_DEBUG(Service_Y2R, "called alpha=%hu", conversion.alpha);
260} 552}
261 553
262static void StartConversion(Service::Interface* self) { 554static void SetDitheringWeightParams(Service::Interface* self) {
263 u32* cmd_buff = Kernel::GetCommandBuffer(); 555 u32* cmd_buff = Kernel::GetCommandBuffer();
556 std::memcpy(&dithering_weight_params, &cmd_buff[1], sizeof(DitheringWeightParams));
264 557
265 HW::Y2R::PerformConversion(conversion); 558 cmd_buff[0] = IPC::MakeHeader(0x24, 1, 0);
559 cmd_buff[1] = RESULT_SUCCESS.raw;
266 560
267 // dst_image_size would seem to be perfect for this, but it doesn't include the gap :( 561 LOG_DEBUG(Service_Y2R, "called");
268 u32 total_output_size = conversion.input_lines * 562}
269 (conversion.dst.transfer_unit + conversion.dst.gap); 563
270 VideoCore::g_renderer->Rasterizer()->InvalidateRegion( 564static void GetDitheringWeightParams(Service::Interface* self) {
271 Memory::VirtualToPhysicalAddress(conversion.dst.address), total_output_size); 565 u32* cmd_buff = Kernel::GetCommandBuffer();
566
567 cmd_buff[0] = IPC::MakeHeader(0x25, 9, 0);
568 cmd_buff[1] = RESULT_SUCCESS.raw;
569 std::memcpy(&cmd_buff[2], &dithering_weight_params, sizeof(DitheringWeightParams));
272 570
273 LOG_DEBUG(Service_Y2R, "called"); 571 LOG_DEBUG(Service_Y2R, "called");
572}
573
574static void StartConversion(Service::Interface* self) {
575 u32* cmd_buff = Kernel::GetCommandBuffer();
576
577 // dst_image_size would seem to be perfect for this, but it doesn't include the gap :(
578 u32 total_output_size = conversion.input_lines * (conversion.dst.transfer_unit + conversion.dst.gap);
579 Memory::RasterizerFlushAndInvalidateRegion(Memory::VirtualToPhysicalAddress(conversion.dst.address), total_output_size);
580
581 HW::Y2R::PerformConversion(conversion);
582
274 completion_event->Signal(); 583 completion_event->Signal();
275 584
585 cmd_buff[0] = IPC::MakeHeader(0x26, 1, 0);
276 cmd_buff[1] = RESULT_SUCCESS.raw; 586 cmd_buff[1] = RESULT_SUCCESS.raw;
587
588 LOG_DEBUG(Service_Y2R, "called");
277} 589}
278 590
279static void StopConversion(Service::Interface* self) { 591static void StopConversion(Service::Interface* self) {
@@ -281,6 +593,7 @@ static void StopConversion(Service::Interface* self) {
281 593
282 cmd_buff[0] = IPC::MakeHeader(0x27, 1, 0); 594 cmd_buff[0] = IPC::MakeHeader(0x27, 1, 0);
283 cmd_buff[1] = RESULT_SUCCESS.raw; 595 cmd_buff[1] = RESULT_SUCCESS.raw;
596
284 LOG_DEBUG(Service_Y2R, "called"); 597 LOG_DEBUG(Service_Y2R, "called");
285} 598}
286 599
@@ -293,50 +606,61 @@ static void StopConversion(Service::Interface* self) {
293static void IsBusyConversion(Service::Interface* self) { 606static void IsBusyConversion(Service::Interface* self) {
294 u32* cmd_buff = Kernel::GetCommandBuffer(); 607 u32* cmd_buff = Kernel::GetCommandBuffer();
295 608
609 cmd_buff[0] = IPC::MakeHeader(0x28, 2, 0);
296 cmd_buff[1] = RESULT_SUCCESS.raw; 610 cmd_buff[1] = RESULT_SUCCESS.raw;
297 cmd_buff[2] = 0; // StartConversion always finishes immediately 611 cmd_buff[2] = 0; // StartConversion always finishes immediately
612
298 LOG_DEBUG(Service_Y2R, "called"); 613 LOG_DEBUG(Service_Y2R, "called");
299} 614}
300 615
301/** 616/**
302 * Y2R_U::SetConversionParams service function 617 * Y2R_U::SetPackageParameter service function
303 */ 618 */
304static void SetConversionParams(Service::Interface* self) { 619static void SetPackageParameter(Service::Interface* self) {
305 u32* cmd_buff = Kernel::GetCommandBuffer(); 620 u32* cmd_buff = Kernel::GetCommandBuffer();
306 621
307 auto params = reinterpret_cast<const ConversionParameters*>(&cmd_buff[1]); 622 auto params = reinterpret_cast<const ConversionParameters*>(&cmd_buff[1]);
308 LOG_DEBUG(Service_Y2R,
309 "called input_format=%hhu output_format=%hhu rotation=%hhu block_alignment=%hhu "
310 "input_line_width=%hu input_lines=%hu standard_coefficient=%hhu "
311 "reserved=%hhu alpha=%hX",
312 params->input_format, params->output_format, params->rotation, params->block_alignment,
313 params->input_line_width, params->input_lines, params->standard_coefficient,
314 params->reserved, params->alpha);
315
316 ResultCode result = RESULT_SUCCESS;
317 623
318 conversion.input_format = params->input_format; 624 conversion.input_format = params->input_format;
319 conversion.output_format = params->output_format; 625 conversion.output_format = params->output_format;
320 conversion.rotation = params->rotation; 626 conversion.rotation = params->rotation;
321 conversion.block_alignment = params->block_alignment; 627 conversion.block_alignment = params->block_alignment;
322 result = conversion.SetInputLineWidth(params->input_line_width); 628
323 if (result.IsError()) goto cleanup; 629 ResultCode result = conversion.SetInputLineWidth(params->input_line_width);
630
631 if (result.IsError())
632 goto cleanup;
633
324 result = conversion.SetInputLines(params->input_lines); 634 result = conversion.SetInputLines(params->input_lines);
325 if (result.IsError()) goto cleanup; 635
636 if (result.IsError())
637 goto cleanup;
638
326 result = conversion.SetStandardCoefficient(params->standard_coefficient); 639 result = conversion.SetStandardCoefficient(params->standard_coefficient);
327 if (result.IsError()) goto cleanup; 640
641 if (result.IsError())
642 goto cleanup;
643
644 conversion.padding = params->padding;
328 conversion.alpha = params->alpha; 645 conversion.alpha = params->alpha;
329 646
330cleanup: 647cleanup:
331 cmd_buff[0] = IPC::MakeHeader(0x29, 1, 0); 648 cmd_buff[0] = IPC::MakeHeader(0x29, 1, 0);
332 cmd_buff[1] = result.raw; 649 cmd_buff[1] = result.raw;
650
651 LOG_DEBUG(Service_Y2R, "called input_format=%hhu output_format=%hhu rotation=%hhu block_alignment=%hhu "
652 "input_line_width=%hu input_lines=%hu standard_coefficient=%hhu reserved=%hhu alpha=%hX",
653 params->input_format, params->output_format, params->rotation, params->block_alignment,
654 params->input_line_width, params->input_lines, params->standard_coefficient, params->padding, params->alpha);
333} 655}
334 656
335static void PingProcess(Service::Interface* self) { 657static void PingProcess(Service::Interface* self) {
336 u32* cmd_buff = Kernel::GetCommandBuffer(); 658 u32* cmd_buff = Kernel::GetCommandBuffer();
337 659
660 cmd_buff[0] = IPC::MakeHeader(0x2A, 2, 0);
338 cmd_buff[1] = RESULT_SUCCESS.raw; 661 cmd_buff[1] = RESULT_SUCCESS.raw;
339 cmd_buff[2] = 0; 662 cmd_buff[2] = 0;
663
340 LOG_WARNING(Service_Y2R, "(STUBBED) called"); 664 LOG_WARNING(Service_Y2R, "(STUBBED) called");
341} 665}
342 666
@@ -362,6 +686,7 @@ static void DriverInitialize(Service::Interface* self) {
362 686
363 cmd_buff[0] = IPC::MakeHeader(0x2B, 1, 0); 687 cmd_buff[0] = IPC::MakeHeader(0x2B, 1, 0);
364 cmd_buff[1] = RESULT_SUCCESS.raw; 688 cmd_buff[1] = RESULT_SUCCESS.raw;
689
365 LOG_DEBUG(Service_Y2R, "called"); 690 LOG_DEBUG(Service_Y2R, "called");
366} 691}
367 692
@@ -370,54 +695,67 @@ static void DriverFinalize(Service::Interface* self) {
370 695
371 cmd_buff[0] = IPC::MakeHeader(0x2C, 1, 0); 696 cmd_buff[0] = IPC::MakeHeader(0x2C, 1, 0);
372 cmd_buff[1] = RESULT_SUCCESS.raw; 697 cmd_buff[1] = RESULT_SUCCESS.raw;
698
699 LOG_DEBUG(Service_Y2R, "called");
700}
701
702
703static void GetPackageParameter(Service::Interface* self) {
704 u32* cmd_buff = Kernel::GetCommandBuffer();
705
706 cmd_buff[0] = IPC::MakeHeader(0x2D, 4, 0);
707 cmd_buff[1] = RESULT_SUCCESS.raw;
708 std::memcpy(&cmd_buff[2], &conversion, sizeof(ConversionParameters));
709
373 LOG_DEBUG(Service_Y2R, "called"); 710 LOG_DEBUG(Service_Y2R, "called");
374} 711}
375 712
376const Interface::FunctionInfo FunctionTable[] = { 713const Interface::FunctionInfo FunctionTable[] = {
377 {0x00010040, SetInputFormat, "SetInputFormat"}, 714 {0x00010040, SetInputFormat, "SetInputFormat"},
378 {0x00020000, nullptr, "GetInputFormat"}, 715 {0x00020000, GetInputFormat, "GetInputFormat"},
379 {0x00030040, SetOutputFormat, "SetOutputFormat"}, 716 {0x00030040, SetOutputFormat, "SetOutputFormat"},
380 {0x00040000, nullptr, "GetOutputFormat"}, 717 {0x00040000, GetOutputFormat, "GetOutputFormat"},
381 {0x00050040, SetRotation, "SetRotation"}, 718 {0x00050040, SetRotation, "SetRotation"},
382 {0x00060000, nullptr, "GetRotation"}, 719 {0x00060000, GetRotation, "GetRotation"},
383 {0x00070040, SetBlockAlignment, "SetBlockAlignment"}, 720 {0x00070040, SetBlockAlignment, "SetBlockAlignment"},
384 {0x00080000, nullptr, "GetBlockAlignment"}, 721 {0x00080000, GetBlockAlignment, "GetBlockAlignment"},
385 {0x00090040, nullptr, "SetSpacialDithering"}, 722 {0x00090040, SetSpacialDithering, "SetSpacialDithering"},
386 {0x000A0000, nullptr, "GetSpacialDithering"}, 723 {0x000A0000, GetSpacialDithering, "GetSpacialDithering"},
387 {0x000B0040, nullptr, "SetTemporalDithering"}, 724 {0x000B0040, SetTemporalDithering, "SetTemporalDithering"},
388 {0x000C0000, nullptr, "GetTemporalDithering"}, 725 {0x000C0000, GetTemporalDithering, "GetTemporalDithering"},
389 {0x000D0040, SetTransferEndInterrupt, "SetTransferEndInterrupt"}, 726 {0x000D0040, SetTransferEndInterrupt, "SetTransferEndInterrupt"},
727 {0x000E0000, GetTransferEndInterrupt, "GetTransferEndInterrupt"},
390 {0x000F0000, GetTransferEndEvent, "GetTransferEndEvent"}, 728 {0x000F0000, GetTransferEndEvent, "GetTransferEndEvent"},
391 {0x00100102, SetSendingY, "SetSendingY"}, 729 {0x00100102, SetSendingY, "SetSendingY"},
392 {0x00110102, SetSendingU, "SetSendingU"}, 730 {0x00110102, SetSendingU, "SetSendingU"},
393 {0x00120102, SetSendingV, "SetSendingV"}, 731 {0x00120102, SetSendingV, "SetSendingV"},
394 {0x00130102, SetSendingYUYV, "SetSendingYUYV"}, 732 {0x00130102, SetSendingYUYV, "SetSendingYUYV"},
395 {0x00140000, nullptr, "IsFinishedSendingYuv"}, 733 {0x00140000, IsFinishedSendingYuv, "IsFinishedSendingYuv"},
396 {0x00150000, nullptr, "IsFinishedSendingY"}, 734 {0x00150000, IsFinishedSendingY, "IsFinishedSendingY"},
397 {0x00160000, nullptr, "IsFinishedSendingU"}, 735 {0x00160000, IsFinishedSendingU, "IsFinishedSendingU"},
398 {0x00170000, nullptr, "IsFinishedSendingV"}, 736 {0x00170000, IsFinishedSendingV, "IsFinishedSendingV"},
399 {0x00180102, SetReceiving, "SetReceiving"}, 737 {0x00180102, SetReceiving, "SetReceiving"},
400 {0x00190000, nullptr, "IsFinishedReceiving"}, 738 {0x00190000, IsFinishedReceiving, "IsFinishedReceiving"},
401 {0x001A0040, SetInputLineWidth, "SetInputLineWidth"}, 739 {0x001A0040, SetInputLineWidth, "SetInputLineWidth"},
402 {0x001B0000, nullptr, "GetInputLineWidth"}, 740 {0x001B0000, GetInputLineWidth, "GetInputLineWidth"},
403 {0x001C0040, SetInputLines, "SetInputLines"}, 741 {0x001C0040, SetInputLines, "SetInputLines"},
404 {0x001D0000, nullptr, "GetInputLines"}, 742 {0x001D0000, GetInputLines, "GetInputLines"},
405 {0x001E0100, SetCoefficient, "SetCoefficient"}, 743 {0x001E0100, SetCoefficient, "SetCoefficient"},
406 {0x001F0000, nullptr, "GetCoefficient"}, 744 {0x001F0000, GetCoefficient, "GetCoefficient"},
407 {0x00200040, SetStandardCoefficient, "SetStandardCoefficient"}, 745 {0x00200040, SetStandardCoefficient, "SetStandardCoefficient"},
408 {0x00210040, nullptr, "GetStandardCoefficientParams"}, 746 {0x00210040, GetStandardCoefficient, "GetStandardCoefficient"},
409 {0x00220040, SetAlpha, "SetAlpha"}, 747 {0x00220040, SetAlpha, "SetAlpha"},
410 {0x00230000, nullptr, "GetAlpha"}, 748 {0x00230000, GetAlpha, "GetAlpha"},
411 {0x00240200, nullptr, "SetDitheringWeightParams"}, 749 {0x00240200, SetDitheringWeightParams,"SetDitheringWeightParams"},
412 {0x00250000, nullptr, "GetDitheringWeightParams"}, 750 {0x00250000, GetDitheringWeightParams,"GetDitheringWeightParams"},
413 {0x00260000, StartConversion, "StartConversion"}, 751 {0x00260000, StartConversion, "StartConversion"},
414 {0x00270000, StopConversion, "StopConversion"}, 752 {0x00270000, StopConversion, "StopConversion"},
415 {0x00280000, IsBusyConversion, "IsBusyConversion"}, 753 {0x00280000, IsBusyConversion, "IsBusyConversion"},
416 {0x002901C0, SetConversionParams, "SetConversionParams"}, 754 {0x002901C0, SetPackageParameter, "SetPackageParameter"},
417 {0x002A0000, PingProcess, "PingProcess"}, 755 {0x002A0000, PingProcess, "PingProcess"},
418 {0x002B0000, DriverInitialize, "DriverInitialize"}, 756 {0x002B0000, DriverInitialize, "DriverInitialize"},
419 {0x002C0000, DriverFinalize, "DriverFinalize"}, 757 {0x002C0000, DriverFinalize, "DriverFinalize"},
420 {0x002D0000, nullptr, "GetPackageParameter"}, 758 {0x002D0000, GetPackageParameter, "GetPackageParameter"},
421}; 759};
422 760
423//////////////////////////////////////////////////////////////////////////////////////////////////// 761////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/core/hle/service/y2r_u.h b/src/core/hle/service/y2r_u.h
index 3965a5545..95fa2fdb7 100644
--- a/src/core/hle/service/y2r_u.h
+++ b/src/core/hle/service/y2r_u.h
@@ -97,6 +97,7 @@ struct ConversionConfiguration {
97 u16 input_line_width; 97 u16 input_line_width;
98 u16 input_lines; 98 u16 input_lines;
99 CoefficientSet coefficients; 99 CoefficientSet coefficients;
100 u8 padding;
100 u16 alpha; 101 u16 alpha;
101 102
102 /// Input parameters for the Y (luma) plane 103 /// Input parameters for the Y (luma) plane
@@ -109,6 +110,25 @@ struct ConversionConfiguration {
109 ResultCode SetStandardCoefficient(StandardCoefficient standard_coefficient); 110 ResultCode SetStandardCoefficient(StandardCoefficient standard_coefficient);
110}; 111};
111 112
113struct DitheringWeightParams {
114 u16 w0_xEven_yEven;
115 u16 w0_xOdd_yEven;
116 u16 w0_xEven_yOdd;
117 u16 w0_xOdd_yOdd;
118 u16 w1_xEven_yEven;
119 u16 w1_xOdd_yEven;
120 u16 w1_xEven_yOdd;
121 u16 w1_xOdd_yOdd;
122 u16 w2_xEven_yEven;
123 u16 w2_xOdd_yEven;
124 u16 w2_xEven_yOdd;
125 u16 w2_xOdd_yOdd;
126 u16 w3_xEven_yEven;
127 u16 w3_xOdd_yEven;
128 u16 w3_xEven_yOdd;
129 u16 w3_xOdd_yOdd;
130};
131
112class Interface : public Service::Interface { 132class Interface : public Service::Interface {
113public: 133public:
114 Interface(); 134 Interface();
diff --git a/src/core/hle/svc.cpp b/src/core/hle/svc.cpp
index 761e1b45b..60c8747f3 100644
--- a/src/core/hle/svc.cpp
+++ b/src/core/hle/svc.cpp
@@ -6,7 +6,6 @@
6 6
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8#include "common/microprofile.h" 8#include "common/microprofile.h"
9#include "common/profiler.h"
10#include "common/string_util.h" 9#include "common/string_util.h"
11#include "common/symbols.h" 10#include "common/symbols.h"
12 11
@@ -865,6 +864,10 @@ static ResultCode GetProcessInfo(s64* out, Handle process_handle, u32 type) {
865 // TODO(yuriks): Type 0 returns a slightly higher number than type 2, but I'm not sure 864 // TODO(yuriks): Type 0 returns a slightly higher number than type 2, but I'm not sure
866 // what's the difference between them. 865 // what's the difference between them.
867 *out = process->heap_used + process->linear_heap_used + process->misc_memory_used; 866 *out = process->heap_used + process->linear_heap_used + process->misc_memory_used;
867 if(*out % Memory::PAGE_SIZE != 0) {
868 LOG_ERROR(Kernel_SVC, "called, memory size not page-aligned");
869 return ERR_MISALIGNED_SIZE;
870 }
868 break; 871 break;
869 case 1: 872 case 1:
870 case 3: 873 case 3:
@@ -1036,8 +1039,6 @@ static const FunctionDef SVC_Table[] = {
1036 {0x7D, HLE::Wrap<QueryProcessMemory>, "QueryProcessMemory"}, 1039 {0x7D, HLE::Wrap<QueryProcessMemory>, "QueryProcessMemory"},
1037}; 1040};
1038 1041
1039Common::Profiling::TimingCategory profiler_svc("SVC Calls");
1040
1041static const FunctionDef* GetSVCInfo(u32 func_num) { 1042static const FunctionDef* GetSVCInfo(u32 func_num) {
1042 if (func_num >= ARRAY_SIZE(SVC_Table)) { 1043 if (func_num >= ARRAY_SIZE(SVC_Table)) {
1043 LOG_ERROR(Kernel_SVC, "unknown svc=0x%02X", func_num); 1044 LOG_ERROR(Kernel_SVC, "unknown svc=0x%02X", func_num);
@@ -1049,7 +1050,6 @@ static const FunctionDef* GetSVCInfo(u32 func_num) {
1049MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70)); 1050MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70));
1050 1051
1051void CallSVC(u32 immediate) { 1052void CallSVC(u32 immediate) {
1052 Common::Profiling::ScopeTimer timer_svc(profiler_svc);
1053 MICROPROFILE_SCOPE(Kernel_SVC); 1053 MICROPROFILE_SCOPE(Kernel_SVC);
1054 1054
1055 const FunctionDef* info = GetSVCInfo(immediate); 1055 const FunctionDef* info = GetSVCInfo(immediate);
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index 7e2f9cdfa..a4dfb7e43 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -115,21 +115,39 @@ inline void Write(u32 addr, const T data) {
115 u8* start = Memory::GetPhysicalPointer(config.GetStartAddress()); 115 u8* start = Memory::GetPhysicalPointer(config.GetStartAddress());
116 u8* end = Memory::GetPhysicalPointer(config.GetEndAddress()); 116 u8* end = Memory::GetPhysicalPointer(config.GetEndAddress());
117 117
118 if (config.fill_24bit) { 118 // TODO: Consider always accelerating and returning vector of
119 // fill with 24-bit values 119 // regions that the accelerated fill did not cover to
120 for (u8* ptr = start; ptr < end; ptr += 3) { 120 // reduce/eliminate the fill that the cpu has to do.
121 ptr[0] = config.value_24bit_r; 121 // This would also mean that the flush below is not needed.
122 ptr[1] = config.value_24bit_g; 122 // Fill should first flush all surfaces that touch but are
123 ptr[2] = config.value_24bit_b; 123 // not completely within the fill range.
124 // Then fill all completely covered surfaces, and return the
125 // regions that were between surfaces or within the touching
126 // ones for cpu to manually fill here.
127 if (!VideoCore::g_renderer->Rasterizer()->AccelerateFill(config)) {
128 Memory::RasterizerFlushAndInvalidateRegion(config.GetStartAddress(), config.GetEndAddress() - config.GetStartAddress());
129
130 if (config.fill_24bit) {
131 // fill with 24-bit values
132 for (u8* ptr = start; ptr < end; ptr += 3) {
133 ptr[0] = config.value_24bit_r;
134 ptr[1] = config.value_24bit_g;
135 ptr[2] = config.value_24bit_b;
136 }
137 } else if (config.fill_32bit) {
138 // fill with 32-bit values
139 if (end > start) {
140 u32 value = config.value_32bit;
141 size_t len = (end - start) / sizeof(u32);
142 for (size_t i = 0; i < len; ++i)
143 memcpy(&start[i * sizeof(u32)], &value, sizeof(u32));
144 }
145 } else {
146 // fill with 16-bit values
147 u16 value_16bit = config.value_16bit.Value();
148 for (u8* ptr = start; ptr < end; ptr += sizeof(u16))
149 memcpy(ptr, &value_16bit, sizeof(u16));
124 } 150 }
125 } else if (config.fill_32bit) {
126 // fill with 32-bit values
127 for (u32* ptr = (u32*)start; ptr < (u32*)end; ++ptr)
128 *ptr = config.value_32bit;
129 } else {
130 // fill with 16-bit values
131 for (u16* ptr = (u16*)start; ptr < (u16*)end; ++ptr)
132 *ptr = config.value_16bit;
133 } 151 }
134 152
135 LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), config.GetEndAddress()); 153 LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), config.GetEndAddress());
@@ -139,8 +157,6 @@ inline void Write(u32 addr, const T data) {
139 } else { 157 } else {
140 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC1); 158 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC1);
141 } 159 }
142
143 VideoCore::g_renderer->Rasterizer()->InvalidateRegion(config.GetStartAddress(), config.GetEndAddress() - config.GetStartAddress());
144 } 160 }
145 161
146 // Reset "trigger" flag and set the "finish" flag 162 // Reset "trigger" flag and set the "finish" flag
@@ -161,184 +177,185 @@ inline void Write(u32 addr, const T data) {
161 if (Pica::g_debug_context) 177 if (Pica::g_debug_context)
162 Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer, nullptr); 178 Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer, nullptr);
163 179
164 u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress()); 180 if (!VideoCore::g_renderer->Rasterizer()->AccelerateDisplayTransfer(config)) {
165 u8* dst_pointer = Memory::GetPhysicalPointer(config.GetPhysicalOutputAddress()); 181 u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress());
166 182 u8* dst_pointer = Memory::GetPhysicalPointer(config.GetPhysicalOutputAddress());
167 if (config.is_texture_copy) {
168 u32 input_width = config.texture_copy.input_width * 16;
169 u32 input_gap = config.texture_copy.input_gap * 16;
170 u32 output_width = config.texture_copy.output_width * 16;
171 u32 output_gap = config.texture_copy.output_gap * 16;
172
173 size_t contiguous_input_size = config.texture_copy.size / input_width * (input_width + input_gap);
174 VideoCore::g_renderer->Rasterizer()->FlushRegion(config.GetPhysicalInputAddress(), contiguous_input_size);
175
176 u32 remaining_size = config.texture_copy.size;
177 u32 remaining_input = input_width;
178 u32 remaining_output = output_width;
179 while (remaining_size > 0) {
180 u32 copy_size = std::min({ remaining_input, remaining_output, remaining_size });
181 183
182 std::memcpy(dst_pointer, src_pointer, copy_size); 184 if (config.is_texture_copy) {
183 src_pointer += copy_size; 185 u32 input_width = config.texture_copy.input_width * 16;
184 dst_pointer += copy_size; 186 u32 input_gap = config.texture_copy.input_gap * 16;
187 u32 output_width = config.texture_copy.output_width * 16;
188 u32 output_gap = config.texture_copy.output_gap * 16;
185 189
186 remaining_input -= copy_size; 190 size_t contiguous_input_size = config.texture_copy.size / input_width * (input_width + input_gap);
187 remaining_output -= copy_size; 191 Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), static_cast<u32>(contiguous_input_size));
188 remaining_size -= copy_size;
189 192
190 if (remaining_input == 0) { 193 size_t contiguous_output_size = config.texture_copy.size / output_width * (output_width + output_gap);
191 remaining_input = input_width; 194 Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), static_cast<u32>(contiguous_output_size));
192 src_pointer += input_gap;
193 }
194 if (remaining_output == 0) {
195 remaining_output = output_width;
196 dst_pointer += output_gap;
197 }
198 }
199 195
200 LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X", 196 u32 remaining_size = config.texture_copy.size;
201 config.texture_copy.size, 197 u32 remaining_input = input_width;
202 config.GetPhysicalInputAddress(), input_width, input_gap, 198 u32 remaining_output = output_width;
203 config.GetPhysicalOutputAddress(), output_width, output_gap, 199 while (remaining_size > 0) {
204 config.flags); 200 u32 copy_size = std::min({ remaining_input, remaining_output, remaining_size });
205 201
206 size_t contiguous_output_size = config.texture_copy.size / output_width * (output_width + output_gap); 202 std::memcpy(dst_pointer, src_pointer, copy_size);
207 VideoCore::g_renderer->Rasterizer()->InvalidateRegion(config.GetPhysicalOutputAddress(), contiguous_output_size); 203 src_pointer += copy_size;
204 dst_pointer += copy_size;
208 205
209 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); 206 remaining_input -= copy_size;
210 break; 207 remaining_output -= copy_size;
211 } 208 remaining_size -= copy_size;
212 209
213 if (config.scaling > config.ScaleXY) { 210 if (remaining_input == 0) {
214 LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", config.scaling.Value()); 211 remaining_input = input_width;
215 UNIMPLEMENTED(); 212 src_pointer += input_gap;
216 break; 213 }
217 } 214 if (remaining_output == 0) {
215 remaining_output = output_width;
216 dst_pointer += output_gap;
217 }
218 }
218 219
219 if (config.input_linear && config.scaling != config.NoScale) { 220 LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X",
220 LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input"); 221 config.texture_copy.size,
221 UNIMPLEMENTED(); 222 config.GetPhysicalInputAddress(), input_width, input_gap,
222 break; 223 config.GetPhysicalOutputAddress(), output_width, output_gap,
223 } 224 config.flags);
224 225
225 bool horizontal_scale = config.scaling != config.NoScale; 226 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF);
226 bool vertical_scale = config.scaling == config.ScaleXY; 227 break;
228 }
227 229
228 u32 output_width = config.output_width >> horizontal_scale; 230 if (config.scaling > config.ScaleXY) {
229 u32 output_height = config.output_height >> vertical_scale; 231 LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", config.scaling.Value());
232 UNIMPLEMENTED();
233 break;
234 }
230 235
231 u32 input_size = config.input_width * config.input_height * GPU::Regs::BytesPerPixel(config.input_format); 236 if (config.input_linear && config.scaling != config.NoScale) {
232 u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format); 237 LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input");
238 UNIMPLEMENTED();
239 break;
240 }
233 241
234 VideoCore::g_renderer->Rasterizer()->FlushRegion(config.GetPhysicalInputAddress(), input_size); 242 int horizontal_scale = config.scaling != config.NoScale ? 1 : 0;
243 int vertical_scale = config.scaling == config.ScaleXY ? 1 : 0;
235 244
236 for (u32 y = 0; y < output_height; ++y) { 245 u32 output_width = config.output_width >> horizontal_scale;
237 for (u32 x = 0; x < output_width; ++x) { 246 u32 output_height = config.output_height >> vertical_scale;
238 Math::Vec4<u8> src_color;
239 247
240 // Calculate the [x,y] position of the input image 248 u32 input_size = config.input_width * config.input_height * GPU::Regs::BytesPerPixel(config.input_format);
241 // based on the current output position and the scale 249 u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format);
242 u32 input_x = x << horizontal_scale;
243 u32 input_y = y << vertical_scale;
244 250
245 if (config.flip_vertically) { 251 Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), input_size);
246 // Flip the y value of the output data, 252 Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), output_size);
247 // we do this after calculating the [x,y] position of the input image
248 // to account for the scaling options.
249 y = output_height - y - 1;
250 }
251 253
252 u32 dst_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.output_format); 254 for (u32 y = 0; y < output_height; ++y) {
253 u32 src_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.input_format); 255 for (u32 x = 0; x < output_width; ++x) {
254 u32 src_offset; 256 Math::Vec4<u8> src_color;
255 u32 dst_offset;
256 257
257 if (config.input_linear) { 258 // Calculate the [x,y] position of the input image
258 if (!config.dont_swizzle) { 259 // based on the current output position and the scale
259 // Interpret the input as linear and the output as tiled 260 u32 input_x = x << horizontal_scale;
260 u32 coarse_y = y & ~7; 261 u32 input_y = y << vertical_scale;
261 u32 stride = output_width * dst_bytes_per_pixel;
262 262
263 src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel; 263 if (config.flip_vertically) {
264 dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + coarse_y * stride; 264 // Flip the y value of the output data,
265 } else { 265 // we do this after calculating the [x,y] position of the input image
266 // Both input and output are linear 266 // to account for the scaling options.
267 src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel; 267 y = output_height - y - 1;
268 dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
269 } 268 }
270 } else {
271 if (!config.dont_swizzle) {
272 // Interpret the input as tiled and the output as linear
273 u32 coarse_y = input_y & ~7;
274 u32 stride = config.input_width * src_bytes_per_pixel;
275 269
276 src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + coarse_y * stride; 270 u32 dst_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.output_format);
277 dst_offset = (x + y * output_width) * dst_bytes_per_pixel; 271 u32 src_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.input_format);
272 u32 src_offset;
273 u32 dst_offset;
274
275 if (config.input_linear) {
276 if (!config.dont_swizzle) {
277 // Interpret the input as linear and the output as tiled
278 u32 coarse_y = y & ~7;
279 u32 stride = output_width * dst_bytes_per_pixel;
280
281 src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel;
282 dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + coarse_y * stride;
283 } else {
284 // Both input and output are linear
285 src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel;
286 dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
287 }
278 } else { 288 } else {
279 // Both input and output are tiled 289 if (!config.dont_swizzle) {
280 u32 out_coarse_y = y & ~7; 290 // Interpret the input as tiled and the output as linear
281 u32 out_stride = output_width * dst_bytes_per_pixel; 291 u32 coarse_y = input_y & ~7;
282 292 u32 stride = config.input_width * src_bytes_per_pixel;
283 u32 in_coarse_y = input_y & ~7; 293
284 u32 in_stride = config.input_width * src_bytes_per_pixel; 294 src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + coarse_y * stride;
285 295 dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
286 src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + in_coarse_y * in_stride; 296 } else {
287 dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + out_coarse_y * out_stride; 297 // Both input and output are tiled
298 u32 out_coarse_y = y & ~7;
299 u32 out_stride = output_width * dst_bytes_per_pixel;
300
301 u32 in_coarse_y = input_y & ~7;
302 u32 in_stride = config.input_width * src_bytes_per_pixel;
303
304 src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + in_coarse_y * in_stride;
305 dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + out_coarse_y * out_stride;
306 }
288 } 307 }
289 }
290 308
291 const u8* src_pixel = src_pointer + src_offset; 309 const u8* src_pixel = src_pointer + src_offset;
292 src_color = DecodePixel(config.input_format, src_pixel); 310 src_color = DecodePixel(config.input_format, src_pixel);
293 if (config.scaling == config.ScaleX) { 311 if (config.scaling == config.ScaleX) {
294 Math::Vec4<u8> pixel = DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel); 312 Math::Vec4<u8> pixel = DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel);
295 src_color = ((src_color + pixel) / 2).Cast<u8>(); 313 src_color = ((src_color + pixel) / 2).Cast<u8>();
296 } else if (config.scaling == config.ScaleXY) { 314 } else if (config.scaling == config.ScaleXY) {
297 Math::Vec4<u8> pixel1 = DecodePixel(config.input_format, src_pixel + 1 * src_bytes_per_pixel); 315 Math::Vec4<u8> pixel1 = DecodePixel(config.input_format, src_pixel + 1 * src_bytes_per_pixel);
298 Math::Vec4<u8> pixel2 = DecodePixel(config.input_format, src_pixel + 2 * src_bytes_per_pixel); 316 Math::Vec4<u8> pixel2 = DecodePixel(config.input_format, src_pixel + 2 * src_bytes_per_pixel);
299 Math::Vec4<u8> pixel3 = DecodePixel(config.input_format, src_pixel + 3 * src_bytes_per_pixel); 317 Math::Vec4<u8> pixel3 = DecodePixel(config.input_format, src_pixel + 3 * src_bytes_per_pixel);
300 src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>(); 318 src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>();
301 } 319 }
302 320
303 u8* dst_pixel = dst_pointer + dst_offset; 321 u8* dst_pixel = dst_pointer + dst_offset;
304 switch (config.output_format) { 322 switch (config.output_format) {
305 case Regs::PixelFormat::RGBA8: 323 case Regs::PixelFormat::RGBA8:
306 Color::EncodeRGBA8(src_color, dst_pixel); 324 Color::EncodeRGBA8(src_color, dst_pixel);
307 break; 325 break;
308 326
309 case Regs::PixelFormat::RGB8: 327 case Regs::PixelFormat::RGB8:
310 Color::EncodeRGB8(src_color, dst_pixel); 328 Color::EncodeRGB8(src_color, dst_pixel);
311 break; 329 break;
312 330
313 case Regs::PixelFormat::RGB565: 331 case Regs::PixelFormat::RGB565:
314 Color::EncodeRGB565(src_color, dst_pixel); 332 Color::EncodeRGB565(src_color, dst_pixel);
315 break; 333 break;
316 334
317 case Regs::PixelFormat::RGB5A1: 335 case Regs::PixelFormat::RGB5A1:
318 Color::EncodeRGB5A1(src_color, dst_pixel); 336 Color::EncodeRGB5A1(src_color, dst_pixel);
319 break; 337 break;
320 338
321 case Regs::PixelFormat::RGBA4: 339 case Regs::PixelFormat::RGBA4:
322 Color::EncodeRGBA4(src_color, dst_pixel); 340 Color::EncodeRGBA4(src_color, dst_pixel);
323 break; 341 break;
324 342
325 default: 343 default:
326 LOG_ERROR(HW_GPU, "Unknown destination framebuffer format %x", config.output_format.Value()); 344 LOG_ERROR(HW_GPU, "Unknown destination framebuffer format %x", config.output_format.Value());
327 break; 345 break;
346 }
328 } 347 }
329 } 348 }
330 }
331 349
332 LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), dst format %x, flags 0x%08X", 350 LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), dst format %x, flags 0x%08X",
333 config.output_height * output_width * GPU::Regs::BytesPerPixel(config.output_format), 351 config.output_height * output_width * GPU::Regs::BytesPerPixel(config.output_format),
334 config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(), 352 config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(),
335 config.GetPhysicalOutputAddress(), output_width, output_height, 353 config.GetPhysicalOutputAddress(), output_width, output_height,
336 config.output_format.Value(), config.flags); 354 config.output_format.Value(), config.flags);
355 }
337 356
338 g_regs.display_transfer_config.trigger = 0; 357 g_regs.display_transfer_config.trigger = 0;
339 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); 358 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF);
340
341 VideoCore::g_renderer->Rasterizer()->InvalidateRegion(config.GetPhysicalOutputAddress(), output_size);
342 } 359 }
343 break; 360 break;
344 } 361 }
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h
index a00adbf53..da4c345b4 100644
--- a/src/core/hw/gpu.h
+++ b/src/core/hw/gpu.h
@@ -78,7 +78,7 @@ struct Regs {
78 78
79 INSERT_PADDING_WORDS(0x4); 79 INSERT_PADDING_WORDS(0x4);
80 80
81 struct { 81 struct MemoryFillConfig {
82 u32 address_start; 82 u32 address_start;
83 u32 address_end; 83 u32 address_end;
84 84
@@ -165,7 +165,7 @@ struct Regs {
165 165
166 INSERT_PADDING_WORDS(0x169); 166 INSERT_PADDING_WORDS(0x169);
167 167
168 struct { 168 struct DisplayTransferConfig {
169 u32 input_address; 169 u32 input_address;
170 u32 output_address; 170 u32 output_address;
171 171
diff --git a/src/core/hw/lcd.h b/src/core/hw/lcd.h
index 3dd877fbf..57029c5e8 100644
--- a/src/core/hw/lcd.h
+++ b/src/core/hw/lcd.h
@@ -52,8 +52,6 @@ struct Regs {
52 return content[index]; 52 return content[index];
53 } 53 }
54 54
55#undef ASSERT_MEMBER_SIZE
56
57}; 55};
58static_assert(std::is_standard_layout<Regs>::value, "Structure does not use standard layout"); 56static_assert(std::is_standard_layout<Regs>::value, "Structure does not use standard layout");
59 57
diff --git a/src/core/loader/3dsx.cpp b/src/core/loader/3dsx.cpp
index 5fb3b9e2b..98e7ab48f 100644
--- a/src/core/loader/3dsx.cpp
+++ b/src/core/loader/3dsx.cpp
@@ -178,11 +178,11 @@ static THREEDSX_Error Load3DSXFile(FileUtil::IOFile& file, u32 base_addr, Shared
178 for (unsigned current_inprogress = 0; current_inprogress < remaining && pos < end_pos; current_inprogress++) { 178 for (unsigned current_inprogress = 0; current_inprogress < remaining && pos < end_pos; current_inprogress++) {
179 const auto& table = reloc_table[current_inprogress]; 179 const auto& table = reloc_table[current_inprogress];
180 LOG_TRACE(Loader, "(t=%d,skip=%u,patch=%u)", current_segment_reloc_table, 180 LOG_TRACE(Loader, "(t=%d,skip=%u,patch=%u)", current_segment_reloc_table,
181 (u32)table.skip, (u32)table.patch); 181 static_cast<u32>(table.skip), static_cast<u32>(table.patch));
182 pos += table.skip; 182 pos += table.skip;
183 s32 num_patches = table.patch; 183 s32 num_patches = table.patch;
184 while (0 < num_patches && pos < end_pos) { 184 while (0 < num_patches && pos < end_pos) {
185 u32 in_addr = (u8*)pos - program_image.data(); 185 u32 in_addr = static_cast<u32>(reinterpret_cast<u8*>(pos) - program_image.data());
186 u32 addr = TranslateAddr(*pos, &loadinfo, offsets); 186 u32 addr = TranslateAddr(*pos, &loadinfo, offsets);
187 LOG_TRACE(Loader, "Patching %08X <-- rel(%08X,%d) (%08X)", 187 LOG_TRACE(Loader, "Patching %08X <-- rel(%08X,%d) (%08X)",
188 base_addr + in_addr, addr, current_segment_reloc_table, *pos); 188 base_addr + in_addr, addr, current_segment_reloc_table, *pos);
@@ -284,7 +284,7 @@ ResultStatus AppLoader_THREEDSX::ReadRomFS(std::shared_ptr<FileUtil::IOFile>& ro
284 // Check if the 3DSX has a RomFS... 284 // Check if the 3DSX has a RomFS...
285 if (hdr.fs_offset != 0) { 285 if (hdr.fs_offset != 0) {
286 u32 romfs_offset = hdr.fs_offset; 286 u32 romfs_offset = hdr.fs_offset;
287 u32 romfs_size = file.GetSize() - hdr.fs_offset; 287 u32 romfs_size = static_cast<u32>(file.GetSize()) - hdr.fs_offset;
288 288
289 LOG_DEBUG(Loader, "RomFS offset: 0x%08X", romfs_offset); 289 LOG_DEBUG(Loader, "RomFS offset: 0x%08X", romfs_offset);
290 LOG_DEBUG(Loader, "RomFS size: 0x%08X", romfs_size); 290 LOG_DEBUG(Loader, "RomFS size: 0x%08X", romfs_size);
@@ -303,4 +303,31 @@ ResultStatus AppLoader_THREEDSX::ReadRomFS(std::shared_ptr<FileUtil::IOFile>& ro
303 return ResultStatus::ErrorNotUsed; 303 return ResultStatus::ErrorNotUsed;
304} 304}
305 305
306ResultStatus AppLoader_THREEDSX::ReadIcon(std::vector<u8>& buffer) {
307 if (!file.IsOpen())
308 return ResultStatus::Error;
309
310 // Reset read pointer in case this file has been read before.
311 file.Seek(0, SEEK_SET);
312
313 THREEDSX_Header hdr;
314 if (file.ReadBytes(&hdr, sizeof(THREEDSX_Header)) != sizeof(THREEDSX_Header))
315 return ResultStatus::Error;
316
317 if (hdr.header_size != sizeof(THREEDSX_Header))
318 return ResultStatus::Error;
319
320 // Check if the 3DSX has a SMDH...
321 if (hdr.smdh_offset != 0) {
322 file.Seek(hdr.smdh_offset, SEEK_SET);
323 buffer.resize(hdr.smdh_size);
324
325 if (file.ReadBytes(&buffer[0], hdr.smdh_size) != hdr.smdh_size)
326 return ResultStatus::Error;
327
328 return ResultStatus::Success;
329 }
330 return ResultStatus::ErrorNotUsed;
331}
332
306} // namespace Loader 333} // namespace Loader
diff --git a/src/core/loader/3dsx.h b/src/core/loader/3dsx.h
index 365ddb7a5..3ee686703 100644
--- a/src/core/loader/3dsx.h
+++ b/src/core/loader/3dsx.h
@@ -17,7 +17,7 @@ namespace Loader {
17/// Loads an 3DSX file 17/// Loads an 3DSX file
18class AppLoader_THREEDSX final : public AppLoader { 18class AppLoader_THREEDSX final : public AppLoader {
19public: 19public:
20 AppLoader_THREEDSX(FileUtil::IOFile&& file, std::string filename, const std::string& filepath) 20 AppLoader_THREEDSX(FileUtil::IOFile&& file, const std::string& filename, const std::string& filepath)
21 : AppLoader(std::move(file)), filename(std::move(filename)), filepath(filepath) {} 21 : AppLoader(std::move(file)), filename(std::move(filename)), filepath(filepath) {}
22 22
23 /** 23 /**
@@ -34,6 +34,13 @@ public:
34 ResultStatus Load() override; 34 ResultStatus Load() override;
35 35
36 /** 36 /**
37 * Get the icon (typically icon section) of the application
38 * @param buffer Reference to buffer to store data
39 * @return ResultStatus result of function
40 */
41 ResultStatus ReadIcon(std::vector<u8>& buffer) override;
42
43 /**
37 * Get the RomFS of the application 44 * Get the RomFS of the application
38 * @param romfs_file Reference to buffer to store data 45 * @param romfs_file Reference to buffer to store data
39 * @param offset Offset in the file to the RomFS 46 * @param offset Offset in the file to the RomFS
diff --git a/src/core/loader/loader.cpp b/src/core/loader/loader.cpp
index 886501c41..af3f62248 100644
--- a/src/core/loader/loader.cpp
+++ b/src/core/loader/loader.cpp
@@ -90,6 +90,28 @@ const char* GetFileTypeString(FileType type) {
90 return "unknown"; 90 return "unknown";
91} 91}
92 92
93std::unique_ptr<AppLoader> GetLoader(FileUtil::IOFile&& file, FileType type,
94 const std::string& filename, const std::string& filepath) {
95 switch (type) {
96
97 // 3DSX file format.
98 case FileType::THREEDSX:
99 return std::make_unique<AppLoader_THREEDSX>(std::move(file), filename, filepath);
100
101 // Standard ELF file format.
102 case FileType::ELF:
103 return std::make_unique<AppLoader_ELF>(std::move(file), filename);
104
105 // NCCH/NCSD container formats.
106 case FileType::CXI:
107 case FileType::CCI:
108 return std::make_unique<AppLoader_NCCH>(std::move(file), filepath);
109
110 default:
111 return std::unique_ptr<AppLoader>();
112 }
113}
114
93ResultStatus LoadFile(const std::string& filename) { 115ResultStatus LoadFile(const std::string& filename) {
94 FileUtil::IOFile file(filename, "rb"); 116 FileUtil::IOFile file(filename, "rb");
95 if (!file.IsOpen()) { 117 if (!file.IsOpen()) {
@@ -111,38 +133,29 @@ ResultStatus LoadFile(const std::string& filename) {
111 133
112 LOG_INFO(Loader, "Loading file %s as %s...", filename.c_str(), GetFileTypeString(type)); 134 LOG_INFO(Loader, "Loading file %s as %s...", filename.c_str(), GetFileTypeString(type));
113 135
136 std::unique_ptr<AppLoader> app_loader = GetLoader(std::move(file), type, filename_filename, filename);
137
114 switch (type) { 138 switch (type) {
115 139
116 //3DSX file format... 140 // 3DSX file format...
141 // or NCCH/NCSD container formats...
117 case FileType::THREEDSX: 142 case FileType::THREEDSX:
118 {
119 AppLoader_THREEDSX app_loader(std::move(file), filename_filename, filename);
120 // Load application and RomFS
121 if (ResultStatus::Success == app_loader.Load()) {
122 Service::FS::RegisterArchiveType(std::make_unique<FileSys::ArchiveFactory_RomFS>(app_loader), Service::FS::ArchiveIdCode::RomFS);
123 return ResultStatus::Success;
124 }
125 break;
126 }
127
128 // Standard ELF file format...
129 case FileType::ELF:
130 return AppLoader_ELF(std::move(file), filename_filename).Load();
131
132 // NCCH/NCSD container formats...
133 case FileType::CXI: 143 case FileType::CXI:
134 case FileType::CCI: 144 case FileType::CCI:
135 { 145 {
136 AppLoader_NCCH app_loader(std::move(file), filename);
137
138 // Load application and RomFS 146 // Load application and RomFS
139 ResultStatus result = app_loader.Load(); 147 ResultStatus result = app_loader->Load();
140 if (ResultStatus::Success == result) { 148 if (ResultStatus::Success == result) {
141 Service::FS::RegisterArchiveType(std::make_unique<FileSys::ArchiveFactory_RomFS>(app_loader), Service::FS::ArchiveIdCode::RomFS); 149 Service::FS::RegisterArchiveType(std::make_unique<FileSys::ArchiveFactory_RomFS>(*app_loader), Service::FS::ArchiveIdCode::RomFS);
150 return ResultStatus::Success;
142 } 151 }
143 return result; 152 return result;
144 } 153 }
145 154
155 // Standard ELF file format...
156 case FileType::ELF:
157 return app_loader->Load();
158
146 // CIA file format... 159 // CIA file format...
147 case FileType::CIA: 160 case FileType::CIA:
148 return ResultStatus::ErrorNotImplemented; 161 return ResultStatus::ErrorNotImplemented;
diff --git a/src/core/loader/loader.h b/src/core/loader/loader.h
index 84a4ce5fc..9d3e9ed3b 100644
--- a/src/core/loader/loader.h
+++ b/src/core/loader/loader.h
@@ -10,8 +10,10 @@
10#include <string> 10#include <string>
11#include <vector> 11#include <vector>
12 12
13#include "common/common_funcs.h"
13#include "common/common_types.h" 14#include "common/common_types.h"
14#include "common/file_util.h" 15#include "common/file_util.h"
16#include "common/swap.h"
15 17
16namespace Kernel { 18namespace Kernel {
17struct AddressMapping; 19struct AddressMapping;
@@ -78,6 +80,51 @@ constexpr u32 MakeMagic(char a, char b, char c, char d) {
78 return a | b << 8 | c << 16 | d << 24; 80 return a | b << 8 | c << 16 | d << 24;
79} 81}
80 82
83/// SMDH data structure that contains titles, icons etc. See https://www.3dbrew.org/wiki/SMDH
84struct SMDH {
85 u32_le magic;
86 u16_le version;
87 INSERT_PADDING_BYTES(2);
88
89 struct Title {
90 std::array<u16, 0x40> short_title;
91 std::array<u16, 0x80> long_title;
92 std::array<u16, 0x40> publisher;
93 };
94 std::array<Title, 16> titles;
95
96 std::array<u8, 16> ratings;
97 u32_le region_lockout;
98 u32_le match_maker_id;
99 u64_le match_maker_bit_id;
100 u32_le flags;
101 u16_le eula_version;
102 INSERT_PADDING_BYTES(2);
103 float_le banner_animation_frame;
104 u32_le cec_id;
105 INSERT_PADDING_BYTES(8);
106
107 std::array<u8, 0x480> small_icon;
108 std::array<u8, 0x1200> large_icon;
109
110 /// indicates the language used for each title entry
111 enum class TitleLanguage {
112 Japanese = 0,
113 English = 1,
114 French = 2,
115 German = 3,
116 Italian = 4,
117 Spanish = 5,
118 SimplifiedChinese = 6,
119 Korean= 7,
120 Dutch = 8,
121 Portuguese = 9,
122 Russian = 10,
123 TraditionalChinese = 11
124 };
125};
126static_assert(sizeof(SMDH) == 0x36C0, "SMDH structure size is wrong");
127
81/// Interface for loading an application 128/// Interface for loading an application
82class AppLoader : NonCopyable { 129class AppLoader : NonCopyable {
83public: 130public:
@@ -150,6 +197,16 @@ protected:
150extern const std::initializer_list<Kernel::AddressMapping> default_address_mappings; 197extern const std::initializer_list<Kernel::AddressMapping> default_address_mappings;
151 198
152/** 199/**
200 * Get a loader for a file with a specific type
201 * @param file The file to load
202 * @param type The type of the file
203 * @param filename the file name (without path)
204 * @param filepath the file full path (with name)
205 * @return std::unique_ptr<AppLoader> a pointer to a loader object; nullptr for unsupported type
206 */
207std::unique_ptr<AppLoader> GetLoader(FileUtil::IOFile&& file, FileType type, const std::string& filename, const std::string& filepath);
208
209/**
153 * Identifies and loads a bootable file 210 * Identifies and loads a bootable file
154 * @param filename String filename of bootable file 211 * @param filename String filename of bootable file
155 * @return ResultStatus result of function 212 * @return ResultStatus result of function
diff --git a/src/core/loader/ncch.cpp b/src/core/loader/ncch.cpp
index 52c5fbaaf..7391bdb26 100644
--- a/src/core/loader/ncch.cpp
+++ b/src/core/loader/ncch.cpp
@@ -176,6 +176,10 @@ ResultStatus AppLoader_NCCH::LoadSectionExeFS(const char* name, std::vector<u8>&
176 if (!file.IsOpen()) 176 if (!file.IsOpen())
177 return ResultStatus::Error; 177 return ResultStatus::Error;
178 178
179 ResultStatus result = LoadExeFS();
180 if (result != ResultStatus::Success)
181 return result;
182
179 LOG_DEBUG(Loader, "%d sections:", kMaxSections); 183 LOG_DEBUG(Loader, "%d sections:", kMaxSections);
180 // Iterate through the ExeFs archive until we find a section with the specified name... 184 // Iterate through the ExeFs archive until we find a section with the specified name...
181 for (unsigned section_number = 0; section_number < kMaxSections; section_number++) { 185 for (unsigned section_number = 0; section_number < kMaxSections; section_number++) {
@@ -218,9 +222,9 @@ ResultStatus AppLoader_NCCH::LoadSectionExeFS(const char* name, std::vector<u8>&
218 return ResultStatus::ErrorNotUsed; 222 return ResultStatus::ErrorNotUsed;
219} 223}
220 224
221ResultStatus AppLoader_NCCH::Load() { 225ResultStatus AppLoader_NCCH::LoadExeFS() {
222 if (is_loaded) 226 if (is_exefs_loaded)
223 return ResultStatus::ErrorAlreadyLoaded; 227 return ResultStatus::Success;
224 228
225 if (!file.IsOpen()) 229 if (!file.IsOpen())
226 return ResultStatus::Error; 230 return ResultStatus::Error;
@@ -258,7 +262,7 @@ ResultStatus AppLoader_NCCH::Load() {
258 resource_limit_category = exheader_header.arm11_system_local_caps.resource_limit_category; 262 resource_limit_category = exheader_header.arm11_system_local_caps.resource_limit_category;
259 263
260 LOG_INFO(Loader, "Name: %s" , exheader_header.codeset_info.name); 264 LOG_INFO(Loader, "Name: %s" , exheader_header.codeset_info.name);
261 LOG_INFO(Loader, "Program ID: %016X" , ncch_header.program_id); 265 LOG_INFO(Loader, "Program ID: %016llX" , ncch_header.program_id);
262 LOG_DEBUG(Loader, "Code compressed: %s" , is_compressed ? "yes" : "no"); 266 LOG_DEBUG(Loader, "Code compressed: %s" , is_compressed ? "yes" : "no");
263 LOG_DEBUG(Loader, "Entry point: 0x%08X", entry_point); 267 LOG_DEBUG(Loader, "Entry point: 0x%08X", entry_point);
264 LOG_DEBUG(Loader, "Code size: 0x%08X", code_size); 268 LOG_DEBUG(Loader, "Code size: 0x%08X", code_size);
@@ -285,6 +289,18 @@ ResultStatus AppLoader_NCCH::Load() {
285 if (file.ReadBytes(&exefs_header, sizeof(ExeFs_Header)) != sizeof(ExeFs_Header)) 289 if (file.ReadBytes(&exefs_header, sizeof(ExeFs_Header)) != sizeof(ExeFs_Header))
286 return ResultStatus::Error; 290 return ResultStatus::Error;
287 291
292 is_exefs_loaded = true;
293 return ResultStatus::Success;
294}
295
296ResultStatus AppLoader_NCCH::Load() {
297 if (is_loaded)
298 return ResultStatus::ErrorAlreadyLoaded;
299
300 ResultStatus result = LoadExeFS();
301 if (result != ResultStatus::Success)
302 return result;
303
288 is_loaded = true; // Set state to loaded 304 is_loaded = true; // Set state to loaded
289 305
290 return LoadExec(); // Load the executable into memory for booting 306 return LoadExec(); // Load the executable into memory for booting
diff --git a/src/core/loader/ncch.h b/src/core/loader/ncch.h
index ca6772a78..fd852c3de 100644
--- a/src/core/loader/ncch.h
+++ b/src/core/loader/ncch.h
@@ -232,6 +232,13 @@ private:
232 */ 232 */
233 ResultStatus LoadExec(); 233 ResultStatus LoadExec();
234 234
235 /**
236 * Ensure ExeFS is loaded and ready for reading sections
237 * @return ResultStatus result of function
238 */
239 ResultStatus LoadExeFS();
240
241 bool is_exefs_loaded = false;
235 bool is_compressed = false; 242 bool is_compressed = false;
236 243
237 u32 entry_point = 0; 244 u32 entry_point = 0;
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 7de5bd15d..ee9b69f81 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -15,6 +15,9 @@
15#include "core/memory_setup.h" 15#include "core/memory_setup.h"
16#include "core/mmio.h" 16#include "core/mmio.h"
17 17
18#include "video_core/renderer_base.h"
19#include "video_core/video_core.h"
20
18namespace Memory { 21namespace Memory {
19 22
20enum class PageType { 23enum class PageType {
@@ -22,8 +25,12 @@ enum class PageType {
22 Unmapped, 25 Unmapped,
23 /// Page is mapped to regular memory. This is the only type you can get pointers to. 26 /// Page is mapped to regular memory. This is the only type you can get pointers to.
24 Memory, 27 Memory,
28 /// Page is mapped to regular memory, but also needs to check for rasterizer cache flushing and invalidation
29 RasterizerCachedMemory,
25 /// Page is mapped to a I/O region. Writing and reading to this page is handled by functions. 30 /// Page is mapped to a I/O region. Writing and reading to this page is handled by functions.
26 Special, 31 Special,
32 /// Page is mapped to a I/O region, but also needs to check for rasterizer cache flushing and invalidation
33 RasterizerCachedSpecial,
27}; 34};
28 35
29struct SpecialRegion { 36struct SpecialRegion {
@@ -57,6 +64,12 @@ struct PageTable {
57 * the corresponding entry in `pointers` MUST be set to null. 64 * the corresponding entry in `pointers` MUST be set to null.
58 */ 65 */
59 std::array<PageType, NUM_ENTRIES> attributes; 66 std::array<PageType, NUM_ENTRIES> attributes;
67
68 /**
69 * Indicates the number of externally cached resources touching a page that should be
70 * flushed before the memory is accessed
71 */
72 std::array<u8, NUM_ENTRIES> cached_res_count;
60}; 73};
61 74
62/// Singular page table used for the singleton process 75/// Singular page table used for the singleton process
@@ -72,8 +85,15 @@ static void MapPages(u32 base, u32 size, u8* memory, PageType type) {
72 while (base != end) { 85 while (base != end) {
73 ASSERT_MSG(base < PageTable::NUM_ENTRIES, "out of range mapping at %08X", base); 86 ASSERT_MSG(base < PageTable::NUM_ENTRIES, "out of range mapping at %08X", base);
74 87
88 // Since pages are unmapped on shutdown after video core is shutdown, the renderer may be null here
89 if (current_page_table->attributes[base] == PageType::RasterizerCachedMemory ||
90 current_page_table->attributes[base] == PageType::RasterizerCachedSpecial) {
91 RasterizerFlushAndInvalidateRegion(VirtualToPhysicalAddress(base << PAGE_BITS), PAGE_SIZE);
92 }
93
75 current_page_table->attributes[base] = type; 94 current_page_table->attributes[base] = type;
76 current_page_table->pointers[base] = memory; 95 current_page_table->pointers[base] = memory;
96 current_page_table->cached_res_count[base] = 0;
77 97
78 base += 1; 98 base += 1;
79 if (memory != nullptr) 99 if (memory != nullptr)
@@ -84,6 +104,7 @@ static void MapPages(u32 base, u32 size, u8* memory, PageType type) {
84void InitMemoryMap() { 104void InitMemoryMap() {
85 main_page_table.pointers.fill(nullptr); 105 main_page_table.pointers.fill(nullptr);
86 main_page_table.attributes.fill(PageType::Unmapped); 106 main_page_table.attributes.fill(PageType::Unmapped);
107 main_page_table.cached_res_count.fill(0);
87} 108}
88 109
89void MapMemoryRegion(VAddr base, u32 size, u8* target) { 110void MapMemoryRegion(VAddr base, u32 size, u8* target) {
@@ -107,6 +128,28 @@ void UnmapRegion(VAddr base, u32 size) {
107} 128}
108 129
109/** 130/**
131 * Gets a pointer to the exact memory at the virtual address (i.e. not page aligned)
132 * using a VMA from the current process
133 */
134static u8* GetPointerFromVMA(VAddr vaddr) {
135 u8* direct_pointer = nullptr;
136
137 auto& vma = Kernel::g_current_process->vm_manager.FindVMA(vaddr)->second;
138 switch (vma.type) {
139 case Kernel::VMAType::AllocatedMemoryBlock:
140 direct_pointer = vma.backing_block->data() + vma.offset;
141 break;
142 case Kernel::VMAType::BackingMemory:
143 direct_pointer = vma.backing_memory;
144 break;
145 default:
146 UNREACHABLE();
147 }
148
149 return direct_pointer + (vaddr - vma.base);
150}
151
152/**
110 * This function should only be called for virtual addreses with attribute `PageType::Special`. 153 * This function should only be called for virtual addreses with attribute `PageType::Special`.
111 */ 154 */
112static MMIORegionPointer GetMMIOHandler(VAddr vaddr) { 155static MMIORegionPointer GetMMIOHandler(VAddr vaddr) {
@@ -126,6 +169,7 @@ template <typename T>
126T Read(const VAddr vaddr) { 169T Read(const VAddr vaddr) {
127 const u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; 170 const u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS];
128 if (page_pointer) { 171 if (page_pointer) {
172 // NOTE: Avoid adding any extra logic to this fast-path block
129 T value; 173 T value;
130 std::memcpy(&value, &page_pointer[vaddr & PAGE_MASK], sizeof(T)); 174 std::memcpy(&value, &page_pointer[vaddr & PAGE_MASK], sizeof(T));
131 return value; 175 return value;
@@ -139,8 +183,22 @@ T Read(const VAddr vaddr) {
139 case PageType::Memory: 183 case PageType::Memory:
140 ASSERT_MSG(false, "Mapped memory page without a pointer @ %08X", vaddr); 184 ASSERT_MSG(false, "Mapped memory page without a pointer @ %08X", vaddr);
141 break; 185 break;
186 case PageType::RasterizerCachedMemory:
187 {
188 RasterizerFlushRegion(VirtualToPhysicalAddress(vaddr), sizeof(T));
189
190 T value;
191 std::memcpy(&value, GetPointerFromVMA(vaddr), sizeof(T));
192 return value;
193 }
142 case PageType::Special: 194 case PageType::Special:
143 return ReadMMIO<T>(GetMMIOHandler(vaddr), vaddr); 195 return ReadMMIO<T>(GetMMIOHandler(vaddr), vaddr);
196 case PageType::RasterizerCachedSpecial:
197 {
198 RasterizerFlushRegion(VirtualToPhysicalAddress(vaddr), sizeof(T));
199
200 return ReadMMIO<T>(GetMMIOHandler(vaddr), vaddr);
201 }
144 default: 202 default:
145 UNREACHABLE(); 203 UNREACHABLE();
146 } 204 }
@@ -153,6 +211,7 @@ template <typename T>
153void Write(const VAddr vaddr, const T data) { 211void Write(const VAddr vaddr, const T data) {
154 u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; 212 u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS];
155 if (page_pointer) { 213 if (page_pointer) {
214 // NOTE: Avoid adding any extra logic to this fast-path block
156 std::memcpy(&page_pointer[vaddr & PAGE_MASK], &data, sizeof(T)); 215 std::memcpy(&page_pointer[vaddr & PAGE_MASK], &data, sizeof(T));
157 return; 216 return;
158 } 217 }
@@ -165,9 +224,23 @@ void Write(const VAddr vaddr, const T data) {
165 case PageType::Memory: 224 case PageType::Memory:
166 ASSERT_MSG(false, "Mapped memory page without a pointer @ %08X", vaddr); 225 ASSERT_MSG(false, "Mapped memory page without a pointer @ %08X", vaddr);
167 break; 226 break;
227 case PageType::RasterizerCachedMemory:
228 {
229 RasterizerFlushAndInvalidateRegion(VirtualToPhysicalAddress(vaddr), sizeof(T));
230
231 std::memcpy(GetPointerFromVMA(vaddr), &data, sizeof(T));
232 break;
233 }
168 case PageType::Special: 234 case PageType::Special:
169 WriteMMIO<T>(GetMMIOHandler(vaddr), vaddr, data); 235 WriteMMIO<T>(GetMMIOHandler(vaddr), vaddr, data);
170 break; 236 break;
237 case PageType::RasterizerCachedSpecial:
238 {
239 RasterizerFlushAndInvalidateRegion(VirtualToPhysicalAddress(vaddr), sizeof(T));
240
241 WriteMMIO<T>(GetMMIOHandler(vaddr), vaddr, data);
242 break;
243 }
171 default: 244 default:
172 UNREACHABLE(); 245 UNREACHABLE();
173 } 246 }
@@ -179,6 +252,10 @@ u8* GetPointer(const VAddr vaddr) {
179 return page_pointer + (vaddr & PAGE_MASK); 252 return page_pointer + (vaddr & PAGE_MASK);
180 } 253 }
181 254
255 if (current_page_table->attributes[vaddr >> PAGE_BITS] == PageType::RasterizerCachedMemory) {
256 return GetPointerFromVMA(vaddr);
257 }
258
182 LOG_ERROR(HW_Memory, "unknown GetPointer @ 0x%08x", vaddr); 259 LOG_ERROR(HW_Memory, "unknown GetPointer @ 0x%08x", vaddr);
183 return nullptr; 260 return nullptr;
184} 261}
@@ -187,6 +264,69 @@ u8* GetPhysicalPointer(PAddr address) {
187 return GetPointer(PhysicalToVirtualAddress(address)); 264 return GetPointer(PhysicalToVirtualAddress(address));
188} 265}
189 266
267void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) {
268 if (start == 0) {
269 return;
270 }
271
272 u32 num_pages = ((start + size - 1) >> PAGE_BITS) - (start >> PAGE_BITS) + 1;
273 PAddr paddr = start;
274
275 for (unsigned i = 0; i < num_pages; ++i) {
276 VAddr vaddr = PhysicalToVirtualAddress(paddr);
277 u8& res_count = current_page_table->cached_res_count[vaddr >> PAGE_BITS];
278 ASSERT_MSG(count_delta <= UINT8_MAX - res_count, "Rasterizer resource cache counter overflow!");
279 ASSERT_MSG(count_delta >= -res_count, "Rasterizer resource cache counter underflow!");
280
281 // Switch page type to cached if now cached
282 if (res_count == 0) {
283 PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
284 switch (page_type) {
285 case PageType::Memory:
286 page_type = PageType::RasterizerCachedMemory;
287 current_page_table->pointers[vaddr >> PAGE_BITS] = nullptr;
288 break;
289 case PageType::Special:
290 page_type = PageType::RasterizerCachedSpecial;
291 break;
292 default:
293 UNREACHABLE();
294 }
295 }
296
297 res_count += count_delta;
298
299 // Switch page type to uncached if now uncached
300 if (res_count == 0) {
301 PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
302 switch (page_type) {
303 case PageType::RasterizerCachedMemory:
304 page_type = PageType::Memory;
305 current_page_table->pointers[vaddr >> PAGE_BITS] = GetPointerFromVMA(vaddr & ~PAGE_MASK);
306 break;
307 case PageType::RasterizerCachedSpecial:
308 page_type = PageType::Special;
309 break;
310 default:
311 UNREACHABLE();
312 }
313 }
314 paddr += PAGE_SIZE;
315 }
316}
317
318void RasterizerFlushRegion(PAddr start, u32 size) {
319 if (VideoCore::g_renderer != nullptr) {
320 VideoCore::g_renderer->Rasterizer()->FlushRegion(start, size);
321 }
322}
323
324void RasterizerFlushAndInvalidateRegion(PAddr start, u32 size) {
325 if (VideoCore::g_renderer != nullptr) {
326 VideoCore::g_renderer->Rasterizer()->FlushAndInvalidateRegion(start, size);
327 }
328}
329
190u8 Read8(const VAddr addr) { 330u8 Read8(const VAddr addr) {
191 return Read<u8>(addr); 331 return Read<u8>(addr);
192} 332}
diff --git a/src/core/memory.h b/src/core/memory.h
index 5af72b7a7..9caa3c3f5 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -148,4 +148,20 @@ VAddr PhysicalToVirtualAddress(PAddr addr);
148 */ 148 */
149u8* GetPhysicalPointer(PAddr address); 149u8* GetPhysicalPointer(PAddr address);
150 150
151/**
152 * Adds the supplied value to the rasterizer resource cache counter of each
153 * page touching the region.
154 */
155void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta);
156
157/**
158 * Flushes any externally cached rasterizer resources touching the given region.
159 */
160void RasterizerFlushRegion(PAddr start, u32 size);
161
162/**
163 * Flushes and invalidates any externally cached rasterizer resources touching the given region.
164 */
165void RasterizerFlushAndInvalidateRegion(PAddr start, u32 size);
166
151} 167}
diff --git a/src/core/settings.cpp b/src/core/settings.cpp
index 1aa26fbd2..77261eafe 100644
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -4,6 +4,8 @@
4 4
5#include "settings.h" 5#include "settings.h"
6 6
7#include "audio_core/audio_core.h"
8
7#include "core/gdbstub/gdbstub.h" 9#include "core/gdbstub/gdbstub.h"
8 10
9#include "video_core/video_core.h" 11#include "video_core/video_core.h"
@@ -19,6 +21,9 @@ void Apply() {
19 21
20 VideoCore::g_hw_renderer_enabled = values.use_hw_renderer; 22 VideoCore::g_hw_renderer_enabled = values.use_hw_renderer;
21 VideoCore::g_shader_jit_enabled = values.use_shader_jit; 23 VideoCore::g_shader_jit_enabled = values.use_shader_jit;
24 VideoCore::g_scaled_resolution_enabled = values.use_scaled_resolution;
25
26 AudioCore::SelectSink(values.sink_id);
22 27
23} 28}
24 29
diff --git a/src/core/settings.h b/src/core/settings.h
index 4933a516d..ce2a31164 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -6,7 +6,8 @@
6 6
7#include <string> 7#include <string>
8#include <array> 8#include <array>
9#include <common/file_util.h> 9
10#include "common/common_types.h"
10 11
11namespace Settings { 12namespace Settings {
12 13
@@ -55,6 +56,7 @@ struct Values {
55 // Renderer 56 // Renderer
56 bool use_hw_renderer; 57 bool use_hw_renderer;
57 bool use_shader_jit; 58 bool use_shader_jit;
59 bool use_scaled_resolution;
58 60
59 float bg_red; 61 float bg_red;
60 float bg_green; 62 float bg_green;
@@ -62,6 +64,9 @@ struct Values {
62 64
63 std::string log_filter; 65 std::string log_filter;
64 66
67 // Audio
68 std::string sink_id;
69
65 // Debugging 70 // Debugging
66 bool use_gdbstub; 71 bool use_gdbstub;
67 u16 gdbstub_port; 72 u16 gdbstub_port;
diff --git a/src/core/tracer/recorder.cpp b/src/core/tracer/recorder.cpp
index c6dc35c83..7abaacf70 100644
--- a/src/core/tracer/recorder.cpp
+++ b/src/core/tracer/recorder.cpp
@@ -26,17 +26,17 @@ void Recorder::Finish(const std::string& filename) {
26 // Calculate file offsets 26 // Calculate file offsets
27 auto& initial = header.initial_state_offsets; 27 auto& initial = header.initial_state_offsets;
28 28
29 initial.gpu_registers_size = initial_state.gpu_registers.size(); 29 initial.gpu_registers_size = static_cast<u32>(initial_state.gpu_registers.size());
30 initial.lcd_registers_size = initial_state.lcd_registers.size(); 30 initial.lcd_registers_size = static_cast<u32>(initial_state.lcd_registers.size());
31 initial.pica_registers_size = initial_state.pica_registers.size(); 31 initial.pica_registers_size = static_cast<u32>(initial_state.pica_registers.size());
32 initial.default_attributes_size = initial_state.default_attributes.size(); 32 initial.default_attributes_size = static_cast<u32>(initial_state.default_attributes.size());
33 initial.vs_program_binary_size = initial_state.vs_program_binary.size(); 33 initial.vs_program_binary_size = static_cast<u32>(initial_state.vs_program_binary.size());
34 initial.vs_swizzle_data_size = initial_state.vs_swizzle_data.size(); 34 initial.vs_swizzle_data_size = static_cast<u32>(initial_state.vs_swizzle_data.size());
35 initial.vs_float_uniforms_size = initial_state.vs_float_uniforms.size(); 35 initial.vs_float_uniforms_size = static_cast<u32>(initial_state.vs_float_uniforms.size());
36 initial.gs_program_binary_size = initial_state.gs_program_binary.size(); 36 initial.gs_program_binary_size = static_cast<u32>(initial_state.gs_program_binary.size());
37 initial.gs_swizzle_data_size = initial_state.gs_swizzle_data.size(); 37 initial.gs_swizzle_data_size = static_cast<u32>(initial_state.gs_swizzle_data.size());
38 initial.gs_float_uniforms_size = initial_state.gs_float_uniforms.size(); 38 initial.gs_float_uniforms_size = static_cast<u32>(initial_state.gs_float_uniforms.size());
39 header.stream_size = stream.size(); 39 header.stream_size = static_cast<u32>(stream.size());
40 40
41 initial.gpu_registers = sizeof(header); 41 initial.gpu_registers = sizeof(header);
42 initial.lcd_registers = initial.gpu_registers + initial.gpu_registers_size * sizeof(u32); 42 initial.lcd_registers = initial.gpu_registers + initial.gpu_registers_size * sizeof(u32);
@@ -68,7 +68,7 @@ void Recorder::Finish(const std::string& filename) {
68 DEBUG_ASSERT(stream_element.extra_data.size() == 0); 68 DEBUG_ASSERT(stream_element.extra_data.size() == 0);
69 break; 69 break;
70 } 70 }
71 header.stream_offset += stream_element.extra_data.size(); 71 header.stream_offset += static_cast<u32>(stream_element.extra_data.size());
72 } 72 }
73 73
74 try { 74 try {
diff --git a/src/core/tracer/recorder.h b/src/core/tracer/recorder.h
index a42ccc45f..febf883c8 100644
--- a/src/core/tracer/recorder.h
+++ b/src/core/tracer/recorder.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <string>
7#include <unordered_map> 8#include <unordered_map>
8#include <vector> 9#include <vector>
9 10
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 76cfd4f7d..581a37897 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -15,7 +15,7 @@ set(SRCS
15 shader/shader.cpp 15 shader/shader.cpp
16 shader/shader_interpreter.cpp 16 shader/shader_interpreter.cpp
17 swrasterizer.cpp 17 swrasterizer.cpp
18 utils.cpp 18 vertex_loader.cpp
19 video_core.cpp 19 video_core.cpp
20 ) 20 )
21 21
@@ -43,6 +43,7 @@ set(HEADERS
43 shader/shader_interpreter.h 43 shader/shader_interpreter.h
44 swrasterizer.h 44 swrasterizer.h
45 utils.h 45 utils.h
46 vertex_loader.h
46 video_core.h 47 video_core.h
47 ) 48 )
48 49
diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp
index 3d503486e..2bc747102 100644
--- a/src/video_core/clipper.cpp
+++ b/src/video_core/clipper.cpp
@@ -2,13 +2,24 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
6#include <array>
7#include <cstddef>
8
5#include <boost/container/static_vector.hpp> 9#include <boost/container/static_vector.hpp>
10#include <boost/container/vector.hpp>
11
12#include "common/bit_field.h"
13#include "common/common_types.h"
14#include "common/logging/log.h"
15#include "common/vector_math.h"
6 16
7#include "video_core/clipper.h" 17#include "video_core/clipper.h"
8#include "video_core/pica.h" 18#include "video_core/pica.h"
9#include "video_core/pica_state.h" 19#include "video_core/pica_state.h"
20#include "video_core/pica_types.h"
10#include "video_core/rasterizer.h" 21#include "video_core/rasterizer.h"
11#include "video_core/shader/shader_interpreter.h" 22#include "video_core/shader/shader.h"
12 23
13namespace Pica { 24namespace Pica {
14 25
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 3abe79c09..dd1379503 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -2,26 +2,32 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <cmath> 5#include <array>
6#include <boost/range/algorithm/fill.hpp> 6#include <cstddef>
7#include <memory>
8#include <utility>
7 9
8#include "common/alignment.h" 10#include "common/assert.h"
11#include "common/logging/log.h"
9#include "common/microprofile.h" 12#include "common/microprofile.h"
10#include "common/profiler.h" 13#include "common/vector_math.h"
11 14
12#include "core/settings.h"
13#include "core/hle/service/gsp_gpu.h" 15#include "core/hle/service/gsp_gpu.h"
14#include "core/hw/gpu.h" 16#include "core/hw/gpu.h"
17#include "core/memory.h"
18#include "core/tracer/recorder.h"
15 19
16#include "video_core/clipper.h"
17#include "video_core/command_processor.h" 20#include "video_core/command_processor.h"
21#include "video_core/debug_utils/debug_utils.h"
18#include "video_core/pica.h" 22#include "video_core/pica.h"
19#include "video_core/pica_state.h" 23#include "video_core/pica_state.h"
24#include "video_core/pica_types.h"
20#include "video_core/primitive_assembly.h" 25#include "video_core/primitive_assembly.h"
26#include "video_core/rasterizer_interface.h"
21#include "video_core/renderer_base.h" 27#include "video_core/renderer_base.h"
28#include "video_core/shader/shader.h"
29#include "video_core/vertex_loader.h"
22#include "video_core/video_core.h" 30#include "video_core/video_core.h"
23#include "video_core/debug_utils/debug_utils.h"
24#include "video_core/shader/shader_interpreter.h"
25 31
26namespace Pica { 32namespace Pica {
27 33
@@ -35,8 +41,6 @@ static int default_attr_counter = 0;
35 41
36static u32 default_attr_write_buffer[3]; 42static u32 default_attr_write_buffer[3];
37 43
38Common::Profiling::TimingCategory category_drawing("Drawing");
39
40// Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF 44// Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF
41static const u32 expand_bits_to_bytes[] = { 45static const u32 expand_bits_to_bytes[] = {
42 0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff, 46 0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff,
@@ -142,10 +146,9 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
142 Shader::UnitState<false> shader_unit; 146 Shader::UnitState<false> shader_unit;
143 Shader::Setup(); 147 Shader::Setup();
144 148
145 if (g_debug_context)
146 g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, static_cast<void*>(&immediate_input));
147
148 // Send to vertex shader 149 // Send to vertex shader
150 if (g_debug_context)
151 g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, static_cast<void*>(&immediate_input));
149 Shader::OutputVertex output = Shader::Run(shader_unit, immediate_input, regs.vs.num_input_attributes+1); 152 Shader::OutputVertex output = Shader::Run(shader_unit, immediate_input, regs.vs.num_input_attributes+1);
150 153
151 // Send to renderer 154 // Send to renderer
@@ -186,60 +189,19 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
186 case PICA_REG_INDEX(trigger_draw): 189 case PICA_REG_INDEX(trigger_draw):
187 case PICA_REG_INDEX(trigger_draw_indexed): 190 case PICA_REG_INDEX(trigger_draw_indexed):
188 { 191 {
189 Common::Profiling::ScopeTimer scope_timer(category_drawing);
190 MICROPROFILE_SCOPE(GPU_Drawing); 192 MICROPROFILE_SCOPE(GPU_Drawing);
191 193
192#if PICA_LOG_TEV 194#if PICA_LOG_TEV
193 DebugUtils::DumpTevStageConfig(regs.GetTevStages()); 195 DebugUtils::DumpTevStageConfig(regs.GetTevStages());
194#endif 196#endif
195
196 if (g_debug_context) 197 if (g_debug_context)
197 g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr); 198 g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr);
198 199
199 const auto& attribute_config = regs.vertex_attributes; 200 // Processes information about internal vertex attributes to figure out how a vertex is loaded.
200 const u32 base_address = attribute_config.GetPhysicalBaseAddress(); 201 // Later, these can be compiled and cached.
201 202 VertexLoader loader;
202 // Information about internal vertex attributes 203 const u32 base_address = regs.vertex_attributes.GetPhysicalBaseAddress();
203 u32 vertex_attribute_sources[16]; 204 loader.Setup(regs);
204 boost::fill(vertex_attribute_sources, 0xdeadbeef);
205 u32 vertex_attribute_strides[16] = {};
206 Regs::VertexAttributeFormat vertex_attribute_formats[16] = {};
207
208 u32 vertex_attribute_elements[16] = {};
209 u32 vertex_attribute_element_size[16] = {};
210
211 // Setup attribute data from loaders
212 for (int loader = 0; loader < 12; ++loader) {
213 const auto& loader_config = attribute_config.attribute_loaders[loader];
214
215 u32 offset = 0;
216
217 // TODO: What happens if a loader overwrites a previous one's data?
218 for (unsigned component = 0; component < loader_config.component_count; ++component) {
219 if (component >= 12) {
220 LOG_ERROR(HW_GPU, "Overflow in the vertex attribute loader %u trying to load component %u", loader, component);
221 continue;
222 }
223
224 u32 attribute_index = loader_config.GetComponent(component);
225 if (attribute_index < 12) {
226 int element_size = attribute_config.GetElementSizeInBytes(attribute_index);
227 offset = Common::AlignUp(offset, element_size);
228 vertex_attribute_sources[attribute_index] = base_address + loader_config.data_offset + offset;
229 vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count);
230 vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index);
231 vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index);
232 vertex_attribute_element_size[attribute_index] = element_size;
233 offset += attribute_config.GetStride(attribute_index);
234 } else if (attribute_index < 16) {
235 // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively
236 offset = Common::AlignUp(offset, 4);
237 offset += (attribute_index - 11) * 4;
238 } else {
239 UNREACHABLE(); // This is truly unreachable due to the number of bits for each component
240 }
241 }
242 }
243 205
244 // Load vertices 206 // Load vertices
245 bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed)); 207 bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed));
@@ -263,32 +225,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
263 } 225 }
264 } 226 }
265 227
266 class { 228 DebugUtils::MemoryAccessTracker memory_accesses;
267 /// Combine overlapping and close ranges
268 void SimplifyRanges() {
269 for (auto it = ranges.begin(); it != ranges.end(); ++it) {
270 // NOTE: We add 32 to the range end address to make sure "close" ranges are combined, too
271 auto it2 = std::next(it);
272 while (it2 != ranges.end() && it->first + it->second + 32 >= it2->first) {
273 it->second = std::max(it->second, it2->first + it2->second - it->first);
274 it2 = ranges.erase(it2);
275 }
276 }
277 }
278
279 public:
280 /// Record a particular memory access in the list
281 void AddAccess(u32 paddr, u32 size) {
282 // Create new range or extend existing one
283 ranges[paddr] = std::max(ranges[paddr], size);
284
285 // Simplify ranges...
286 SimplifyRanges();
287 }
288
289 /// Map of accessed ranges (mapping start address to range size)
290 std::map<u32, u32> ranges;
291 } memory_accesses;
292 229
293 // Simple circular-replacement vertex cache 230 // Simple circular-replacement vertex cache
294 // The size has been tuned for optimal balance between hit-rate and the cost of lookup 231 // The size has been tuned for optimal balance between hit-rate and the cost of lookup
@@ -332,60 +269,12 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
332 if (!vertex_cache_hit) { 269 if (!vertex_cache_hit) {
333 // Initialize data for the current vertex 270 // Initialize data for the current vertex
334 Shader::InputVertex input; 271 Shader::InputVertex input;
335 272 loader.LoadVertex(base_address, index, vertex, input, memory_accesses);
336 for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) {
337 if (vertex_attribute_elements[i] != 0) {
338 // Default attribute values set if array elements have < 4 components. This
339 // is *not* carried over from the default attribute settings even if they're
340 // enabled for this attribute.
341 static const float24 zero = float24::FromFloat32(0.0f);
342 static const float24 one = float24::FromFloat32(1.0f);
343 input.attr[i] = Math::Vec4<float24>(zero, zero, zero, one);
344
345 // Load per-vertex data from the loader arrays
346 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
347 u32 source_addr = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i];
348 const u8* srcdata = Memory::GetPhysicalPointer(source_addr);
349
350 if (g_debug_context && Pica::g_debug_context->recorder) {
351 memory_accesses.AddAccess(source_addr,
352 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4
353 : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1);
354 }
355
356 const float srcval =
357 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE) ? *reinterpret_cast<const s8*>(srcdata) :
358 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *reinterpret_cast<const u8*>(srcdata) :
359 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? *reinterpret_cast<const s16*>(srcdata) :
360 *reinterpret_cast<const float*>(srcdata);
361
362 input.attr[i][comp] = float24::FromFloat32(srcval);
363 LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f",
364 comp, i, vertex, index,
365 attribute_config.GetPhysicalBaseAddress(),
366 vertex_attribute_sources[i] - base_address,
367 vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i],
368 input.attr[i][comp].ToFloat32());
369 }
370 } else if (attribute_config.IsDefaultAttribute(i)) {
371 // Load the default attribute if we're configured to do so
372 input.attr[i] = g_state.vs.default_attributes[i];
373 LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)",
374 i, vertex, index,
375 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),
376 input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
377 } else {
378 // TODO(yuriks): In this case, no data gets loaded and the vertex
379 // remains with the last value it had. This isn't currently maintained
380 // as global state, however, and so won't work in Citra yet.
381 }
382 }
383
384 if (g_debug_context)
385 g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input);
386 273
387 // Send to vertex shader 274 // Send to vertex shader
388 output = Shader::Run(shader_unit, input, attribute_config.GetNumTotalAttributes()); 275 if (g_debug_context)
276 g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, (void*)&input);
277 output = Shader::Run(shader_unit, input, loader.GetNumTotalAttributes());
389 278
390 if (is_indexed) { 279 if (is_indexed) {
391 vertex_cache[vertex_cache_pos] = output; 280 vertex_cache[vertex_cache_pos] = output;
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index c3a9c9598..2f645b441 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -4,35 +4,41 @@
4 4
5#include <algorithm> 5#include <algorithm>
6#include <condition_variable> 6#include <condition_variable>
7#include <cstdint>
7#include <cstring> 8#include <cstring>
8#include <fstream> 9#include <fstream>
9#include <list>
10#include <map> 10#include <map>
11#include <mutex> 11#include <mutex>
12#include <stdexcept>
12#include <string> 13#include <string>
13 14
14#ifdef HAVE_PNG 15#ifdef HAVE_PNG
15#include <png.h> 16#include <png.h>
17#include <setjmp.h>
16#endif 18#endif
17 19
20#include <nihstro/bit_field.h>
18#include <nihstro/float24.h> 21#include <nihstro/float24.h>
19#include <nihstro/shader_binary.h> 22#include <nihstro/shader_binary.h>
20 23
21#include "common/assert.h" 24#include "common/assert.h"
25#include "common/bit_field.h"
22#include "common/color.h" 26#include "common/color.h"
23#include "common/common_types.h" 27#include "common/common_types.h"
24#include "common/file_util.h" 28#include "common/file_util.h"
29#include "common/logging/log.h"
25#include "common/math_util.h" 30#include "common/math_util.h"
26#include "common/vector_math.h" 31#include "common/vector_math.h"
27 32
28#include "core/settings.h" 33#include "video_core/debug_utils/debug_utils.h"
29
30#include "video_core/pica.h" 34#include "video_core/pica.h"
31#include "video_core/pica_state.h" 35#include "video_core/pica_state.h"
36#include "video_core/pica_types.h"
37#include "video_core/rasterizer_interface.h"
32#include "video_core/renderer_base.h" 38#include "video_core/renderer_base.h"
39#include "video_core/shader/shader.h"
33#include "video_core/utils.h" 40#include "video_core/utils.h"
34#include "video_core/video_core.h" 41#include "video_core/video_core.h"
35#include "video_core/debug_utils/debug_utils.h"
36 42
37using nihstro::DVLBHeader; 43using nihstro::DVLBHeader;
38using nihstro::DVLEHeader; 44using nihstro::DVLEHeader;
@@ -40,15 +46,12 @@ using nihstro::DVLPHeader;
40 46
41namespace Pica { 47namespace Pica {
42 48
43void DebugContext::OnEvent(Event event, void* data) { 49void DebugContext::DoOnEvent(Event event, void* data) {
44 if (!breakpoints[event].enabled)
45 return;
46
47 { 50 {
48 std::unique_lock<std::mutex> lock(breakpoint_mutex); 51 std::unique_lock<std::mutex> lock(breakpoint_mutex);
49 52
50 // Commit the hardware renderer's framebuffer so it will show on debug widgets 53 // Commit the rasterizer's caches so framebuffers, render targets, etc. will show on debug widgets
51 VideoCore::g_renderer->Rasterizer()->FlushFramebuffer(); 54 VideoCore::g_renderer->Rasterizer()->FlushAll();
52 55
53 // TODO: Should stop the CPU thread here once we multithread emulation. 56 // TODO: Should stop the CPU thread here once we multithread emulation.
54 57
@@ -205,11 +208,12 @@ void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, c
205 208
206 // TODO: Reduce the amount of binary code written to relevant portions 209 // TODO: Reduce the amount of binary code written to relevant portions
207 dvlp.binary_offset = write_offset - dvlp_offset; 210 dvlp.binary_offset = write_offset - dvlp_offset;
208 dvlp.binary_size_words = setup.program_code.size(); 211 dvlp.binary_size_words = static_cast<uint32_t>(setup.program_code.size());
209 QueueForWriting(reinterpret_cast<const u8*>(setup.program_code.data()), setup.program_code.size() * sizeof(u32)); 212 QueueForWriting(reinterpret_cast<const u8*>(setup.program_code.data()),
213 static_cast<u32>(setup.program_code.size()) * sizeof(u32));
210 214
211 dvlp.swizzle_info_offset = write_offset - dvlp_offset; 215 dvlp.swizzle_info_offset = write_offset - dvlp_offset;
212 dvlp.swizzle_info_num_entries = setup.swizzle_data.size(); 216 dvlp.swizzle_info_num_entries = static_cast<uint32_t>(setup.swizzle_data.size());
213 u32 dummy = 0; 217 u32 dummy = 0;
214 for (unsigned int i = 0; i < setup.swizzle_data.size(); ++i) { 218 for (unsigned int i = 0; i < setup.swizzle_data.size(); ++i) {
215 QueueForWriting(reinterpret_cast<const u8*>(&setup.swizzle_data[i]), sizeof(setup.swizzle_data[i])); 219 QueueForWriting(reinterpret_cast<const u8*>(&setup.swizzle_data[i]), sizeof(setup.swizzle_data[i]));
@@ -261,7 +265,7 @@ void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, c
261 constant_table.emplace_back(constant); 265 constant_table.emplace_back(constant);
262 } 266 }
263 dvle.constant_table_offset = write_offset - dvlb.dvle_offset; 267 dvle.constant_table_offset = write_offset - dvlb.dvle_offset;
264 dvle.constant_table_size = constant_table.size(); 268 dvle.constant_table_size = static_cast<uint32_t>(constant_table.size());
265 for (const auto& constant : constant_table) { 269 for (const auto& constant : constant_table) {
266 QueueForWriting(reinterpret_cast<const u8*>(&constant), sizeof(constant)); 270 QueueForWriting(reinterpret_cast<const u8*>(&constant), sizeof(constant));
267 } 271 }
diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h
index 7df941619..f628292a4 100644
--- a/src/video_core/debug_utils/debug_utils.h
+++ b/src/video_core/debug_utils/debug_utils.h
@@ -4,23 +4,33 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <algorithm>
7#include <array> 8#include <array>
8#include <condition_variable> 9#include <condition_variable>
10#include <iterator>
9#include <list> 11#include <list>
10#include <map> 12#include <map>
11#include <memory> 13#include <memory>
12#include <mutex> 14#include <mutex>
15#include <string>
16#include <utility>
13#include <vector> 17#include <vector>
14 18
19#include "common/common_types.h"
15#include "common/vector_math.h" 20#include "common/vector_math.h"
16 21
17#include "core/tracer/recorder.h"
18
19#include "video_core/pica.h" 22#include "video_core/pica.h"
20#include "video_core/shader/shader.h" 23
24namespace CiTrace {
25class Recorder;
26}
21 27
22namespace Pica { 28namespace Pica {
23 29
30namespace Shader {
31struct ShaderSetup;
32}
33
24class DebugContext { 34class DebugContext {
25public: 35public:
26 enum class Event { 36 enum class Event {
@@ -30,7 +40,7 @@ public:
30 PicaCommandProcessed, 40 PicaCommandProcessed,
31 IncomingPrimitiveBatch, 41 IncomingPrimitiveBatch,
32 FinishedPrimitiveBatch, 42 FinishedPrimitiveBatch,
33 VertexLoaded, 43 VertexShaderInvocation,
34 IncomingDisplayTransfer, 44 IncomingDisplayTransfer,
35 GSPCommandProcessed, 45 GSPCommandProcessed,
36 BufferSwapped, 46 BufferSwapped,
@@ -114,7 +124,15 @@ public:
114 * @param event Event which has happened 124 * @param event Event which has happened
115 * @param data Optional data pointer (pass nullptr if unused). Needs to remain valid until Resume() is called. 125 * @param data Optional data pointer (pass nullptr if unused). Needs to remain valid until Resume() is called.
116 */ 126 */
117 void OnEvent(Event event, void* data); 127 void OnEvent(Event event, void* data) {
128 // This check is left in the header to allow the compiler to inline it.
129 if (!breakpoints[(int)event].enabled)
130 return;
131 // For the rest of event handling, call a separate function.
132 DoOnEvent(event, data);
133 }
134
135 void DoOnEvent(Event event, void *data);
118 136
119 /** 137 /**
120 * Resume from the current breakpoint. 138 * Resume from the current breakpoint.
@@ -126,12 +144,14 @@ public:
126 * Delete all set breakpoints and resume emulation. 144 * Delete all set breakpoints and resume emulation.
127 */ 145 */
128 void ClearBreakpoints() { 146 void ClearBreakpoints() {
129 breakpoints.clear(); 147 for (auto &bp : breakpoints) {
148 bp.enabled = false;
149 }
130 Resume(); 150 Resume();
131 } 151 }
132 152
133 // TODO: Evaluate if access to these members should be hidden behind a public interface. 153 // TODO: Evaluate if access to these members should be hidden behind a public interface.
134 std::map<Event, BreakPoint> breakpoints; 154 std::array<BreakPoint, (int)Event::NumEvents> breakpoints;
135 Event active_breakpoint; 155 Event active_breakpoint;
136 bool at_breakpoint = false; 156 bool at_breakpoint = false;
137 157
@@ -206,6 +226,36 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data);
206 226
207void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages); 227void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages);
208 228
229/**
230 * Used in the vertex loader to merge access records. TODO: Investigate if actually useful.
231 */
232class MemoryAccessTracker {
233 /// Combine overlapping and close ranges
234 void SimplifyRanges() {
235 for (auto it = ranges.begin(); it != ranges.end(); ++it) {
236 // NOTE: We add 32 to the range end address to make sure "close" ranges are combined, too
237 auto it2 = std::next(it);
238 while (it2 != ranges.end() && it->first + it->second + 32 >= it2->first) {
239 it->second = std::max(it->second, it2->first + it2->second - it->first);
240 it2 = ranges.erase(it2);
241 }
242 }
243 }
244
245public:
246 /// Record a particular memory access in the list
247 void AddAccess(u32 paddr, u32 size) {
248 // Create new range or extend existing one
249 ranges[paddr] = std::max(ranges[paddr], size);
250
251 // Simplify ranges...
252 SimplifyRanges();
253 }
254
255 /// Map of accessed ranges (mapping start address to range size)
256 std::map<u32, u32> ranges;
257};
258
209} // namespace 259} // namespace
210 260
211} // namespace 261} // namespace
diff --git a/src/video_core/pica.cpp b/src/video_core/pica.cpp
index ccbaf071b..be82cf4b5 100644
--- a/src/video_core/pica.cpp
+++ b/src/video_core/pica.cpp
@@ -3,10 +3,13 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <cstring> 5#include <cstring>
6#include <iterator>
6#include <unordered_map> 7#include <unordered_map>
8#include <utility>
7 9
8#include "video_core/pica.h" 10#include "video_core/pica.h"
9#include "video_core/pica_state.h" 11#include "video_core/pica_state.h"
12#include "video_core/primitive_assembly.h"
10#include "video_core/shader/shader.h" 13#include "video_core/shader/shader.h"
11 14
12namespace Pica { 15namespace Pica {
@@ -480,7 +483,7 @@ std::string Regs::GetCommandName(int index) {
480 static std::unordered_map<u32, const char*> map; 483 static std::unordered_map<u32, const char*> map;
481 484
482 if (map.empty()) { 485 if (map.empty()) {
483 map.insert(begin(register_names), end(register_names)); 486 map.insert(std::begin(register_names), std::end(register_names));
484 } 487 }
485 488
486 // Return empty string if no match is found 489 // Return empty string if no match is found
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 4552ff81c..5891fb72a 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -5,10 +5,13 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <cmath>
9#include <cstddef> 8#include <cstddef>
10#include <string> 9#include <string>
11 10
11#ifndef _MSC_VER
12#include <type_traits> // for std::enable_if
13#endif
14
12#include "common/assert.h" 15#include "common/assert.h"
13#include "common/bit_field.h" 16#include "common/bit_field.h"
14#include "common/common_funcs.h" 17#include "common/common_funcs.h"
@@ -16,8 +19,6 @@
16#include "common/vector_math.h" 19#include "common/vector_math.h"
17#include "common/logging/log.h" 20#include "common/logging/log.h"
18 21
19#include "pica_types.h"
20
21namespace Pica { 22namespace Pica {
22 23
23// Returns index corresponding to the Regs member labeled by field_name 24// Returns index corresponding to the Regs member labeled by field_name
@@ -577,7 +578,7 @@ struct Regs {
577 } 578 }
578 } 579 }
579 580
580 struct { 581 struct FramebufferConfig {
581 INSERT_PADDING_WORDS(0x3); 582 INSERT_PADDING_WORDS(0x3);
582 583
583 union { 584 union {
@@ -747,8 +748,13 @@ struct Regs {
747 case LightingSampler::ReflectGreen: 748 case LightingSampler::ReflectGreen:
748 case LightingSampler::ReflectBlue: 749 case LightingSampler::ReflectBlue:
749 return (config == LightingConfig::Config4) || (config == LightingConfig::Config5) || (config == LightingConfig::Config7); 750 return (config == LightingConfig::Config4) || (config == LightingConfig::Config5) || (config == LightingConfig::Config7);
751 default:
752 UNREACHABLE_MSG("Regs::IsLightingSamplerSupported: Reached "
753 "unreachable section, sampler should be one "
754 "of Distribution0, Distribution1, Fresnel, "
755 "ReflectRed, ReflectGreen or ReflectBlue, instead "
756 "got %i", static_cast<int>(config));
750 } 757 }
751 return false;
752 } 758 }
753 759
754 struct { 760 struct {
diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h
index 323290054..1059c6ae4 100644
--- a/src/video_core/pica_state.h
+++ b/src/video_core/pica_state.h
@@ -4,6 +4,11 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
8
9#include "common/bit_field.h"
10#include "common/common_types.h"
11
7#include "video_core/pica.h" 12#include "video_core/pica.h"
8#include "video_core/primitive_assembly.h" 13#include "video_core/primitive_assembly.h"
9#include "video_core/shader/shader.h" 14#include "video_core/shader/shader.h"
@@ -51,7 +56,7 @@ struct State {
51 // Used to buffer partial vertices for immediate-mode rendering. 56 // Used to buffer partial vertices for immediate-mode rendering.
52 Shader::InputVertex input_vertex; 57 Shader::InputVertex input_vertex;
53 // Index of the next attribute to be loaded into `input_vertex`. 58 // Index of the next attribute to be loaded into `input_vertex`.
54 int current_attribute = 0; 59 u32 current_attribute = 0;
55 } immediate; 60 } immediate;
56 61
57 // This is constructed with a dummy triangle topology 62 // This is constructed with a dummy triangle topology
diff --git a/src/video_core/pica_types.h b/src/video_core/pica_types.h
index ecf45654b..3b7bfbdca 100644
--- a/src/video_core/pica_types.h
+++ b/src/video_core/pica_types.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <cmath>
7#include <cstring> 8#include <cstring>
8 9
9#include "common/common_types.h" 10#include "common/common_types.h"
diff --git a/src/video_core/primitive_assembly.cpp b/src/video_core/primitive_assembly.cpp
index ff3e2b862..68ea3c08a 100644
--- a/src/video_core/primitive_assembly.cpp
+++ b/src/video_core/primitive_assembly.cpp
@@ -6,8 +6,7 @@
6 6
7#include "video_core/pica.h" 7#include "video_core/pica.h"
8#include "video_core/primitive_assembly.h" 8#include "video_core/primitive_assembly.h"
9#include "video_core/debug_utils/debug_utils.h" 9#include "video_core/shader/shader.h"
10#include "video_core/shader/shader_interpreter.h"
11 10
12namespace Pica { 11namespace Pica {
13 12
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 5b9ed7c64..df67b9081 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -3,23 +3,28 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm> 5#include <algorithm>
6#include <array>
6#include <cmath> 7#include <cmath>
7 8
9#include "common/assert.h"
10#include "common/bit_field.h"
8#include "common/color.h" 11#include "common/color.h"
9#include "common/common_types.h" 12#include "common/common_types.h"
13#include "common/logging/log.h"
10#include "common/math_util.h" 14#include "common/math_util.h"
11#include "common/microprofile.h" 15#include "common/microprofile.h"
12#include "common/profiler.h" 16#include "common/vector_math.h"
13 17
14#include "core/memory.h" 18#include "core/memory.h"
15#include "core/hw/gpu.h" 19#include "core/hw/gpu.h"
16 20
21#include "video_core/debug_utils/debug_utils.h"
17#include "video_core/pica.h" 22#include "video_core/pica.h"
18#include "video_core/pica_state.h" 23#include "video_core/pica_state.h"
24#include "video_core/pica_types.h"
19#include "video_core/rasterizer.h" 25#include "video_core/rasterizer.h"
20#include "video_core/utils.h" 26#include "video_core/utils.h"
21#include "video_core/debug_utils/debug_utils.h" 27#include "video_core/shader/shader.h"
22#include "video_core/shader/shader_interpreter.h"
23 28
24namespace Pica { 29namespace Pica {
25 30
@@ -287,7 +292,6 @@ static int SignedArea (const Math::Vec2<Fix12P4>& vtx1,
287 return Math::Cross(vec1, vec2).z; 292 return Math::Cross(vec1, vec2).z;
288}; 293};
289 294
290static Common::Profiling::TimingCategory rasterization_category("Rasterization");
291MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 240)); 295MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 240));
292 296
293/** 297/**
@@ -300,7 +304,6 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
300 bool reversed = false) 304 bool reversed = false)
301{ 305{
302 const auto& regs = g_state.regs; 306 const auto& regs = g_state.regs;
303 Common::Profiling::ScopeTimer timer(rasterization_category);
304 MICROPROFILE_SCOPE(GPU_Rasterization); 307 MICROPROFILE_SCOPE(GPU_Rasterization);
305 308
306 // vertex positions in rasterizer coordinates 309 // vertex positions in rasterizer coordinates
@@ -923,92 +926,72 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
923 if (output_merger.alphablend_enable) { 926 if (output_merger.alphablend_enable) {
924 auto params = output_merger.alpha_blending; 927 auto params = output_merger.alpha_blending;
925 928
926 auto LookupFactorRGB = [&](Regs::BlendFactor factor) -> Math::Vec3<u8> { 929 auto LookupFactor = [&](unsigned channel, Regs::BlendFactor factor) -> u8 {
930 DEBUG_ASSERT(channel < 4);
931
932 const Math::Vec4<u8> blend_const = {
933 static_cast<u8>(output_merger.blend_const.r),
934 static_cast<u8>(output_merger.blend_const.g),
935 static_cast<u8>(output_merger.blend_const.b),
936 static_cast<u8>(output_merger.blend_const.a)
937 };
938
927 switch (factor) { 939 switch (factor) {
928 case Regs::BlendFactor::Zero : 940 case Regs::BlendFactor::Zero:
929 return Math::Vec3<u8>(0, 0, 0); 941 return 0;
930 942
931 case Regs::BlendFactor::One : 943 case Regs::BlendFactor::One:
932 return Math::Vec3<u8>(255, 255, 255); 944 return 255;
933 945
934 case Regs::BlendFactor::SourceColor: 946 case Regs::BlendFactor::SourceColor:
935 return combiner_output.rgb(); 947 return combiner_output[channel];
936 948
937 case Regs::BlendFactor::OneMinusSourceColor: 949 case Regs::BlendFactor::OneMinusSourceColor:
938 return Math::Vec3<u8>(255 - combiner_output.r(), 255 - combiner_output.g(), 255 - combiner_output.b()); 950 return 255 - combiner_output[channel];
939 951
940 case Regs::BlendFactor::DestColor: 952 case Regs::BlendFactor::DestColor:
941 return dest.rgb(); 953 return dest[channel];
942 954
943 case Regs::BlendFactor::OneMinusDestColor: 955 case Regs::BlendFactor::OneMinusDestColor:
944 return Math::Vec3<u8>(255 - dest.r(), 255 - dest.g(), 255 - dest.b()); 956 return 255 - dest[channel];
945 957
946 case Regs::BlendFactor::SourceAlpha: 958 case Regs::BlendFactor::SourceAlpha:
947 return Math::Vec3<u8>(combiner_output.a(), combiner_output.a(), combiner_output.a()); 959 return combiner_output.a();
948 960
949 case Regs::BlendFactor::OneMinusSourceAlpha: 961 case Regs::BlendFactor::OneMinusSourceAlpha:
950 return Math::Vec3<u8>(255 - combiner_output.a(), 255 - combiner_output.a(), 255 - combiner_output.a()); 962 return 255 - combiner_output.a();
951 963
952 case Regs::BlendFactor::DestAlpha: 964 case Regs::BlendFactor::DestAlpha:
953 return Math::Vec3<u8>(dest.a(), dest.a(), dest.a()); 965 return dest.a();
954 966
955 case Regs::BlendFactor::OneMinusDestAlpha: 967 case Regs::BlendFactor::OneMinusDestAlpha:
956 return Math::Vec3<u8>(255 - dest.a(), 255 - dest.a(), 255 - dest.a()); 968 return 255 - dest.a();
957 969
958 case Regs::BlendFactor::ConstantColor: 970 case Regs::BlendFactor::ConstantColor:
959 return Math::Vec3<u8>(output_merger.blend_const.r, output_merger.blend_const.g, output_merger.blend_const.b); 971 return blend_const[channel];
960 972
961 case Regs::BlendFactor::OneMinusConstantColor: 973 case Regs::BlendFactor::OneMinusConstantColor:
962 return Math::Vec3<u8>(255 - output_merger.blend_const.r, 255 - output_merger.blend_const.g, 255 - output_merger.blend_const.b); 974 return 255 - blend_const[channel];
963 975
964 case Regs::BlendFactor::ConstantAlpha: 976 case Regs::BlendFactor::ConstantAlpha:
965 return Math::Vec3<u8>(output_merger.blend_const.a, output_merger.blend_const.a, output_merger.blend_const.a); 977 return blend_const.a();
966 978
967 case Regs::BlendFactor::OneMinusConstantAlpha: 979 case Regs::BlendFactor::OneMinusConstantAlpha:
968 return Math::Vec3<u8>(255 - output_merger.blend_const.a, 255 - output_merger.blend_const.a, 255 - output_merger.blend_const.a); 980 return 255 - blend_const.a();
969
970 default:
971 LOG_CRITICAL(HW_GPU, "Unknown color blend factor %x", factor);
972 UNIMPLEMENTED();
973 break;
974 }
975
976 return {};
977 };
978
979 auto LookupFactorA = [&](Regs::BlendFactor factor) -> u8 {
980 switch (factor) {
981 case Regs::BlendFactor::Zero:
982 return 0;
983 981
984 case Regs::BlendFactor::One: 982 case Regs::BlendFactor::SourceAlphaSaturate:
985 return 255; 983 // Returns 1.0 for the alpha channel
986 984 if (channel == 3)
987 case Regs::BlendFactor::SourceAlpha: 985 return 255;
988 return combiner_output.a(); 986 return std::min(combiner_output.a(), static_cast<u8>(255 - dest.a()));
989
990 case Regs::BlendFactor::OneMinusSourceAlpha:
991 return 255 - combiner_output.a();
992
993 case Regs::BlendFactor::DestAlpha:
994 return dest.a();
995
996 case Regs::BlendFactor::OneMinusDestAlpha:
997 return 255 - dest.a();
998
999 case Regs::BlendFactor::ConstantAlpha:
1000 return output_merger.blend_const.a;
1001
1002 case Regs::BlendFactor::OneMinusConstantAlpha:
1003 return 255 - output_merger.blend_const.a;
1004 987
1005 default: 988 default:
1006 LOG_CRITICAL(HW_GPU, "Unknown alpha blend factor %x", factor); 989 LOG_CRITICAL(HW_GPU, "Unknown blend factor %x", factor);
1007 UNIMPLEMENTED(); 990 UNIMPLEMENTED();
1008 break; 991 break;
1009 } 992 }
1010 993
1011 return {}; 994 return combiner_output[channel];
1012 }; 995 };
1013 996
1014 static auto EvaluateBlendEquation = [](const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor, 997 static auto EvaluateBlendEquation = [](const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor,
@@ -1060,10 +1043,15 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
1060 MathUtil::Clamp(result.a(), 0, 255)); 1043 MathUtil::Clamp(result.a(), 0, 255));
1061 }; 1044 };
1062 1045
1063 auto srcfactor = Math::MakeVec(LookupFactorRGB(params.factor_source_rgb), 1046 auto srcfactor = Math::MakeVec(LookupFactor(0, params.factor_source_rgb),
1064 LookupFactorA(params.factor_source_a)); 1047 LookupFactor(1, params.factor_source_rgb),
1065 auto dstfactor = Math::MakeVec(LookupFactorRGB(params.factor_dest_rgb), 1048 LookupFactor(2, params.factor_source_rgb),
1066 LookupFactorA(params.factor_dest_a)); 1049 LookupFactor(3, params.factor_source_a));
1050
1051 auto dstfactor = Math::MakeVec(LookupFactor(0, params.factor_dest_rgb),
1052 LookupFactor(1, params.factor_dest_rgb),
1053 LookupFactor(2, params.factor_dest_rgb),
1054 LookupFactor(3, params.factor_dest_a));
1067 1055
1068 blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_rgb); 1056 blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_rgb);
1069 blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_a).a(); 1057 blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_a).a();
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 008c5827b..bf7101665 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -6,6 +6,10 @@
6 6
7#include "common/common_types.h" 7#include "common/common_types.h"
8 8
9#include "core/hw/gpu.h"
10
11struct ScreenInfo;
12
9namespace Pica { 13namespace Pica {
10namespace Shader { 14namespace Shader {
11struct OutputVertex; 15struct OutputVertex;
@@ -18,12 +22,6 @@ class RasterizerInterface {
18public: 22public:
19 virtual ~RasterizerInterface() {} 23 virtual ~RasterizerInterface() {}
20 24
21 /// Initialize API-specific GPU objects
22 virtual void InitObjects() = 0;
23
24 /// Reset the rasterizer, such as flushing all caches and updating all state
25 virtual void Reset() = 0;
26
27 /// Queues the primitive formed by the given vertices for rendering 25 /// Queues the primitive formed by the given vertices for rendering
28 virtual void AddTriangle(const Pica::Shader::OutputVertex& v0, 26 virtual void AddTriangle(const Pica::Shader::OutputVertex& v0,
29 const Pica::Shader::OutputVertex& v1, 27 const Pica::Shader::OutputVertex& v1,
@@ -32,17 +30,26 @@ public:
32 /// Draw the current batch of triangles 30 /// Draw the current batch of triangles
33 virtual void DrawTriangles() = 0; 31 virtual void DrawTriangles() = 0;
34 32
35 /// Commit the rasterizer's framebuffer contents immediately to the current 3DS memory framebuffer
36 virtual void FlushFramebuffer() = 0;
37
38 /// Notify rasterizer that the specified PICA register has been changed 33 /// Notify rasterizer that the specified PICA register has been changed
39 virtual void NotifyPicaRegisterChanged(u32 id) = 0; 34 virtual void NotifyPicaRegisterChanged(u32 id) = 0;
40 35
41 /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory. 36 /// Notify rasterizer that all caches should be flushed to 3DS memory
37 virtual void FlushAll() = 0;
38
39 /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory
42 virtual void FlushRegion(PAddr addr, u32 size) = 0; 40 virtual void FlushRegion(PAddr addr, u32 size) = 0;
43 41
44 /// Notify rasterizer that any caches of the specified region should be discraded and reloaded from 3DS memory. 42 /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory and invalidated
45 virtual void InvalidateRegion(PAddr addr, u32 size) = 0; 43 virtual void FlushAndInvalidateRegion(PAddr addr, u32 size) = 0;
44
45 /// Attempt to use a faster method to perform a display transfer
46 virtual bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { return false; }
47
48 /// Attempt to use a faster method to fill a region
49 virtual bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { return false; }
50
51 /// Attempt to use a faster method to display the framebuffer to screen
52 virtual bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) { return false; }
46}; 53};
47 54
48} 55}
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index 101f84eb9..3f451e062 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -2,10 +2,9 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <atomic>
5#include <memory> 6#include <memory>
6 7
7#include "core/settings.h"
8
9#include "video_core/renderer_base.h" 8#include "video_core/renderer_base.h"
10#include "video_core/video_core.h" 9#include "video_core/video_core.h"
11#include "video_core/swrasterizer.h" 10#include "video_core/swrasterizer.h"
@@ -21,7 +20,5 @@ void RendererBase::RefreshRasterizerSetting() {
21 } else { 20 } else {
22 rasterizer = std::make_unique<VideoCore::SWRasterizer>(); 21 rasterizer = std::make_unique<VideoCore::SWRasterizer>();
23 } 22 }
24 rasterizer->InitObjects();
25 rasterizer->Reset();
26 } 23 }
27} 24}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 6ca9f45e2..0b471dfd2 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -2,28 +2,28 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <cstring>
6#include <memory> 5#include <memory>
6#include <string>
7#include <tuple>
8#include <utility>
7 9
8#include <glad/glad.h> 10#include <glad/glad.h>
9 11
12#include "common/assert.h"
10#include "common/color.h" 13#include "common/color.h"
11#include "common/file_util.h" 14#include "common/logging/log.h"
12#include "common/math_util.h" 15#include "common/math_util.h"
13#include "common/microprofile.h" 16#include "common/vector_math.h"
14#include "common/profiler.h"
15 17
16#include "core/memory.h"
17#include "core/settings.h"
18#include "core/hw/gpu.h" 18#include "core/hw/gpu.h"
19 19
20#include "video_core/pica.h" 20#include "video_core/pica.h"
21#include "video_core/pica_state.h" 21#include "video_core/pica_state.h"
22#include "video_core/utils.h"
23#include "video_core/renderer_opengl/gl_rasterizer.h" 22#include "video_core/renderer_opengl/gl_rasterizer.h"
24#include "video_core/renderer_opengl/gl_shader_gen.h" 23#include "video_core/renderer_opengl/gl_shader_gen.h"
25#include "video_core/renderer_opengl/gl_shader_util.h" 24#include "video_core/renderer_opengl/gl_shader_util.h"
26#include "video_core/renderer_opengl/pica_to_gl.h" 25#include "video_core/renderer_opengl/pica_to_gl.h"
26#include "video_core/renderer_opengl/renderer_opengl.h"
27 27
28static bool IsPassThroughTevStage(const Pica::Regs::TevStageConfig& stage) { 28static bool IsPassThroughTevStage(const Pica::Regs::TevStageConfig& stage) {
29 return (stage.color_op == Pica::Regs::TevStageConfig::Operation::Replace && 29 return (stage.color_op == Pica::Regs::TevStageConfig::Operation::Replace &&
@@ -36,10 +36,7 @@ static bool IsPassThroughTevStage(const Pica::Regs::TevStageConfig& stage) {
36 stage.GetAlphaMultiplier() == 1); 36 stage.GetAlphaMultiplier() == 1);
37} 37}
38 38
39RasterizerOpenGL::RasterizerOpenGL() : cached_fb_color_addr(0), cached_fb_depth_addr(0) { } 39RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
40RasterizerOpenGL::~RasterizerOpenGL() { }
41
42void RasterizerOpenGL::InitObjects() {
43 // Create sampler objects 40 // Create sampler objects
44 for (size_t i = 0; i < texture_samplers.size(); ++i) { 41 for (size_t i = 0; i < texture_samplers.size(); ++i) {
45 texture_samplers[i].Create(); 42 texture_samplers[i].Create();
@@ -61,6 +58,10 @@ void RasterizerOpenGL::InitObjects() {
61 58
62 uniform_block_data.dirty = true; 59 uniform_block_data.dirty = true;
63 60
61 for (unsigned index = 0; index < lighting_luts.size(); index++) {
62 uniform_block_data.lut_dirty[index] = true;
63 }
64
64 // Set vertex attributes 65 // Set vertex attributes
65 glVertexAttribPointer(GLShader::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position)); 66 glVertexAttribPointer(GLShader::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position));
66 glEnableVertexAttribArray(GLShader::ATTRIBUTE_POSITION); 67 glEnableVertexAttribArray(GLShader::ATTRIBUTE_POSITION);
@@ -81,70 +82,24 @@ void RasterizerOpenGL::InitObjects() {
81 glVertexAttribPointer(GLShader::ATTRIBUTE_VIEW, 3, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, view)); 82 glVertexAttribPointer(GLShader::ATTRIBUTE_VIEW, 3, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, view));
82 glEnableVertexAttribArray(GLShader::ATTRIBUTE_VIEW); 83 glEnableVertexAttribArray(GLShader::ATTRIBUTE_VIEW);
83 84
84 SetShader(); 85 // Create render framebuffer
85
86 // Create textures for OGL framebuffer that will be rendered to, initially 1x1 to succeed in framebuffer creation
87 fb_color_texture.texture.Create();
88 ReconfigureColorTexture(fb_color_texture, Pica::Regs::ColorFormat::RGBA8, 1, 1);
89
90 state.texture_units[0].texture_2d = fb_color_texture.texture.handle;
91 state.Apply();
92
93 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
94 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
95 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
96 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
97 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
98
99 state.texture_units[0].texture_2d = 0;
100 state.Apply();
101
102 fb_depth_texture.texture.Create();
103 ReconfigureDepthTexture(fb_depth_texture, Pica::Regs::DepthFormat::D16, 1, 1);
104
105 state.texture_units[0].texture_2d = fb_depth_texture.texture.handle;
106 state.Apply();
107
108 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
109 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
110 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
111 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
112 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
113 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_FUNC, GL_LEQUAL);
114 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_MODE, GL_NONE);
115
116 state.texture_units[0].texture_2d = 0;
117 state.Apply();
118
119 // Configure OpenGL framebuffer
120 framebuffer.Create(); 86 framebuffer.Create();
121 87
122 state.draw.framebuffer = framebuffer.handle; 88 // Allocate and bind lighting lut textures
89 for (size_t i = 0; i < lighting_luts.size(); ++i) {
90 lighting_luts[i].Create();
91 state.lighting_luts[i].texture_1d = lighting_luts[i].handle;
92 }
123 state.Apply(); 93 state.Apply();
124 94
125 glActiveTexture(GL_TEXTURE0); 95 for (size_t i = 0; i < lighting_luts.size(); ++i) {
126 glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, fb_color_texture.texture.handle, 0); 96 glActiveTexture(static_cast<GLenum>(GL_TEXTURE3 + i));
127 glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, fb_depth_texture.texture.handle, 0);
128
129 for (size_t i = 0; i < lighting_lut.size(); ++i) {
130 lighting_lut[i].Create();
131 state.lighting_lut[i].texture_1d = lighting_lut[i].handle;
132
133 glActiveTexture(GL_TEXTURE3 + i);
134 glBindTexture(GL_TEXTURE_1D, state.lighting_lut[i].texture_1d);
135
136 glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr); 97 glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr);
137 glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 98 glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
138 glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); 99 glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
139 } 100 }
140 state.Apply();
141
142 GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER);
143 ASSERT_MSG(status == GL_FRAMEBUFFER_COMPLETE,
144 "OpenGL rasterizer framebuffer setup failed, status %X", status);
145}
146 101
147void RasterizerOpenGL::Reset() { 102 // Sync fixed function OpenGL state
148 SyncCullMode(); 103 SyncCullMode();
149 SyncDepthModifiers(); 104 SyncDepthModifiers();
150 SyncBlendEnabled(); 105 SyncBlendEnabled();
@@ -156,10 +111,10 @@ void RasterizerOpenGL::Reset() {
156 SyncColorWriteMask(); 111 SyncColorWriteMask();
157 SyncStencilWriteMask(); 112 SyncStencilWriteMask();
158 SyncDepthWriteMask(); 113 SyncDepthWriteMask();
114}
159 115
160 SetShader(); 116RasterizerOpenGL::~RasterizerOpenGL() {
161 117
162 res_cache.InvalidateAll();
163} 118}
164 119
165/** 120/**
@@ -196,47 +151,98 @@ void RasterizerOpenGL::DrawTriangles() {
196 if (vertex_batch.empty()) 151 if (vertex_batch.empty())
197 return; 152 return;
198 153
199 SyncFramebuffer(); 154 const auto& regs = Pica::g_state.regs;
200 SyncDrawState(); 155
156 // Sync and bind the framebuffer surfaces
157 CachedSurface* color_surface;
158 CachedSurface* depth_surface;
159 MathUtil::Rectangle<int> rect;
160 std::tie(color_surface, depth_surface, rect) = res_cache.GetFramebufferSurfaces(regs.framebuffer);
161
162 state.draw.draw_framebuffer = framebuffer.handle;
163 state.Apply();
164
165 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, color_surface != nullptr ? color_surface->texture.handle : 0, 0);
166 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, depth_surface != nullptr ? depth_surface->texture.handle : 0, 0);
167 bool has_stencil = regs.framebuffer.depth_format == Pica::Regs::DepthFormat::D24S8;
168 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, (has_stencil && depth_surface != nullptr) ? depth_surface->texture.handle : 0, 0);
169
170 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
171 return;
172 }
173
174 // Sync the viewport
175 // These registers hold half-width and half-height, so must be multiplied by 2
176 GLsizei viewport_width = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_x).ToFloat32() * 2;
177 GLsizei viewport_height = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_y).ToFloat32() * 2;
178
179 glViewport((GLint)(rect.left + regs.viewport_corner.x * color_surface->res_scale_width),
180 (GLint)(rect.bottom + regs.viewport_corner.y * color_surface->res_scale_height),
181 (GLsizei)(viewport_width * color_surface->res_scale_width), (GLsizei)(viewport_height * color_surface->res_scale_height));
182
183 // Sync and bind the texture surfaces
184 const auto pica_textures = regs.GetTextures();
185 for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) {
186 const auto& texture = pica_textures[texture_index];
187
188 if (texture.enabled) {
189 texture_samplers[texture_index].SyncWithConfig(texture.config);
190 CachedSurface* surface = res_cache.GetTextureSurface(texture);
191 if (surface != nullptr) {
192 state.texture_units[texture_index].texture_2d = surface->texture.handle;
193 } else {
194 // Can occur when texture addr is null or its memory is unmapped/invalid
195 state.texture_units[texture_index].texture_2d = 0;
196 }
197 } else {
198 state.texture_units[texture_index].texture_2d = 0;
199 }
200 }
201 201
202 if (state.draw.shader_dirty) { 202 // Sync and bind the shader
203 if (shader_dirty) {
203 SetShader(); 204 SetShader();
204 state.draw.shader_dirty = false; 205 shader_dirty = false;
205 } 206 }
206 207
207 for (unsigned index = 0; index < lighting_lut.size(); index++) { 208 // Sync the lighting luts
209 for (unsigned index = 0; index < lighting_luts.size(); index++) {
208 if (uniform_block_data.lut_dirty[index]) { 210 if (uniform_block_data.lut_dirty[index]) {
209 SyncLightingLUT(index); 211 SyncLightingLUT(index);
210 uniform_block_data.lut_dirty[index] = false; 212 uniform_block_data.lut_dirty[index] = false;
211 } 213 }
212 } 214 }
213 215
216 // Sync the uniform data
214 if (uniform_block_data.dirty) { 217 if (uniform_block_data.dirty) {
215 glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, GL_STATIC_DRAW); 218 glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, GL_STATIC_DRAW);
216 uniform_block_data.dirty = false; 219 uniform_block_data.dirty = false;
217 } 220 }
218 221
222 state.Apply();
223
224 // Draw the vertex batch
219 glBufferData(GL_ARRAY_BUFFER, vertex_batch.size() * sizeof(HardwareVertex), vertex_batch.data(), GL_STREAM_DRAW); 225 glBufferData(GL_ARRAY_BUFFER, vertex_batch.size() * sizeof(HardwareVertex), vertex_batch.data(), GL_STREAM_DRAW);
220 glDrawArrays(GL_TRIANGLES, 0, (GLsizei)vertex_batch.size()); 226 glDrawArrays(GL_TRIANGLES, 0, (GLsizei)vertex_batch.size());
221 227
222 vertex_batch.clear(); 228 // Mark framebuffer surfaces as dirty
223 229 // TODO: Restrict invalidation area to the viewport
224 // Flush the resource cache at the current depth and color framebuffer addresses for render-to-texture 230 if (color_surface != nullptr) {
225 const auto& regs = Pica::g_state.regs; 231 color_surface->dirty = true;
226 232 res_cache.FlushRegion(color_surface->addr, color_surface->size, color_surface, true);
227 u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format) 233 }
228 * fb_color_texture.width * fb_color_texture.height; 234 if (depth_surface != nullptr) {
229 235 depth_surface->dirty = true;
230 u32 cached_fb_depth_size = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format) 236 res_cache.FlushRegion(depth_surface->addr, depth_surface->size, depth_surface, true);
231 * fb_depth_texture.width * fb_depth_texture.height; 237 }
232 238
233 res_cache.InvalidateInRange(cached_fb_color_addr, cached_fb_color_size, true); 239 vertex_batch.clear();
234 res_cache.InvalidateInRange(cached_fb_depth_addr, cached_fb_depth_size, true);
235}
236 240
237void RasterizerOpenGL::FlushFramebuffer() { 241 // Unbind textures for potential future use as framebuffer attachments
238 CommitColorBuffer(); 242 for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) {
239 CommitDepthBuffer(); 243 state.texture_units[texture_index].texture_2d = 0;
244 }
245 state.Apply();
240} 246}
241 247
242void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { 248void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
@@ -268,7 +274,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
268 // Alpha test 274 // Alpha test
269 case PICA_REG_INDEX(output_merger.alpha_test): 275 case PICA_REG_INDEX(output_merger.alpha_test):
270 SyncAlphaTest(); 276 SyncAlphaTest();
271 state.draw.shader_dirty = true; 277 shader_dirty = true;
272 break; 278 break;
273 279
274 // Sync GL stencil test + stencil write mask 280 // Sync GL stencil test + stencil write mask
@@ -334,7 +340,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
334 case PICA_REG_INDEX(tev_stage5.color_op): 340 case PICA_REG_INDEX(tev_stage5.color_op):
335 case PICA_REG_INDEX(tev_stage5.color_scale): 341 case PICA_REG_INDEX(tev_stage5.color_scale):
336 case PICA_REG_INDEX(tev_combiner_buffer_input): 342 case PICA_REG_INDEX(tev_combiner_buffer_input):
337 state.draw.shader_dirty = true; 343 shader_dirty = true;
338 break; 344 break;
339 case PICA_REG_INDEX(tev_stage0.const_r): 345 case PICA_REG_INDEX(tev_stage0.const_r):
340 SyncTevConstColor(0, regs.tev_stage0); 346 SyncTevConstColor(0, regs.tev_stage0);
@@ -521,41 +527,257 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
521 } 527 }
522} 528}
523 529
530void RasterizerOpenGL::FlushAll() {
531 res_cache.FlushAll();
532}
533
524void RasterizerOpenGL::FlushRegion(PAddr addr, u32 size) { 534void RasterizerOpenGL::FlushRegion(PAddr addr, u32 size) {
525 const auto& regs = Pica::g_state.regs; 535 res_cache.FlushRegion(addr, size, nullptr, false);
536}
526 537
527 u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format) 538void RasterizerOpenGL::FlushAndInvalidateRegion(PAddr addr, u32 size) {
528 * fb_color_texture.width * fb_color_texture.height; 539 res_cache.FlushRegion(addr, size, nullptr, true);
540}
529 541
530 u32 cached_fb_depth_size = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format) 542bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) {
531 * fb_depth_texture.width * fb_depth_texture.height; 543 using PixelFormat = CachedSurface::PixelFormat;
544 using SurfaceType = CachedSurface::SurfaceType;
532 545
533 // If source memory region overlaps 3DS framebuffers, commit them before the copy happens 546 if (config.is_texture_copy) {
534 if (MathUtil::IntervalsIntersect(addr, size, cached_fb_color_addr, cached_fb_color_size)) 547 // TODO(tfarley): Try to hardware accelerate this
535 CommitColorBuffer(); 548 return false;
549 }
536 550
537 if (MathUtil::IntervalsIntersect(addr, size, cached_fb_depth_addr, cached_fb_depth_size)) 551 CachedSurface src_params;
538 CommitDepthBuffer(); 552 src_params.addr = config.GetPhysicalInputAddress();
553 src_params.width = config.output_width;
554 src_params.height = config.output_height;
555 src_params.is_tiled = !config.input_linear;
556 src_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.input_format);
557
558 CachedSurface dst_params;
559 dst_params.addr = config.GetPhysicalOutputAddress();
560 dst_params.width = config.scaling != config.NoScale ? config.output_width / 2 : config.output_width.Value();
561 dst_params.height = config.scaling == config.ScaleXY ? config.output_height / 2 : config.output_height.Value();
562 dst_params.is_tiled = config.input_linear != config.dont_swizzle;
563 dst_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.output_format);
564
565 MathUtil::Rectangle<int> src_rect;
566 CachedSurface* src_surface = res_cache.GetSurfaceRect(src_params, false, true, src_rect);
567
568 if (src_surface == nullptr) {
569 return false;
570 }
571
572 // Require destination surface to have same resolution scale as source to preserve scaling
573 dst_params.res_scale_width = src_surface->res_scale_width;
574 dst_params.res_scale_height = src_surface->res_scale_height;
575
576 MathUtil::Rectangle<int> dst_rect;
577 CachedSurface* dst_surface = res_cache.GetSurfaceRect(dst_params, true, false, dst_rect);
578
579 if (dst_surface == nullptr) {
580 return false;
581 }
582
583 // Don't accelerate if the src and dst surfaces are the same
584 if (src_surface == dst_surface) {
585 return false;
586 }
587
588 if (config.flip_vertically) {
589 std::swap(dst_rect.top, dst_rect.bottom);
590 }
591
592 if (!res_cache.TryBlitSurfaces(src_surface, src_rect, dst_surface, dst_rect)) {
593 return false;
594 }
595
596 u32 dst_size = dst_params.width * dst_params.height * CachedSurface::GetFormatBpp(dst_params.pixel_format) / 8;
597 dst_surface->dirty = true;
598 res_cache.FlushRegion(config.GetPhysicalOutputAddress(), dst_size, dst_surface, true);
599 return true;
539} 600}
540 601
541void RasterizerOpenGL::InvalidateRegion(PAddr addr, u32 size) { 602bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) {
542 const auto& regs = Pica::g_state.regs; 603 using PixelFormat = CachedSurface::PixelFormat;
604 using SurfaceType = CachedSurface::SurfaceType;
605
606 CachedSurface* dst_surface = res_cache.TryGetFillSurface(config);
607
608 if (dst_surface == nullptr) {
609 return false;
610 }
611
612 OpenGLState cur_state = OpenGLState::GetCurState();
613
614 SurfaceType dst_type = CachedSurface::GetFormatType(dst_surface->pixel_format);
543 615
544 u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format) 616 GLuint old_fb = cur_state.draw.draw_framebuffer;
545 * fb_color_texture.width * fb_color_texture.height; 617 cur_state.draw.draw_framebuffer = framebuffer.handle;
618 // TODO: When scissor test is implemented, need to disable scissor test in cur_state here so Clear call isn't affected
619 cur_state.Apply();
546 620
547 u32 cached_fb_depth_size = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format) 621 if (dst_type == SurfaceType::Color || dst_type == SurfaceType::Texture) {
548 * fb_depth_texture.width * fb_depth_texture.height; 622 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_surface->texture.handle, 0);
623 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
549 624
550 // If modified memory region overlaps 3DS framebuffers, reload their contents into OpenGL 625 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
551 if (MathUtil::IntervalsIntersect(addr, size, cached_fb_color_addr, cached_fb_color_size)) 626 return false;
552 ReloadColorBuffer(); 627 }
628
629 GLfloat color_values[4] = {0.0f, 0.0f, 0.0f, 0.0f};
630
631 // TODO: Handle additional pixel format and fill value size combinations to accelerate more cases
632 // For instance, checking if fill value's bytes/bits repeat to allow filling I8/A8/I4/A4/...
633 // Currently only handles formats that are multiples of the fill value size
634
635 if (config.fill_24bit) {
636 switch (dst_surface->pixel_format) {
637 case PixelFormat::RGB8:
638 color_values[0] = config.value_24bit_r / 255.0f;
639 color_values[1] = config.value_24bit_g / 255.0f;
640 color_values[2] = config.value_24bit_b / 255.0f;
641 break;
642 default:
643 return false;
644 }
645 } else if (config.fill_32bit) {
646 u32 value = config.value_32bit;
647
648 switch (dst_surface->pixel_format) {
649 case PixelFormat::RGBA8:
650 color_values[0] = (value >> 24) / 255.0f;
651 color_values[1] = ((value >> 16) & 0xFF) / 255.0f;
652 color_values[2] = ((value >> 8) & 0xFF) / 255.0f;
653 color_values[3] = (value & 0xFF) / 255.0f;
654 break;
655 default:
656 return false;
657 }
658 } else {
659 u16 value_16bit = config.value_16bit.Value();
660 Math::Vec4<u8> color;
661
662 switch (dst_surface->pixel_format) {
663 case PixelFormat::RGBA8:
664 color_values[0] = (value_16bit >> 8) / 255.0f;
665 color_values[1] = (value_16bit & 0xFF) / 255.0f;
666 color_values[2] = color_values[0];
667 color_values[3] = color_values[1];
668 break;
669 case PixelFormat::RGB5A1:
670 color = Color::DecodeRGB5A1((const u8*)&value_16bit);
671 color_values[0] = color[0] / 31.0f;
672 color_values[1] = color[1] / 31.0f;
673 color_values[2] = color[2] / 31.0f;
674 color_values[3] = color[3];
675 break;
676 case PixelFormat::RGB565:
677 color = Color::DecodeRGB565((const u8*)&value_16bit);
678 color_values[0] = color[0] / 31.0f;
679 color_values[1] = color[1] / 63.0f;
680 color_values[2] = color[2] / 31.0f;
681 break;
682 case PixelFormat::RGBA4:
683 color = Color::DecodeRGBA4((const u8*)&value_16bit);
684 color_values[0] = color[0] / 15.0f;
685 color_values[1] = color[1] / 15.0f;
686 color_values[2] = color[2] / 15.0f;
687 color_values[3] = color[3] / 15.0f;
688 break;
689 case PixelFormat::IA8:
690 case PixelFormat::RG8:
691 color_values[0] = (value_16bit >> 8) / 255.0f;
692 color_values[1] = (value_16bit & 0xFF) / 255.0f;
693 break;
694 default:
695 return false;
696 }
697 }
698
699 cur_state.color_mask.red_enabled = true;
700 cur_state.color_mask.green_enabled = true;
701 cur_state.color_mask.blue_enabled = true;
702 cur_state.color_mask.alpha_enabled = true;
703 cur_state.Apply();
704 glClearBufferfv(GL_COLOR, 0, color_values);
705 } else if (dst_type == SurfaceType::Depth) {
706 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
707 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_surface->texture.handle, 0);
708 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
709
710 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
711 return false;
712 }
713
714 GLfloat value_float;
715 if (dst_surface->pixel_format == CachedSurface::PixelFormat::D16) {
716 value_float = config.value_32bit / 65535.0f; // 2^16 - 1
717 } else if (dst_surface->pixel_format == CachedSurface::PixelFormat::D24) {
718 value_float = config.value_32bit / 16777215.0f; // 2^24 - 1
719 }
720
721 cur_state.depth.write_mask = true;
722 cur_state.Apply();
723 glClearBufferfv(GL_DEPTH, 0, &value_float);
724 } else if (dst_type == SurfaceType::DepthStencil) {
725 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
726 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, dst_surface->texture.handle, 0);
727
728 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
729 return false;
730 }
731
732 GLfloat value_float = (config.value_32bit & 0xFFFFFF) / 16777215.0f; // 2^24 - 1
733 GLint value_int = (config.value_32bit >> 24);
734
735 cur_state.depth.write_mask = true;
736 cur_state.stencil.write_mask = true;
737 cur_state.Apply();
738 glClearBufferfi(GL_DEPTH_STENCIL, 0, value_float, value_int);
739 }
553 740
554 if (MathUtil::IntervalsIntersect(addr, size, cached_fb_depth_addr, cached_fb_depth_size)) 741 cur_state.draw.draw_framebuffer = old_fb;
555 ReloadDepthBuffer(); 742 // TODO: Return scissor test to previous value when scissor test is implemented
743 cur_state.Apply();
556 744
557 // Notify cache of flush in case the region touches a cached resource 745 dst_surface->dirty = true;
558 res_cache.InvalidateInRange(addr, size); 746 res_cache.FlushRegion(dst_surface->addr, dst_surface->size, dst_surface, true);
747 return true;
748}
749
750bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) {
751 if (framebuffer_addr == 0) {
752 return false;
753 }
754
755 CachedSurface src_params;
756 src_params.addr = framebuffer_addr;
757 src_params.width = config.width;
758 src_params.height = config.height;
759 src_params.stride = pixel_stride;
760 src_params.is_tiled = false;
761 src_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.color_format);
762
763 MathUtil::Rectangle<int> src_rect;
764 CachedSurface* src_surface = res_cache.GetSurfaceRect(src_params, false, true, src_rect);
765
766 if (src_surface == nullptr) {
767 return false;
768 }
769
770 u32 scaled_width = src_surface->GetScaledWidth();
771 u32 scaled_height = src_surface->GetScaledHeight();
772
773 screen_info.display_texcoords = MathUtil::Rectangle<float>((float)src_rect.top / (float)scaled_height,
774 (float)src_rect.left / (float)scaled_width,
775 (float)src_rect.bottom / (float)scaled_height,
776 (float)src_rect.right / (float)scaled_width);
777
778 screen_info.display_texture = src_surface->texture.handle;
779
780 return true;
559} 781}
560 782
561void RasterizerOpenGL::SamplerInfo::Create() { 783void RasterizerOpenGL::SamplerInfo::Create() {
@@ -591,114 +813,13 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Pica::Regs::TextureConf
591 813
592 if (wrap_s == TextureConfig::ClampToBorder || wrap_t == TextureConfig::ClampToBorder) { 814 if (wrap_s == TextureConfig::ClampToBorder || wrap_t == TextureConfig::ClampToBorder) {
593 if (border_color != config.border_color.raw) { 815 if (border_color != config.border_color.raw) {
816 border_color = config.border_color.raw;
594 auto gl_color = PicaToGL::ColorRGBA8(border_color); 817 auto gl_color = PicaToGL::ColorRGBA8(border_color);
595 glSamplerParameterfv(s, GL_TEXTURE_BORDER_COLOR, gl_color.data()); 818 glSamplerParameterfv(s, GL_TEXTURE_BORDER_COLOR, gl_color.data());
596 } 819 }
597 } 820 }
598} 821}
599 822
600void RasterizerOpenGL::ReconfigureColorTexture(TextureInfo& texture, Pica::Regs::ColorFormat format, u32 width, u32 height) {
601 GLint internal_format;
602
603 texture.format = format;
604 texture.width = width;
605 texture.height = height;
606
607 switch (format) {
608 case Pica::Regs::ColorFormat::RGBA8:
609 internal_format = GL_RGBA;
610 texture.gl_format = GL_RGBA;
611 texture.gl_type = GL_UNSIGNED_INT_8_8_8_8;
612 break;
613
614 case Pica::Regs::ColorFormat::RGB8:
615 // This pixel format uses BGR since GL_UNSIGNED_BYTE specifies byte-order, unlike every
616 // specific OpenGL type used in this function using native-endian (that is, little-endian
617 // mostly everywhere) for words or half-words.
618 // TODO: check how those behave on big-endian processors.
619 internal_format = GL_RGB;
620 texture.gl_format = GL_BGR;
621 texture.gl_type = GL_UNSIGNED_BYTE;
622 break;
623
624 case Pica::Regs::ColorFormat::RGB5A1:
625 internal_format = GL_RGBA;
626 texture.gl_format = GL_RGBA;
627 texture.gl_type = GL_UNSIGNED_SHORT_5_5_5_1;
628 break;
629
630 case Pica::Regs::ColorFormat::RGB565:
631 internal_format = GL_RGB;
632 texture.gl_format = GL_RGB;
633 texture.gl_type = GL_UNSIGNED_SHORT_5_6_5;
634 break;
635
636 case Pica::Regs::ColorFormat::RGBA4:
637 internal_format = GL_RGBA;
638 texture.gl_format = GL_RGBA;
639 texture.gl_type = GL_UNSIGNED_SHORT_4_4_4_4;
640 break;
641
642 default:
643 LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer texture color format %x", format);
644 UNIMPLEMENTED();
645 break;
646 }
647
648 state.texture_units[0].texture_2d = texture.texture.handle;
649 state.Apply();
650
651 glActiveTexture(GL_TEXTURE0);
652 glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0,
653 texture.gl_format, texture.gl_type, nullptr);
654
655 state.texture_units[0].texture_2d = 0;
656 state.Apply();
657}
658
659void RasterizerOpenGL::ReconfigureDepthTexture(DepthTextureInfo& texture, Pica::Regs::DepthFormat format, u32 width, u32 height) {
660 GLint internal_format;
661
662 texture.format = format;
663 texture.width = width;
664 texture.height = height;
665
666 switch (format) {
667 case Pica::Regs::DepthFormat::D16:
668 internal_format = GL_DEPTH_COMPONENT16;
669 texture.gl_format = GL_DEPTH_COMPONENT;
670 texture.gl_type = GL_UNSIGNED_SHORT;
671 break;
672
673 case Pica::Regs::DepthFormat::D24:
674 internal_format = GL_DEPTH_COMPONENT24;
675 texture.gl_format = GL_DEPTH_COMPONENT;
676 texture.gl_type = GL_UNSIGNED_INT;
677 break;
678
679 case Pica::Regs::DepthFormat::D24S8:
680 internal_format = GL_DEPTH24_STENCIL8;
681 texture.gl_format = GL_DEPTH_STENCIL;
682 texture.gl_type = GL_UNSIGNED_INT_24_8;
683 break;
684
685 default:
686 LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer texture depth format %x", format);
687 UNIMPLEMENTED();
688 break;
689 }
690
691 state.texture_units[0].texture_2d = texture.texture.handle;
692 state.Apply();
693
694 glActiveTexture(GL_TEXTURE0);
695 glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0,
696 texture.gl_format, texture.gl_type, nullptr);
697
698 state.texture_units[0].texture_2d = 0;
699 state.Apply();
700}
701
702void RasterizerOpenGL::SetShader() { 823void RasterizerOpenGL::SetShader() {
703 PicaShaderConfig config = PicaShaderConfig::CurrentConfig(); 824 PicaShaderConfig config = PicaShaderConfig::CurrentConfig();
704 std::unique_ptr<PicaShader> shader = std::make_unique<PicaShader>(); 825 std::unique_ptr<PicaShader> shader = std::make_unique<PicaShader>();
@@ -754,6 +875,8 @@ void RasterizerOpenGL::SetShader() {
754 875
755 SyncGlobalAmbient(); 876 SyncGlobalAmbient();
756 for (int light_index = 0; light_index < 8; light_index++) { 877 for (int light_index = 0; light_index < 8; light_index++) {
878 SyncLightSpecular0(light_index);
879 SyncLightSpecular1(light_index);
757 SyncLightDiffuse(light_index); 880 SyncLightDiffuse(light_index);
758 SyncLightAmbient(light_index); 881 SyncLightAmbient(light_index);
759 SyncLightPosition(light_index); 882 SyncLightPosition(light_index);
@@ -761,83 +884,6 @@ void RasterizerOpenGL::SetShader() {
761 } 884 }
762} 885}
763 886
764void RasterizerOpenGL::SyncFramebuffer() {
765 const auto& regs = Pica::g_state.regs;
766
767 PAddr new_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress();
768 Pica::Regs::ColorFormat new_fb_color_format = regs.framebuffer.color_format;
769
770 PAddr new_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress();
771 Pica::Regs::DepthFormat new_fb_depth_format = regs.framebuffer.depth_format;
772
773 bool fb_size_changed = fb_color_texture.width != static_cast<GLsizei>(regs.framebuffer.GetWidth()) ||
774 fb_color_texture.height != static_cast<GLsizei>(regs.framebuffer.GetHeight());
775
776 bool color_fb_prop_changed = fb_color_texture.format != new_fb_color_format ||
777 fb_size_changed;
778
779 bool depth_fb_prop_changed = fb_depth_texture.format != new_fb_depth_format ||
780 fb_size_changed;
781
782 bool color_fb_modified = cached_fb_color_addr != new_fb_color_addr ||
783 color_fb_prop_changed;
784
785 bool depth_fb_modified = cached_fb_depth_addr != new_fb_depth_addr ||
786 depth_fb_prop_changed;
787
788 // Commit if framebuffer modified in any way
789 if (color_fb_modified)
790 CommitColorBuffer();
791
792 if (depth_fb_modified)
793 CommitDepthBuffer();
794
795 // Reconfigure framebuffer textures if any property has changed
796 if (color_fb_prop_changed) {
797 ReconfigureColorTexture(fb_color_texture, new_fb_color_format,
798 regs.framebuffer.GetWidth(), regs.framebuffer.GetHeight());
799 }
800
801 if (depth_fb_prop_changed) {
802 ReconfigureDepthTexture(fb_depth_texture, new_fb_depth_format,
803 regs.framebuffer.GetWidth(), regs.framebuffer.GetHeight());
804
805 // Only attach depth buffer as stencil if it supports stencil
806 switch (new_fb_depth_format) {
807 case Pica::Regs::DepthFormat::D16:
808 case Pica::Regs::DepthFormat::D24:
809 glFramebufferTexture2D(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
810 break;
811
812 case Pica::Regs::DepthFormat::D24S8:
813 glFramebufferTexture2D(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, fb_depth_texture.texture.handle, 0);
814 break;
815
816 default:
817 LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer depth format %x", new_fb_depth_format);
818 UNIMPLEMENTED();
819 break;
820 }
821 }
822
823 // Load buffer data again if fb modified in any way
824 if (color_fb_modified) {
825 cached_fb_color_addr = new_fb_color_addr;
826
827 ReloadColorBuffer();
828 }
829
830 if (depth_fb_modified) {
831 cached_fb_depth_addr = new_fb_depth_addr;
832
833 ReloadDepthBuffer();
834 }
835
836 GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER);
837 ASSERT_MSG(status == GL_FRAMEBUFFER_COMPLETE,
838 "OpenGL rasterizer framebuffer setup failed, status %X", status);
839}
840
841void RasterizerOpenGL::SyncCullMode() { 887void RasterizerOpenGL::SyncCullMode() {
842 const auto& regs = Pica::g_state.regs; 888 const auto& regs = Pica::g_state.regs;
843 889
@@ -1034,229 +1080,3 @@ void RasterizerOpenGL::SyncLightPosition(int light_index) {
1034 uniform_block_data.dirty = true; 1080 uniform_block_data.dirty = true;
1035 } 1081 }
1036} 1082}
1037
1038void RasterizerOpenGL::SyncDrawState() {
1039 const auto& regs = Pica::g_state.regs;
1040
1041 // Sync the viewport
1042 GLsizei viewport_width = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_x).ToFloat32() * 2;
1043 GLsizei viewport_height = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_y).ToFloat32() * 2;
1044
1045 // OpenGL uses different y coordinates, so negate corner offset and flip origin
1046 // TODO: Ensure viewport_corner.x should not be negated or origin flipped
1047 // TODO: Use floating-point viewports for accuracy if supported
1048 glViewport((GLsizei)regs.viewport_corner.x,
1049 (GLsizei)regs.viewport_corner.y,
1050 viewport_width, viewport_height);
1051
1052 // Sync bound texture(s), upload if not cached
1053 const auto pica_textures = regs.GetTextures();
1054 for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) {
1055 const auto& texture = pica_textures[texture_index];
1056
1057 if (texture.enabled) {
1058 texture_samplers[texture_index].SyncWithConfig(texture.config);
1059 res_cache.LoadAndBindTexture(state, texture_index, texture);
1060 } else {
1061 state.texture_units[texture_index].texture_2d = 0;
1062 }
1063 }
1064
1065 state.draw.uniform_buffer = uniform_buffer.handle;
1066 state.Apply();
1067}
1068
1069MICROPROFILE_DEFINE(OpenGL_FramebufferReload, "OpenGL", "FB Reload", MP_RGB(70, 70, 200));
1070
1071void RasterizerOpenGL::ReloadColorBuffer() {
1072 u8* color_buffer = Memory::GetPhysicalPointer(cached_fb_color_addr);
1073
1074 if (color_buffer == nullptr)
1075 return;
1076
1077 MICROPROFILE_SCOPE(OpenGL_FramebufferReload);
1078
1079 u32 bytes_per_pixel = Pica::Regs::BytesPerColorPixel(fb_color_texture.format);
1080
1081 std::unique_ptr<u8[]> temp_fb_color_buffer(new u8[fb_color_texture.width * fb_color_texture.height * bytes_per_pixel]);
1082
1083 // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion is necessary.
1084 for (int y = 0; y < fb_color_texture.height; ++y) {
1085 for (int x = 0; x < fb_color_texture.width; ++x) {
1086 const u32 coarse_y = y & ~7;
1087 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_color_texture.width * bytes_per_pixel;
1088 u32 gl_pixel_index = (x + (fb_color_texture.height - 1 - y) * fb_color_texture.width) * bytes_per_pixel;
1089
1090 u8* pixel = color_buffer + dst_offset;
1091 memcpy(&temp_fb_color_buffer[gl_pixel_index], pixel, bytes_per_pixel);
1092 }
1093 }
1094
1095 state.texture_units[0].texture_2d = fb_color_texture.texture.handle;
1096 state.Apply();
1097
1098 glActiveTexture(GL_TEXTURE0);
1099 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, fb_color_texture.width, fb_color_texture.height,
1100 fb_color_texture.gl_format, fb_color_texture.gl_type, temp_fb_color_buffer.get());
1101
1102 state.texture_units[0].texture_2d = 0;
1103 state.Apply();
1104}
1105
1106void RasterizerOpenGL::ReloadDepthBuffer() {
1107 if (cached_fb_depth_addr == 0)
1108 return;
1109
1110 // TODO: Appears to work, but double-check endianness of depth values and order of depth-stencil
1111 u8* depth_buffer = Memory::GetPhysicalPointer(cached_fb_depth_addr);
1112
1113 if (depth_buffer == nullptr)
1114 return;
1115
1116 MICROPROFILE_SCOPE(OpenGL_FramebufferReload);
1117
1118 u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format);
1119
1120 // OpenGL needs 4 bpp alignment for D24
1121 u32 gl_bpp = bytes_per_pixel == 3 ? 4 : bytes_per_pixel;
1122
1123 std::unique_ptr<u8[]> temp_fb_depth_buffer(new u8[fb_depth_texture.width * fb_depth_texture.height * gl_bpp]);
1124
1125 u8* temp_fb_depth_data = bytes_per_pixel == 3 ? (temp_fb_depth_buffer.get() + 1) : temp_fb_depth_buffer.get();
1126
1127 if (fb_depth_texture.format == Pica::Regs::DepthFormat::D24S8) {
1128 for (int y = 0; y < fb_depth_texture.height; ++y) {
1129 for (int x = 0; x < fb_depth_texture.width; ++x) {
1130 const u32 coarse_y = y & ~7;
1131 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel;
1132 u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width);
1133
1134 u8* pixel = depth_buffer + dst_offset;
1135 u32 depth_stencil = *(u32*)pixel;
1136 ((u32*)temp_fb_depth_data)[gl_pixel_index] = (depth_stencil << 8) | (depth_stencil >> 24);
1137 }
1138 }
1139 } else {
1140 for (int y = 0; y < fb_depth_texture.height; ++y) {
1141 for (int x = 0; x < fb_depth_texture.width; ++x) {
1142 const u32 coarse_y = y & ~7;
1143 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel;
1144 u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width) * gl_bpp;
1145
1146 u8* pixel = depth_buffer + dst_offset;
1147 memcpy(&temp_fb_depth_data[gl_pixel_index], pixel, bytes_per_pixel);
1148 }
1149 }
1150 }
1151
1152 state.texture_units[0].texture_2d = fb_depth_texture.texture.handle;
1153 state.Apply();
1154
1155 glActiveTexture(GL_TEXTURE0);
1156 if (fb_depth_texture.format == Pica::Regs::DepthFormat::D24S8) {
1157 // TODO(Subv): There is a bug with Intel Windows drivers that makes glTexSubImage2D not change the stencil buffer.
1158 // The bug has been reported to Intel (https://communities.intel.com/message/324464)
1159 glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH24_STENCIL8, fb_depth_texture.width, fb_depth_texture.height, 0,
1160 GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, temp_fb_depth_buffer.get());
1161 } else {
1162 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, fb_depth_texture.width, fb_depth_texture.height,
1163 fb_depth_texture.gl_format, fb_depth_texture.gl_type, temp_fb_depth_buffer.get());
1164 }
1165
1166 state.texture_units[0].texture_2d = 0;
1167 state.Apply();
1168}
1169
1170Common::Profiling::TimingCategory buffer_commit_category("Framebuffer Commit");
1171MICROPROFILE_DEFINE(OpenGL_FramebufferCommit, "OpenGL", "FB Commit", MP_RGB(70, 70, 200));
1172
1173void RasterizerOpenGL::CommitColorBuffer() {
1174 if (cached_fb_color_addr != 0) {
1175 u8* color_buffer = Memory::GetPhysicalPointer(cached_fb_color_addr);
1176
1177 if (color_buffer != nullptr) {
1178 Common::Profiling::ScopeTimer timer(buffer_commit_category);
1179 MICROPROFILE_SCOPE(OpenGL_FramebufferCommit);
1180
1181 u32 bytes_per_pixel = Pica::Regs::BytesPerColorPixel(fb_color_texture.format);
1182
1183 std::unique_ptr<u8[]> temp_gl_color_buffer(new u8[fb_color_texture.width * fb_color_texture.height * bytes_per_pixel]);
1184
1185 state.texture_units[0].texture_2d = fb_color_texture.texture.handle;
1186 state.Apply();
1187
1188 glActiveTexture(GL_TEXTURE0);
1189 glGetTexImage(GL_TEXTURE_2D, 0, fb_color_texture.gl_format, fb_color_texture.gl_type, temp_gl_color_buffer.get());
1190
1191 state.texture_units[0].texture_2d = 0;
1192 state.Apply();
1193
1194 // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion is necessary.
1195 for (int y = 0; y < fb_color_texture.height; ++y) {
1196 for (int x = 0; x < fb_color_texture.width; ++x) {
1197 const u32 coarse_y = y & ~7;
1198 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_color_texture.width * bytes_per_pixel;
1199 u32 gl_pixel_index = x * bytes_per_pixel + (fb_color_texture.height - 1 - y) * fb_color_texture.width * bytes_per_pixel;
1200
1201 u8* pixel = color_buffer + dst_offset;
1202 memcpy(pixel, &temp_gl_color_buffer[gl_pixel_index], bytes_per_pixel);
1203 }
1204 }
1205 }
1206 }
1207}
1208
1209void RasterizerOpenGL::CommitDepthBuffer() {
1210 if (cached_fb_depth_addr != 0) {
1211 // TODO: Output seems correct visually, but doesn't quite match sw renderer output. One of them is wrong.
1212 u8* depth_buffer = Memory::GetPhysicalPointer(cached_fb_depth_addr);
1213
1214 if (depth_buffer != nullptr) {
1215 Common::Profiling::ScopeTimer timer(buffer_commit_category);
1216 MICROPROFILE_SCOPE(OpenGL_FramebufferCommit);
1217
1218 u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format);
1219
1220 // OpenGL needs 4 bpp alignment for D24
1221 u32 gl_bpp = bytes_per_pixel == 3 ? 4 : bytes_per_pixel;
1222
1223 std::unique_ptr<u8[]> temp_gl_depth_buffer(new u8[fb_depth_texture.width * fb_depth_texture.height * gl_bpp]);
1224
1225 state.texture_units[0].texture_2d = fb_depth_texture.texture.handle;
1226 state.Apply();
1227
1228 glActiveTexture(GL_TEXTURE0);
1229 glGetTexImage(GL_TEXTURE_2D, 0, fb_depth_texture.gl_format, fb_depth_texture.gl_type, temp_gl_depth_buffer.get());
1230
1231 state.texture_units[0].texture_2d = 0;
1232 state.Apply();
1233
1234 u8* temp_gl_depth_data = bytes_per_pixel == 3 ? (temp_gl_depth_buffer.get() + 1) : temp_gl_depth_buffer.get();
1235
1236 if (fb_depth_texture.format == Pica::Regs::DepthFormat::D24S8) {
1237 for (int y = 0; y < fb_depth_texture.height; ++y) {
1238 for (int x = 0; x < fb_depth_texture.width; ++x) {
1239 const u32 coarse_y = y & ~7;
1240 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel;
1241 u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width);
1242
1243 u8* pixel = depth_buffer + dst_offset;
1244 u32 depth_stencil = ((u32*)temp_gl_depth_data)[gl_pixel_index];
1245 *(u32*)pixel = (depth_stencil >> 8) | (depth_stencil << 24);
1246 }
1247 }
1248 } else {
1249 for (int y = 0; y < fb_depth_texture.height; ++y) {
1250 for (int x = 0; x < fb_depth_texture.width; ++x) {
1251 const u32 coarse_y = y & ~7;
1252 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel;
1253 u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width) * gl_bpp;
1254
1255 u8* pixel = depth_buffer + dst_offset;
1256 memcpy(pixel, &temp_gl_depth_data[gl_pixel_index], bytes_per_pixel);
1257 }
1258 }
1259 }
1260 }
1261 }
1262}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 390349a0c..82fa61742 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -4,22 +4,33 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
7#include <cstddef> 8#include <cstddef>
8#include <cstring> 9#include <cstring>
9#include <memory> 10#include <memory>
10#include <vector> 11#include <vector>
11#include <unordered_map> 12#include <unordered_map>
12 13
14#include <glad/glad.h>
15
16#include "common/bit_field.h"
13#include "common/common_types.h" 17#include "common/common_types.h"
14#include "common/hash.h" 18#include "common/hash.h"
19#include "common/vector_math.h"
20
21#include "core/hw/gpu.h"
15 22
16#include "video_core/pica.h" 23#include "video_core/pica.h"
17#include "video_core/pica_state.h" 24#include "video_core/pica_state.h"
25#include "video_core/pica_types.h"
18#include "video_core/rasterizer_interface.h" 26#include "video_core/rasterizer_interface.h"
19#include "video_core/renderer_opengl/gl_rasterizer_cache.h" 27#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
28#include "video_core/renderer_opengl/gl_resource_manager.h"
20#include "video_core/renderer_opengl/gl_state.h" 29#include "video_core/renderer_opengl/gl_state.h"
21#include "video_core/renderer_opengl/pica_to_gl.h" 30#include "video_core/renderer_opengl/pica_to_gl.h"
22#include "video_core/shader/shader_interpreter.h" 31#include "video_core/shader/shader.h"
32
33struct ScreenInfo;
23 34
24/** 35/**
25 * This struct contains all state used to generate the GLSL shader program that emulates the current 36 * This struct contains all state used to generate the GLSL shader program that emulates the current
@@ -38,36 +49,18 @@ struct PicaShaderConfig {
38 res.alpha_test_func = regs.output_merger.alpha_test.enable ? 49 res.alpha_test_func = regs.output_merger.alpha_test.enable ?
39 regs.output_merger.alpha_test.func.Value() : Pica::Regs::CompareFunc::Always; 50 regs.output_merger.alpha_test.func.Value() : Pica::Regs::CompareFunc::Always;
40 51
41 // Copy relevant TevStageConfig fields only. We're doing this manually (instead of calling 52 // Copy relevant tev stages fields.
42 // the GetTevStages() function) because BitField explicitly disables copies. 53 // We don't sync const_color here because of the high variance, it is a
43 54 // shader uniform instead.
44 res.tev_stages[0].sources_raw = regs.tev_stage0.sources_raw; 55 const auto& tev_stages = regs.GetTevStages();
45 res.tev_stages[1].sources_raw = regs.tev_stage1.sources_raw; 56 DEBUG_ASSERT(res.tev_stages.size() == tev_stages.size());
46 res.tev_stages[2].sources_raw = regs.tev_stage2.sources_raw; 57 for (size_t i = 0; i < tev_stages.size(); i++) {
47 res.tev_stages[3].sources_raw = regs.tev_stage3.sources_raw; 58 const auto& tev_stage = tev_stages[i];
48 res.tev_stages[4].sources_raw = regs.tev_stage4.sources_raw; 59 res.tev_stages[i].sources_raw = tev_stage.sources_raw;
49 res.tev_stages[5].sources_raw = regs.tev_stage5.sources_raw; 60 res.tev_stages[i].modifiers_raw = tev_stage.modifiers_raw;
50 61 res.tev_stages[i].ops_raw = tev_stage.ops_raw;
51 res.tev_stages[0].modifiers_raw = regs.tev_stage0.modifiers_raw; 62 res.tev_stages[i].scales_raw = tev_stage.scales_raw;
52 res.tev_stages[1].modifiers_raw = regs.tev_stage1.modifiers_raw; 63 }
53 res.tev_stages[2].modifiers_raw = regs.tev_stage2.modifiers_raw;
54 res.tev_stages[3].modifiers_raw = regs.tev_stage3.modifiers_raw;
55 res.tev_stages[4].modifiers_raw = regs.tev_stage4.modifiers_raw;
56 res.tev_stages[5].modifiers_raw = regs.tev_stage5.modifiers_raw;
57
58 res.tev_stages[0].ops_raw = regs.tev_stage0.ops_raw;
59 res.tev_stages[1].ops_raw = regs.tev_stage1.ops_raw;
60 res.tev_stages[2].ops_raw = regs.tev_stage2.ops_raw;
61 res.tev_stages[3].ops_raw = regs.tev_stage3.ops_raw;
62 res.tev_stages[4].ops_raw = regs.tev_stage4.ops_raw;
63 res.tev_stages[5].ops_raw = regs.tev_stage5.ops_raw;
64
65 res.tev_stages[0].scales_raw = regs.tev_stage0.scales_raw;
66 res.tev_stages[1].scales_raw = regs.tev_stage1.scales_raw;
67 res.tev_stages[2].scales_raw = regs.tev_stage2.scales_raw;
68 res.tev_stages[3].scales_raw = regs.tev_stage3.scales_raw;
69 res.tev_stages[4].scales_raw = regs.tev_stage4.scales_raw;
70 res.tev_stages[5].scales_raw = regs.tev_stage5.scales_raw;
71 64
72 res.combiner_buffer_input = 65 res.combiner_buffer_input =
73 regs.tev_combiner_buffer_input.update_mask_rgb.Value() | 66 regs.tev_combiner_buffer_input.update_mask_rgb.Value() |
@@ -191,16 +184,17 @@ public:
191 RasterizerOpenGL(); 184 RasterizerOpenGL();
192 ~RasterizerOpenGL() override; 185 ~RasterizerOpenGL() override;
193 186
194 void InitObjects() override;
195 void Reset() override;
196 void AddTriangle(const Pica::Shader::OutputVertex& v0, 187 void AddTriangle(const Pica::Shader::OutputVertex& v0,
197 const Pica::Shader::OutputVertex& v1, 188 const Pica::Shader::OutputVertex& v1,
198 const Pica::Shader::OutputVertex& v2) override; 189 const Pica::Shader::OutputVertex& v2) override;
199 void DrawTriangles() override; 190 void DrawTriangles() override;
200 void FlushFramebuffer() override;
201 void NotifyPicaRegisterChanged(u32 id) override; 191 void NotifyPicaRegisterChanged(u32 id) override;
192 void FlushAll() override;
202 void FlushRegion(PAddr addr, u32 size) override; 193 void FlushRegion(PAddr addr, u32 size) override;
203 void InvalidateRegion(PAddr addr, u32 size) override; 194 void FlushAndInvalidateRegion(PAddr addr, u32 size) override;
195 bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override;
196 bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override;
197 bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) override;
204 198
205 /// OpenGL shader generated for a given Pica register state 199 /// OpenGL shader generated for a given Pica register state
206 struct PicaShader { 200 struct PicaShader {
@@ -210,26 +204,6 @@ public:
210 204
211private: 205private:
212 206
213 /// Structure used for storing information about color textures
214 struct TextureInfo {
215 OGLTexture texture;
216 GLsizei width;
217 GLsizei height;
218 Pica::Regs::ColorFormat format;
219 GLenum gl_format;
220 GLenum gl_type;
221 };
222
223 /// Structure used for storing information about depth textures
224 struct DepthTextureInfo {
225 OGLTexture texture;
226 GLsizei width;
227 GLsizei height;
228 Pica::Regs::DepthFormat format;
229 GLenum gl_format;
230 GLenum gl_type;
231 };
232
233 struct SamplerInfo { 207 struct SamplerInfo {
234 using TextureConfig = Pica::Regs::TextureConfig; 208 using TextureConfig = Pica::Regs::TextureConfig;
235 209
@@ -311,18 +285,9 @@ private:
311 static_assert(sizeof(UniformData) == 0x310, "The size of the UniformData structure has changed, update the structure in the shader"); 285 static_assert(sizeof(UniformData) == 0x310, "The size of the UniformData structure has changed, update the structure in the shader");
312 static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec"); 286 static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec");
313 287
314 /// Reconfigure the OpenGL color texture to use the given format and dimensions
315 void ReconfigureColorTexture(TextureInfo& texture, Pica::Regs::ColorFormat format, u32 width, u32 height);
316
317 /// Reconfigure the OpenGL depth texture to use the given format and dimensions
318 void ReconfigureDepthTexture(DepthTextureInfo& texture, Pica::Regs::DepthFormat format, u32 width, u32 height);
319
320 /// Sets the OpenGL shader in accordance with the current PICA register state 288 /// Sets the OpenGL shader in accordance with the current PICA register state
321 void SetShader(); 289 void SetShader();
322 290
323 /// Syncs the state and contents of the OpenGL framebuffer to match the current PICA framebuffer
324 void SyncFramebuffer();
325
326 /// Syncs the cull mode to match the PICA register 291 /// Syncs the cull mode to match the PICA register
327 void SyncCullMode(); 292 void SyncCullMode();
328 293
@@ -359,72 +324,42 @@ private:
359 /// Syncs the depth test states to match the PICA register 324 /// Syncs the depth test states to match the PICA register
360 void SyncDepthTest(); 325 void SyncDepthTest();
361 326
362 /// Syncs the TEV constant color to match the PICA register
363 void SyncTevConstColor(int tev_index, const Pica::Regs::TevStageConfig& tev_stage);
364
365 /// Syncs the TEV combiner color buffer to match the PICA register 327 /// Syncs the TEV combiner color buffer to match the PICA register
366 void SyncCombinerColor(); 328 void SyncCombinerColor();
367 329
330 /// Syncs the TEV constant color to match the PICA register
331 void SyncTevConstColor(int tev_index, const Pica::Regs::TevStageConfig& tev_stage);
332
368 /// Syncs the lighting global ambient color to match the PICA register 333 /// Syncs the lighting global ambient color to match the PICA register
369 void SyncGlobalAmbient(); 334 void SyncGlobalAmbient();
370 335
371 /// Syncs the lighting lookup tables 336 /// Syncs the lighting lookup tables
372 void SyncLightingLUT(unsigned index); 337 void SyncLightingLUT(unsigned index);
373 338
374 /// Syncs the specified light's diffuse color to match the PICA register
375 void SyncLightDiffuse(int light_index);
376
377 /// Syncs the specified light's ambient color to match the PICA register
378 void SyncLightAmbient(int light_index);
379
380 /// Syncs the specified light's position to match the PICA register
381 void SyncLightPosition(int light_index);
382
383 /// Syncs the specified light's specular 0 color to match the PICA register 339 /// Syncs the specified light's specular 0 color to match the PICA register
384 void SyncLightSpecular0(int light_index); 340 void SyncLightSpecular0(int light_index);
385 341
386 /// Syncs the specified light's specular 1 color to match the PICA register 342 /// Syncs the specified light's specular 1 color to match the PICA register
387 void SyncLightSpecular1(int light_index); 343 void SyncLightSpecular1(int light_index);
388 344
389 /// Syncs the remaining OpenGL drawing state to match the current PICA state 345 /// Syncs the specified light's diffuse color to match the PICA register
390 void SyncDrawState(); 346 void SyncLightDiffuse(int light_index);
391
392 /// Copies the 3DS color framebuffer into the OpenGL color framebuffer texture
393 void ReloadColorBuffer();
394 347
395 /// Copies the 3DS depth framebuffer into the OpenGL depth framebuffer texture 348 /// Syncs the specified light's ambient color to match the PICA register
396 void ReloadDepthBuffer(); 349 void SyncLightAmbient(int light_index);
397 350
398 /** 351 /// Syncs the specified light's position to match the PICA register
399 * Save the current OpenGL color framebuffer to the current PICA framebuffer in 3DS memory 352 void SyncLightPosition(int light_index);
400 * Loads the OpenGL framebuffer textures into temporary buffers
401 * Then copies into the 3DS framebuffer using proper Morton order
402 */
403 void CommitColorBuffer();
404 353
405 /** 354 OpenGLState state;
406 * Save the current OpenGL depth framebuffer to the current PICA framebuffer in 3DS memory
407 * Loads the OpenGL framebuffer textures into temporary buffers
408 * Then copies into the 3DS framebuffer using proper Morton order
409 */
410 void CommitDepthBuffer();
411 355
412 RasterizerCacheOpenGL res_cache; 356 RasterizerCacheOpenGL res_cache;
413 357
414 std::vector<HardwareVertex> vertex_batch; 358 std::vector<HardwareVertex> vertex_batch;
415 359
416 OpenGLState state;
417
418 PAddr cached_fb_color_addr;
419 PAddr cached_fb_depth_addr;
420
421 // Hardware rasterizer
422 std::array<SamplerInfo, 3> texture_samplers;
423 TextureInfo fb_color_texture;
424 DepthTextureInfo fb_depth_texture;
425
426 std::unordered_map<PicaShaderConfig, std::unique_ptr<PicaShader>> shader_cache; 360 std::unordered_map<PicaShaderConfig, std::unique_ptr<PicaShader>> shader_cache;
427 const PicaShader* current_shader = nullptr; 361 const PicaShader* current_shader = nullptr;
362 bool shader_dirty;
428 363
429 struct { 364 struct {
430 UniformData data; 365 UniformData data;
@@ -432,11 +367,12 @@ private:
432 bool dirty; 367 bool dirty;
433 } uniform_block_data; 368 } uniform_block_data;
434 369
370 std::array<SamplerInfo, 3> texture_samplers;
435 OGLVertexArray vertex_array; 371 OGLVertexArray vertex_array;
436 OGLBuffer vertex_buffer; 372 OGLBuffer vertex_buffer;
437 OGLBuffer uniform_buffer; 373 OGLBuffer uniform_buffer;
438 OGLFramebuffer framebuffer; 374 OGLFramebuffer framebuffer;
439 375
440 std::array<OGLTexture, 6> lighting_lut; 376 std::array<OGLTexture, 6> lighting_luts;
441 std::array<std::array<GLvec4, 256>, 6> lighting_lut_data; 377 std::array<std::array<GLvec4, 256>, 6> lighting_lut_data;
442}; 378};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 1323c12e4..7efd0038a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -2,9 +2,19 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <memory> 5#include <algorithm>
6#include <atomic>
7#include <cstring>
8#include <iterator>
9#include <unordered_set>
10#include <utility>
11#include <vector>
6 12
7#include "common/hash.h" 13#include <glad/glad.h>
14
15#include "common/bit_field.h"
16#include "common/emu_window.h"
17#include "common/logging/log.h"
8#include "common/math_util.h" 18#include "common/math_util.h"
9#include "common/microprofile.h" 19#include "common/microprofile.h"
10#include "common/vector_math.h" 20#include "common/vector_math.h"
@@ -12,71 +22,693 @@
12#include "core/memory.h" 22#include "core/memory.h"
13 23
14#include "video_core/debug_utils/debug_utils.h" 24#include "video_core/debug_utils/debug_utils.h"
25#include "video_core/pica_state.h"
15#include "video_core/renderer_opengl/gl_rasterizer_cache.h" 26#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
16#include "video_core/renderer_opengl/pica_to_gl.h" 27#include "video_core/renderer_opengl/gl_state.h"
28#include "video_core/utils.h"
29#include "video_core/video_core.h"
30
31struct FormatTuple {
32 GLint internal_format;
33 GLenum format;
34 GLenum type;
35};
36
37static const std::array<FormatTuple, 5> fb_format_tuples = {{
38 { GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8 }, // RGBA8
39 { GL_RGB8, GL_BGR, GL_UNSIGNED_BYTE }, // RGB8
40 { GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1 }, // RGB5A1
41 { GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5 }, // RGB565
42 { GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4 }, // RGBA4
43}};
44
45static const std::array<FormatTuple, 4> depth_format_tuples = {{
46 { GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT }, // D16
47 {},
48 { GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT }, // D24
49 { GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8 }, // D24S8
50}};
51
52RasterizerCacheOpenGL::RasterizerCacheOpenGL() {
53 transfer_framebuffers[0].Create();
54 transfer_framebuffers[1].Create();
55}
17 56
18RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { 57RasterizerCacheOpenGL::~RasterizerCacheOpenGL() {
19 InvalidateAll(); 58 FlushAll();
59}
60
61static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width, u32 height, u32 bytes_per_pixel, u32 gl_bytes_per_pixel, u8* morton_data, u8* gl_data, bool morton_to_gl) {
62 using PixelFormat = CachedSurface::PixelFormat;
63
64 u8* data_ptrs[2];
65 u32 depth_stencil_shifts[2] = {24, 8};
66
67 if (morton_to_gl) {
68 std::swap(depth_stencil_shifts[0], depth_stencil_shifts[1]);
69 }
70
71 if (pixel_format == PixelFormat::D24S8) {
72 for (unsigned y = 0; y < height; ++y) {
73 for (unsigned x = 0; x < width; ++x) {
74 const u32 coarse_y = y & ~7;
75 u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel;
76 u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel;
77
78 data_ptrs[morton_to_gl] = morton_data + morton_offset;
79 data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index];
80
81 // Swap depth and stencil value ordering since 3DS does not match OpenGL
82 u32 depth_stencil;
83 memcpy(&depth_stencil, data_ptrs[1], sizeof(u32));
84 depth_stencil = (depth_stencil << depth_stencil_shifts[0]) | (depth_stencil >> depth_stencil_shifts[1]);
85
86 memcpy(data_ptrs[0], &depth_stencil, sizeof(u32));
87 }
88 }
89 } else {
90 for (unsigned y = 0; y < height; ++y) {
91 for (unsigned x = 0; x < width; ++x) {
92 const u32 coarse_y = y & ~7;
93 u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel;
94 u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel;
95
96 data_ptrs[morton_to_gl] = morton_data + morton_offset;
97 data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index];
98
99 memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
100 }
101 }
102 }
103}
104
105bool RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, CachedSurface::SurfaceType type, const MathUtil::Rectangle<int>& src_rect, const MathUtil::Rectangle<int>& dst_rect) {
106 using SurfaceType = CachedSurface::SurfaceType;
107
108 OpenGLState cur_state = OpenGLState::GetCurState();
109
110 // Make sure textures aren't bound to texture units, since going to bind them to framebuffer components
111 OpenGLState::ResetTexture(src_tex);
112 OpenGLState::ResetTexture(dst_tex);
113
114 // Keep track of previous framebuffer bindings
115 GLuint old_fbs[2] = { cur_state.draw.read_framebuffer, cur_state.draw.draw_framebuffer };
116 cur_state.draw.read_framebuffer = transfer_framebuffers[0].handle;
117 cur_state.draw.draw_framebuffer = transfer_framebuffers[1].handle;
118 cur_state.Apply();
119
120 u32 buffers = 0;
121
122 if (type == SurfaceType::Color || type == SurfaceType::Texture) {
123 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex, 0);
124 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
125
126 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex, 0);
127 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
128
129 buffers = GL_COLOR_BUFFER_BIT;
130 } else if (type == SurfaceType::Depth) {
131 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
132 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0);
133 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
134
135 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
136 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0);
137 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
138
139 buffers = GL_DEPTH_BUFFER_BIT;
140 } else if (type == SurfaceType::DepthStencil) {
141 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
142 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0);
143
144 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
145 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0);
146
147 buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
148 }
149
150 if (OpenGLState::CheckFBStatus(GL_READ_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
151 return false;
152 }
153
154 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
155 return false;
156 }
157
158 glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
159 dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom,
160 buffers, buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST);
161
162 // Restore previous framebuffer bindings
163 cur_state.draw.read_framebuffer = old_fbs[0];
164 cur_state.draw.draw_framebuffer = old_fbs[1];
165 cur_state.Apply();
166
167 return true;
168}
169
170bool RasterizerCacheOpenGL::TryBlitSurfaces(CachedSurface* src_surface, const MathUtil::Rectangle<int>& src_rect, CachedSurface* dst_surface, const MathUtil::Rectangle<int>& dst_rect) {
171 using SurfaceType = CachedSurface::SurfaceType;
172
173 if (!CachedSurface::CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format)) {
174 return false;
175 }
176
177 return BlitTextures(src_surface->texture.handle, dst_surface->texture.handle, CachedSurface::GetFormatType(src_surface->pixel_format), src_rect, dst_rect);
178}
179
180static void AllocateSurfaceTexture(GLuint texture, CachedSurface::PixelFormat pixel_format, u32 width, u32 height) {
181 // Allocate an uninitialized texture of appropriate size and format for the surface
182 using SurfaceType = CachedSurface::SurfaceType;
183
184 OpenGLState cur_state = OpenGLState::GetCurState();
185
186 // Keep track of previous texture bindings
187 GLuint old_tex = cur_state.texture_units[0].texture_2d;
188 cur_state.texture_units[0].texture_2d = texture;
189 cur_state.Apply();
190 glActiveTexture(GL_TEXTURE0);
191
192 SurfaceType type = CachedSurface::GetFormatType(pixel_format);
193
194 FormatTuple tuple;
195 if (type == SurfaceType::Color) {
196 ASSERT((size_t)pixel_format < fb_format_tuples.size());
197 tuple = fb_format_tuples[(unsigned int)pixel_format];
198 } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) {
199 size_t tuple_idx = (size_t)pixel_format - 14;
200 ASSERT(tuple_idx < depth_format_tuples.size());
201 tuple = depth_format_tuples[tuple_idx];
202 } else {
203 tuple = { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE };
204 }
205
206 glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, width, height, 0,
207 tuple.format, tuple.type, nullptr);
208
209 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
210 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
211 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
212 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
213
214 // Restore previous texture bindings
215 cur_state.texture_units[0].texture_2d = old_tex;
216 cur_state.Apply();
20} 217}
21 218
22MICROPROFILE_DEFINE(OpenGL_TextureUpload, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192)); 219MICROPROFILE_DEFINE(OpenGL_SurfaceUpload, "OpenGL", "Surface Upload", MP_RGB(128, 64, 192));
220CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bool match_res_scale, bool load_if_create) {
221 using PixelFormat = CachedSurface::PixelFormat;
222 using SurfaceType = CachedSurface::SurfaceType;
223
224 if (params.addr == 0) {
225 return nullptr;
226 }
227
228 u32 params_size = params.width * params.height * CachedSurface::GetFormatBpp(params.pixel_format) / 8;
229
230 // Check for an exact match in existing surfaces
231 CachedSurface* best_exact_surface = nullptr;
232 float exact_surface_goodness = -1.f;
233
234 auto surface_interval = boost::icl::interval<PAddr>::right_open(params.addr, params.addr + params_size);
235 auto range = surface_cache.equal_range(surface_interval);
236 for (auto it = range.first; it != range.second; ++it) {
237 for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) {
238 CachedSurface* surface = it2->get();
239
240 // Check if the request matches the surface exactly
241 if (params.addr == surface->addr &&
242 params.width == surface->width && params.height == surface->height &&
243 params.pixel_format == surface->pixel_format)
244 {
245 // Make sure optional param-matching criteria are fulfilled
246 bool tiling_match = (params.is_tiled == surface->is_tiled);
247 bool res_scale_match = (params.res_scale_width == surface->res_scale_width && params.res_scale_height == surface->res_scale_height);
248 if (!match_res_scale || res_scale_match) {
249 // Prioritize same-tiling and highest resolution surfaces
250 float match_goodness = (float)tiling_match + surface->res_scale_width * surface->res_scale_height;
251 if (match_goodness > exact_surface_goodness || surface->dirty) {
252 exact_surface_goodness = match_goodness;
253 best_exact_surface = surface;
254 }
255 }
256 }
257 }
258 }
259
260 // Return the best exact surface if found
261 if (best_exact_surface != nullptr) {
262 return best_exact_surface;
263 }
264
265 // No matching surfaces found, so create a new one
266 u8* texture_src_data = Memory::GetPhysicalPointer(params.addr);
267 if (texture_src_data == nullptr) {
268 return nullptr;
269 }
270
271 MICROPROFILE_SCOPE(OpenGL_SurfaceUpload);
272
273 std::shared_ptr<CachedSurface> new_surface = std::make_shared<CachedSurface>();
23 274
24void RasterizerCacheOpenGL::LoadAndBindTexture(OpenGLState &state, unsigned texture_unit, const Pica::DebugUtils::TextureInfo& info) { 275 new_surface->addr = params.addr;
25 const auto cached_texture = texture_cache.find(info.physical_address); 276 new_surface->size = params_size;
26 277
27 if (cached_texture != texture_cache.end()) { 278 new_surface->texture.Create();
28 state.texture_units[texture_unit].texture_2d = cached_texture->second->texture.handle; 279 new_surface->width = params.width;
29 state.Apply(); 280 new_surface->height = params.height;
281 new_surface->stride = params.stride;
282 new_surface->res_scale_width = params.res_scale_width;
283 new_surface->res_scale_height = params.res_scale_height;
284
285 new_surface->is_tiled = params.is_tiled;
286 new_surface->pixel_format = params.pixel_format;
287 new_surface->dirty = false;
288
289 if (!load_if_create) {
290 // Don't load any data; just allocate the surface's texture
291 AllocateSurfaceTexture(new_surface->texture.handle, new_surface->pixel_format, new_surface->GetScaledWidth(), new_surface->GetScaledHeight());
30 } else { 292 } else {
31 MICROPROFILE_SCOPE(OpenGL_TextureUpload); 293 // TODO: Consider attempting subrect match in existing surfaces and direct blit here instead of memory upload below if that's a common scenario in some game
294
295 Memory::RasterizerFlushRegion(params.addr, params_size);
296
297 // Load data from memory to the new surface
298 OpenGLState cur_state = OpenGLState::GetCurState();
299
300 GLuint old_tex = cur_state.texture_units[0].texture_2d;
301 cur_state.texture_units[0].texture_2d = new_surface->texture.handle;
302 cur_state.Apply();
303 glActiveTexture(GL_TEXTURE0);
304
305 glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)new_surface->stride);
306 if (!new_surface->is_tiled) {
307 // TODO: Ensure this will always be a color format, not a depth or other format
308 ASSERT((size_t)new_surface->pixel_format < fb_format_tuples.size());
309 const FormatTuple& tuple = fb_format_tuples[(unsigned int)params.pixel_format];
310
311 glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0,
312 tuple.format, tuple.type, texture_src_data);
313 } else {
314 SurfaceType type = CachedSurface::GetFormatType(new_surface->pixel_format);
315 if (type != SurfaceType::Depth && type != SurfaceType::DepthStencil) {
316 FormatTuple tuple;
317 if ((size_t)params.pixel_format < fb_format_tuples.size()) {
318 tuple = fb_format_tuples[(unsigned int)params.pixel_format];
319 } else {
320 // Texture
321 tuple = { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE };
322 }
323
324 std::vector<Math::Vec4<u8>> tex_buffer(params.width * params.height);
32 325
33 std::unique_ptr<CachedTexture> new_texture = std::make_unique<CachedTexture>(); 326 Pica::DebugUtils::TextureInfo tex_info;
327 tex_info.width = params.width;
328 tex_info.height = params.height;
329 tex_info.stride = params.width * CachedSurface::GetFormatBpp(params.pixel_format) / 8;
330 tex_info.format = (Pica::Regs::TextureFormat)params.pixel_format;
331 tex_info.physical_address = params.addr;
34 332
35 new_texture->texture.Create(); 333 for (unsigned y = 0; y < params.height; ++y) {
36 state.texture_units[texture_unit].texture_2d = new_texture->texture.handle; 334 for (unsigned x = 0; x < params.width; ++x) {
37 state.Apply(); 335 tex_buffer[x + params.width * y] = Pica::DebugUtils::LookupTexture(texture_src_data, x, params.height - 1 - y, tex_info);
38 glActiveTexture(GL_TEXTURE0 + texture_unit); 336 }
337 }
39 338
40 u8* texture_src_data = Memory::GetPhysicalPointer(info.physical_address); 339 glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0, GL_RGBA, GL_UNSIGNED_BYTE, tex_buffer.data());
340 } else {
341 // Depth/Stencil formats need special treatment since they aren't sampleable using LookupTexture and can't use RGBA format
342 size_t tuple_idx = (size_t)params.pixel_format - 14;
343 ASSERT(tuple_idx < depth_format_tuples.size());
344 const FormatTuple& tuple = depth_format_tuples[tuple_idx];
41 345
42 new_texture->width = info.width; 346 u32 bytes_per_pixel = CachedSurface::GetFormatBpp(params.pixel_format) / 8;
43 new_texture->height = info.height;
44 new_texture->size = info.stride * info.height;
45 new_texture->addr = info.physical_address;
46 new_texture->hash = Common::ComputeHash64(texture_src_data, new_texture->size);
47 347
48 std::unique_ptr<Math::Vec4<u8>[]> temp_texture_buffer_rgba(new Math::Vec4<u8>[info.width * info.height]); 348 // OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type
349 bool use_4bpp = (params.pixel_format == PixelFormat::D24);
49 350
50 for (int y = 0; y < info.height; ++y) { 351 u32 gl_bytes_per_pixel = use_4bpp ? 4 : bytes_per_pixel;
51 for (int x = 0; x < info.width; ++x) { 352
52 temp_texture_buffer_rgba[x + info.width * y] = Pica::DebugUtils::LookupTexture(texture_src_data, x, info.height - 1 - y, info); 353 std::vector<u8> temp_fb_depth_buffer(params.width * params.height * gl_bytes_per_pixel);
354
355 u8* temp_fb_depth_buffer_ptr = use_4bpp ? temp_fb_depth_buffer.data() + 1 : temp_fb_depth_buffer.data();
356
357 MortonCopyPixels(params.pixel_format, params.width, params.height, bytes_per_pixel, gl_bytes_per_pixel, texture_src_data, temp_fb_depth_buffer_ptr, true);
358
359 glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0,
360 tuple.format, tuple.type, temp_fb_depth_buffer.data());
53 } 361 }
54 } 362 }
363 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
364
365 // If not 1x scale, blit 1x texture to a new scaled texture and replace texture in surface
366 if (new_surface->res_scale_width != 1.f || new_surface->res_scale_height != 1.f) {
367 OGLTexture scaled_texture;
368 scaled_texture.Create();
369
370 AllocateSurfaceTexture(scaled_texture.handle, new_surface->pixel_format, new_surface->GetScaledWidth(), new_surface->GetScaledHeight());
371 BlitTextures(new_surface->texture.handle, scaled_texture.handle, CachedSurface::GetFormatType(new_surface->pixel_format),
372 MathUtil::Rectangle<int>(0, 0, new_surface->width, new_surface->height),
373 MathUtil::Rectangle<int>(0, 0, new_surface->GetScaledWidth(), new_surface->GetScaledHeight()));
374
375 new_surface->texture.Release();
376 new_surface->texture.handle = scaled_texture.handle;
377 scaled_texture.handle = 0;
378 cur_state.texture_units[0].texture_2d = new_surface->texture.handle;
379 cur_state.Apply();
380 }
55 381
56 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, info.width, info.height, 0, GL_RGBA, GL_UNSIGNED_BYTE, temp_texture_buffer_rgba.get()); 382 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
383 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
384 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
385 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
57 386
58 texture_cache.emplace(info.physical_address, std::move(new_texture)); 387 cur_state.texture_units[0].texture_2d = old_tex;
388 cur_state.Apply();
59 } 389 }
390
391 Memory::RasterizerMarkRegionCached(new_surface->addr, new_surface->size, 1);
392 surface_cache.add(std::make_pair(boost::icl::interval<PAddr>::right_open(new_surface->addr, new_surface->addr + new_surface->size), std::set<std::shared_ptr<CachedSurface>>({ new_surface })));
393 return new_surface.get();
60} 394}
61 395
62void RasterizerCacheOpenGL::InvalidateInRange(PAddr addr, u32 size, bool ignore_hash) { 396CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params, bool match_res_scale, bool load_if_create, MathUtil::Rectangle<int>& out_rect) {
63 // TODO: Optimize by also inserting upper bound (addr + size) of each texture into the same map and also narrow using lower_bound 397 if (params.addr == 0) {
64 auto cache_upper_bound = texture_cache.upper_bound(addr + size); 398 return nullptr;
399 }
400
401 u32 total_pixels = params.width * params.height;
402 u32 params_size = total_pixels * CachedSurface::GetFormatBpp(params.pixel_format) / 8;
65 403
66 for (auto it = texture_cache.begin(); it != cache_upper_bound;) { 404 // Attempt to find encompassing surfaces
67 const auto& info = *it->second; 405 CachedSurface* best_subrect_surface = nullptr;
406 float subrect_surface_goodness = -1.f;
68 407
69 // Flush the texture only if the memory region intersects and a change is detected 408 auto surface_interval = boost::icl::interval<PAddr>::right_open(params.addr, params.addr + params_size);
70 if (MathUtil::IntervalsIntersect(addr, size, info.addr, info.size) && 409 auto cache_upper_bound = surface_cache.upper_bound(surface_interval);
71 (ignore_hash || info.hash != Common::ComputeHash64(Memory::GetPhysicalPointer(info.addr), info.size))) { 410 for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) {
411 for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) {
412 CachedSurface* surface = it2->get();
72 413
73 it = texture_cache.erase(it); 414 // Check if the request is contained in the surface
415 if (params.addr >= surface->addr &&
416 params.addr + params_size - 1 <= surface->addr + surface->size - 1 &&
417 params.pixel_format == surface->pixel_format)
418 {
419 // Make sure optional param-matching criteria are fulfilled
420 bool tiling_match = (params.is_tiled == surface->is_tiled);
421 bool res_scale_match = (params.res_scale_width == surface->res_scale_width && params.res_scale_height == surface->res_scale_height);
422 if (!match_res_scale || res_scale_match) {
423 // Prioritize same-tiling and highest resolution surfaces
424 float match_goodness = (float)tiling_match + surface->res_scale_width * surface->res_scale_height;
425 if (match_goodness > subrect_surface_goodness || surface->dirty) {
426 subrect_surface_goodness = match_goodness;
427 best_subrect_surface = surface;
428 }
429 }
430 }
431 }
432 }
433
434 // Return the best subrect surface if found
435 if (best_subrect_surface != nullptr) {
436 unsigned int bytes_per_pixel = (CachedSurface::GetFormatBpp(best_subrect_surface->pixel_format) / 8);
437
438 int x0, y0;
439
440 if (!params.is_tiled) {
441 u32 begin_pixel_index = (params.addr - best_subrect_surface->addr) / bytes_per_pixel;
442 x0 = begin_pixel_index % best_subrect_surface->width;
443 y0 = begin_pixel_index / best_subrect_surface->width;
444
445 out_rect = MathUtil::Rectangle<int>(x0, y0, x0 + params.width, y0 + params.height);
446 } else {
447 u32 bytes_per_tile = 8 * 8 * bytes_per_pixel;
448 u32 tiles_per_row = best_subrect_surface->width / 8;
449
450 u32 begin_tile_index = (params.addr - best_subrect_surface->addr) / bytes_per_tile;
451 x0 = begin_tile_index % tiles_per_row * 8;
452 y0 = begin_tile_index / tiles_per_row * 8;
453
454 // Tiled surfaces are flipped vertically in the rasterizer vs. 3DS memory.
455 out_rect = MathUtil::Rectangle<int>(x0, best_subrect_surface->height - y0, x0 + params.width, best_subrect_surface->height - (y0 + params.height));
456 }
457
458 out_rect.left = (int)(out_rect.left * best_subrect_surface->res_scale_width);
459 out_rect.right = (int)(out_rect.right * best_subrect_surface->res_scale_width);
460 out_rect.top = (int)(out_rect.top * best_subrect_surface->res_scale_height);
461 out_rect.bottom = (int)(out_rect.bottom * best_subrect_surface->res_scale_height);
462
463 return best_subrect_surface;
464 }
465
466 // No subrect found - create and return a new surface
467 if (!params.is_tiled) {
468 out_rect = MathUtil::Rectangle<int>(0, 0, (int)(params.width * params.res_scale_width), (int)(params.height * params.res_scale_height));
469 } else {
470 out_rect = MathUtil::Rectangle<int>(0, (int)(params.height * params.res_scale_height), (int)(params.width * params.res_scale_width), 0);
471 }
472
473 return GetSurface(params, match_res_scale, load_if_create);
474}
475
476CachedSurface* RasterizerCacheOpenGL::GetTextureSurface(const Pica::Regs::FullTextureConfig& config) {
477 Pica::DebugUtils::TextureInfo info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config.config, config.format);
478
479 CachedSurface params;
480 params.addr = info.physical_address;
481 params.width = info.width;
482 params.height = info.height;
483 params.is_tiled = true;
484 params.pixel_format = CachedSurface::PixelFormatFromTextureFormat(info.format);
485 return GetSurface(params, false, true);
486}
487
488std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> RasterizerCacheOpenGL::GetFramebufferSurfaces(const Pica::Regs::FramebufferConfig& config) {
489 const auto& regs = Pica::g_state.regs;
490
491 // Make sur that framebuffers don't overlap if both color and depth are being used
492 u32 fb_area = config.GetWidth() * config.GetHeight();
493 bool framebuffers_overlap = config.GetColorBufferPhysicalAddress() != 0 &&
494 config.GetDepthBufferPhysicalAddress() != 0 &&
495 MathUtil::IntervalsIntersect(config.GetColorBufferPhysicalAddress(), fb_area * GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(config.color_format.Value())),
496 config.GetDepthBufferPhysicalAddress(), fb_area * Pica::Regs::BytesPerDepthPixel(config.depth_format));
497 bool using_color_fb = config.GetColorBufferPhysicalAddress() != 0;
498 bool using_depth_fb = config.GetDepthBufferPhysicalAddress() != 0 && (regs.output_merger.depth_test_enable || regs.output_merger.depth_write_enable || !framebuffers_overlap);
499
500 if (framebuffers_overlap && using_color_fb && using_depth_fb) {
501 LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; overlapping framebuffers not supported!");
502 using_depth_fb = false;
503 }
504
505 // get color and depth surfaces
506 CachedSurface color_params;
507 CachedSurface depth_params;
508 color_params.width = depth_params.width = config.GetWidth();
509 color_params.height = depth_params.height = config.GetHeight();
510 color_params.is_tiled = depth_params.is_tiled = true;
511 if (VideoCore::g_scaled_resolution_enabled) {
512 auto layout = VideoCore::g_emu_window->GetFramebufferLayout();
513
514 // Assume same scaling factor for top and bottom screens
515 color_params.res_scale_width = depth_params.res_scale_width = (float)layout.top_screen.GetWidth() / VideoCore::kScreenTopWidth;
516 color_params.res_scale_height = depth_params.res_scale_height = (float)layout.top_screen.GetHeight() / VideoCore::kScreenTopHeight;
517 }
518
519 color_params.addr = config.GetColorBufferPhysicalAddress();
520 color_params.pixel_format = CachedSurface::PixelFormatFromColorFormat(config.color_format);
521
522 depth_params.addr = config.GetDepthBufferPhysicalAddress();
523 depth_params.pixel_format = CachedSurface::PixelFormatFromDepthFormat(config.depth_format);
524
525 MathUtil::Rectangle<int> color_rect;
526 CachedSurface* color_surface = using_color_fb ? GetSurfaceRect(color_params, true, true, color_rect) : nullptr;
527
528 MathUtil::Rectangle<int> depth_rect;
529 CachedSurface* depth_surface = using_depth_fb ? GetSurfaceRect(depth_params, true, true, depth_rect) : nullptr;
530
531 // Sanity check to make sure found surfaces aren't the same
532 if (using_depth_fb && using_color_fb && color_surface == depth_surface) {
533 LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer surfaces overlap; overlapping surfaces not supported!");
534 using_depth_fb = false;
535 depth_surface = nullptr;
536 }
537
538 MathUtil::Rectangle<int> rect;
539
540 if (color_surface != nullptr && depth_surface != nullptr && (depth_rect.left != color_rect.left || depth_rect.top != color_rect.top)) {
541 // Can't specify separate color and depth viewport offsets in OpenGL, so re-zero both if they don't match
542 if (color_rect.left != 0 || color_rect.top != 0) {
543 color_surface = GetSurface(color_params, true, true);
544 }
545
546 if (depth_rect.left != 0 || depth_rect.top != 0) {
547 depth_surface = GetSurface(depth_params, true, true);
548 }
549
550 if (!color_surface->is_tiled) {
551 rect = MathUtil::Rectangle<int>(0, 0, (int)(color_params.width * color_params.res_scale_width), (int)(color_params.height * color_params.res_scale_height));
74 } else { 552 } else {
75 ++it; 553 rect = MathUtil::Rectangle<int>(0, (int)(color_params.height * color_params.res_scale_height), (int)(color_params.width * color_params.res_scale_width), 0);
76 } 554 }
555 } else if (color_surface != nullptr) {
556 rect = color_rect;
557 } else if (depth_surface != nullptr) {
558 rect = depth_rect;
559 } else {
560 rect = MathUtil::Rectangle<int>(0, 0, 0, 0);
77 } 561 }
562
563 return std::make_tuple(color_surface, depth_surface, rect);
78} 564}
79 565
80void RasterizerCacheOpenGL::InvalidateAll() { 566CachedSurface* RasterizerCacheOpenGL::TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config) {
81 texture_cache.clear(); 567 auto surface_interval = boost::icl::interval<PAddr>::right_open(config.GetStartAddress(), config.GetEndAddress());
568 auto range = surface_cache.equal_range(surface_interval);
569 for (auto it = range.first; it != range.second; ++it) {
570 for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) {
571 int bits_per_value = 0;
572 if (config.fill_24bit) {
573 bits_per_value = 24;
574 } else if (config.fill_32bit) {
575 bits_per_value = 32;
576 } else {
577 bits_per_value = 16;
578 }
579
580 CachedSurface* surface = it2->get();
581
582 if (surface->addr == config.GetStartAddress() &&
583 CachedSurface::GetFormatBpp(surface->pixel_format) == bits_per_value &&
584 (surface->width * surface->height * CachedSurface::GetFormatBpp(surface->pixel_format) / 8) == (config.GetEndAddress() - config.GetStartAddress()))
585 {
586 return surface;
587 }
588 }
589 }
590
591 return nullptr;
592}
593
594MICROPROFILE_DEFINE(OpenGL_SurfaceDownload, "OpenGL", "Surface Download", MP_RGB(128, 192, 64));
595void RasterizerCacheOpenGL::FlushSurface(CachedSurface* surface) {
596 using PixelFormat = CachedSurface::PixelFormat;
597 using SurfaceType = CachedSurface::SurfaceType;
598
599 if (!surface->dirty) {
600 return;
601 }
602
603 MICROPROFILE_SCOPE(OpenGL_SurfaceDownload);
604
605 u8* dst_buffer = Memory::GetPhysicalPointer(surface->addr);
606 if (dst_buffer == nullptr) {
607 return;
608 }
609
610 OpenGLState cur_state = OpenGLState::GetCurState();
611 GLuint old_tex = cur_state.texture_units[0].texture_2d;
612
613 OGLTexture unscaled_tex;
614 GLuint texture_to_flush = surface->texture.handle;
615
616 // If not 1x scale, blit scaled texture to a new 1x texture and use that to flush
617 if (surface->res_scale_width != 1.f || surface->res_scale_height != 1.f) {
618 unscaled_tex.Create();
619
620 AllocateSurfaceTexture(unscaled_tex.handle, surface->pixel_format, surface->width, surface->height);
621 BlitTextures(surface->texture.handle, unscaled_tex.handle, CachedSurface::GetFormatType(surface->pixel_format),
622 MathUtil::Rectangle<int>(0, 0, surface->GetScaledWidth(), surface->GetScaledHeight()),
623 MathUtil::Rectangle<int>(0, 0, surface->width, surface->height));
624
625 texture_to_flush = unscaled_tex.handle;
626 }
627
628 cur_state.texture_units[0].texture_2d = texture_to_flush;
629 cur_state.Apply();
630 glActiveTexture(GL_TEXTURE0);
631
632 glPixelStorei(GL_PACK_ROW_LENGTH, (GLint)surface->stride);
633 if (!surface->is_tiled) {
634 // TODO: Ensure this will always be a color format, not a depth or other format
635 ASSERT((size_t)surface->pixel_format < fb_format_tuples.size());
636 const FormatTuple& tuple = fb_format_tuples[(unsigned int)surface->pixel_format];
637
638 glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, dst_buffer);
639 } else {
640 SurfaceType type = CachedSurface::GetFormatType(surface->pixel_format);
641 if (type != SurfaceType::Depth && type != SurfaceType::DepthStencil) {
642 ASSERT((size_t)surface->pixel_format < fb_format_tuples.size());
643 const FormatTuple& tuple = fb_format_tuples[(unsigned int)surface->pixel_format];
644
645 u32 bytes_per_pixel = CachedSurface::GetFormatBpp(surface->pixel_format) / 8;
646
647 std::vector<u8> temp_gl_buffer(surface->width * surface->height * bytes_per_pixel);
648
649 glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data());
650
651 // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion is necessary.
652 MortonCopyPixels(surface->pixel_format, surface->width, surface->height, bytes_per_pixel, bytes_per_pixel, dst_buffer, temp_gl_buffer.data(), false);
653 } else {
654 // Depth/Stencil formats need special treatment since they aren't sampleable using LookupTexture and can't use RGBA format
655 size_t tuple_idx = (size_t)surface->pixel_format - 14;
656 ASSERT(tuple_idx < depth_format_tuples.size());
657 const FormatTuple& tuple = depth_format_tuples[tuple_idx];
658
659 u32 bytes_per_pixel = CachedSurface::GetFormatBpp(surface->pixel_format) / 8;
660
661 // OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type
662 bool use_4bpp = (surface->pixel_format == PixelFormat::D24);
663
664 u32 gl_bytes_per_pixel = use_4bpp ? 4 : bytes_per_pixel;
665
666 std::vector<u8> temp_gl_buffer(surface->width * surface->height * gl_bytes_per_pixel);
667
668 glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data());
669
670 u8* temp_gl_buffer_ptr = use_4bpp ? temp_gl_buffer.data() + 1 : temp_gl_buffer.data();
671
672 MortonCopyPixels(surface->pixel_format, surface->width, surface->height, bytes_per_pixel, gl_bytes_per_pixel, dst_buffer, temp_gl_buffer_ptr, false);
673 }
674 }
675 glPixelStorei(GL_PACK_ROW_LENGTH, 0);
676
677 surface->dirty = false;
678
679 cur_state.texture_units[0].texture_2d = old_tex;
680 cur_state.Apply();
681}
682
683void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, bool invalidate) {
684 if (size == 0) {
685 return;
686 }
687
688 // Gather up unique surfaces that touch the region
689 std::unordered_set<std::shared_ptr<CachedSurface>> touching_surfaces;
690
691 auto surface_interval = boost::icl::interval<PAddr>::right_open(addr, addr + size);
692 auto cache_upper_bound = surface_cache.upper_bound(surface_interval);
693 for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) {
694 std::copy_if(it->second.begin(), it->second.end(), std::inserter(touching_surfaces, touching_surfaces.end()),
695 [skip_surface](std::shared_ptr<CachedSurface> surface) { return (surface.get() != skip_surface); });
696 }
697
698 // Flush and invalidate surfaces
699 for (auto surface : touching_surfaces) {
700 FlushSurface(surface.get());
701 if (invalidate) {
702 Memory::RasterizerMarkRegionCached(surface->addr, surface->size, -1);
703 surface_cache.subtract(std::make_pair(boost::icl::interval<PAddr>::right_open(surface->addr, surface->addr + surface->size), std::set<std::shared_ptr<CachedSurface>>({ surface })));
704 }
705 }
706}
707
708void RasterizerCacheOpenGL::FlushAll() {
709 for (auto& surfaces : surface_cache) {
710 for (auto& surface : surfaces.second) {
711 FlushSurface(surface.get());
712 }
713 }
82} 714}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index b69651427..225596415 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -4,40 +4,219 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <map> 7#include <array>
8#include <memory> 8#include <memory>
9#include <set>
10#include <tuple>
11
12#include <boost/icl/interval_map.hpp>
13#include <glad/glad.h>
14
15#include "common/assert.h"
16#include "common/common_funcs.h"
17#include "common/common_types.h"
18
19#include "core/hw/gpu.h"
9 20
10#include "video_core/pica.h" 21#include "video_core/pica.h"
11#include "video_core/debug_utils/debug_utils.h"
12#include "video_core/renderer_opengl/gl_resource_manager.h" 22#include "video_core/renderer_opengl/gl_resource_manager.h"
13#include "video_core/renderer_opengl/gl_state.h" 23
24namespace MathUtil {
25template <class T> struct Rectangle;
26}
27
28struct CachedSurface;
29
30using SurfaceCache = boost::icl::interval_map<PAddr, std::set<std::shared_ptr<CachedSurface>>>;
31
32struct CachedSurface {
33 enum class PixelFormat {
34 // First 5 formats are shared between textures and color buffers
35 RGBA8 = 0,
36 RGB8 = 1,
37 RGB5A1 = 2,
38 RGB565 = 3,
39 RGBA4 = 4,
40
41 // Texture-only formats
42 IA8 = 5,
43 RG8 = 6,
44 I8 = 7,
45 A8 = 8,
46 IA4 = 9,
47 I4 = 10,
48 A4 = 11,
49 ETC1 = 12,
50 ETC1A4 = 13,
51
52 // Depth buffer-only formats
53 D16 = 14,
54 // gap
55 D24 = 16,
56 D24S8 = 17,
57
58 Invalid = 255,
59 };
60
61 enum class SurfaceType {
62 Color = 0,
63 Texture = 1,
64 Depth = 2,
65 DepthStencil = 3,
66 Invalid = 4,
67 };
68
69 static unsigned int GetFormatBpp(CachedSurface::PixelFormat format) {
70 static const std::array<unsigned int, 18> bpp_table = {
71 32, // RGBA8
72 24, // RGB8
73 16, // RGB5A1
74 16, // RGB565
75 16, // RGBA4
76 16, // IA8
77 16, // RG8
78 8, // I8
79 8, // A8
80 8, // IA4
81 4, // I4
82 4, // A4
83 4, // ETC1
84 8, // ETC1A4
85 16, // D16
86 0,
87 24, // D24
88 32, // D24S8
89 };
90
91 ASSERT((unsigned int)format < ARRAY_SIZE(bpp_table));
92 return bpp_table[(unsigned int)format];
93 }
94
95 static PixelFormat PixelFormatFromTextureFormat(Pica::Regs::TextureFormat format) {
96 return ((unsigned int)format < 14) ? (PixelFormat)format : PixelFormat::Invalid;
97 }
98
99 static PixelFormat PixelFormatFromColorFormat(Pica::Regs::ColorFormat format) {
100 return ((unsigned int)format < 5) ? (PixelFormat)format : PixelFormat::Invalid;
101 }
102
103 static PixelFormat PixelFormatFromDepthFormat(Pica::Regs::DepthFormat format) {
104 return ((unsigned int)format < 4) ? (PixelFormat)((unsigned int)format + 14) : PixelFormat::Invalid;
105 }
106
107 static PixelFormat PixelFormatFromGPUPixelFormat(GPU::Regs::PixelFormat format) {
108 switch (format) {
109 // RGB565 and RGB5A1 are switched in PixelFormat compared to ColorFormat
110 case GPU::Regs::PixelFormat::RGB565:
111 return PixelFormat::RGB565;
112 case GPU::Regs::PixelFormat::RGB5A1:
113 return PixelFormat::RGB5A1;
114 default:
115 return ((unsigned int)format < 5) ? (PixelFormat)format : PixelFormat::Invalid;
116 }
117 }
118
119 static bool CheckFormatsBlittable(PixelFormat pixel_format_a, PixelFormat pixel_format_b) {
120 SurfaceType a_type = GetFormatType(pixel_format_a);
121 SurfaceType b_type = GetFormatType(pixel_format_b);
122
123 if ((a_type == SurfaceType::Color || a_type == SurfaceType::Texture) && (b_type == SurfaceType::Color || b_type == SurfaceType::Texture)) {
124 return true;
125 }
126
127 if (a_type == SurfaceType::Depth && b_type == SurfaceType::Depth) {
128 return true;
129 }
130
131 if (a_type == SurfaceType::DepthStencil && b_type == SurfaceType::DepthStencil) {
132 return true;
133 }
134
135 return false;
136 }
137
138 static SurfaceType GetFormatType(PixelFormat pixel_format) {
139 if ((unsigned int)pixel_format < 5) {
140 return SurfaceType::Color;
141 }
142
143 if ((unsigned int)pixel_format < 14) {
144 return SurfaceType::Texture;
145 }
146
147 if (pixel_format == PixelFormat::D16 || pixel_format == PixelFormat::D24) {
148 return SurfaceType::Depth;
149 }
150
151 if (pixel_format == PixelFormat::D24S8) {
152 return SurfaceType::DepthStencil;
153 }
154
155 return SurfaceType::Invalid;
156 }
157
158 u32 GetScaledWidth() const {
159 return (u32)(width * res_scale_width);
160 }
161
162 u32 GetScaledHeight() const {
163 return (u32)(height * res_scale_height);
164 }
165
166 PAddr addr;
167 u32 size;
168
169 PAddr min_valid;
170 PAddr max_valid;
171
172 OGLTexture texture;
173 u32 width;
174 u32 height;
175 u32 stride = 0;
176 float res_scale_width = 1.f;
177 float res_scale_height = 1.f;
178
179 bool is_tiled;
180 PixelFormat pixel_format;
181 bool dirty;
182};
14 183
15class RasterizerCacheOpenGL : NonCopyable { 184class RasterizerCacheOpenGL : NonCopyable {
16public: 185public:
186 RasterizerCacheOpenGL();
17 ~RasterizerCacheOpenGL(); 187 ~RasterizerCacheOpenGL();
18 188
189 /// Blits one texture to another
190 bool BlitTextures(GLuint src_tex, GLuint dst_tex, CachedSurface::SurfaceType type, const MathUtil::Rectangle<int>& src_rect, const MathUtil::Rectangle<int>& dst_rect);
191
192 /// Attempt to blit one surface's texture to another
193 bool TryBlitSurfaces(CachedSurface* src_surface, const MathUtil::Rectangle<int>& src_rect, CachedSurface* dst_surface, const MathUtil::Rectangle<int>& dst_rect);
194
19 /// Loads a texture from 3DS memory to OpenGL and caches it (if not already cached) 195 /// Loads a texture from 3DS memory to OpenGL and caches it (if not already cached)
20 void LoadAndBindTexture(OpenGLState &state, unsigned texture_unit, const Pica::DebugUtils::TextureInfo& info); 196 CachedSurface* GetSurface(const CachedSurface& params, bool match_res_scale, bool load_if_create);
21 197
22 void LoadAndBindTexture(OpenGLState &state, unsigned texture_unit, const Pica::Regs::FullTextureConfig& config) { 198 /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from 3DS memory to OpenGL and caches it (if not already cached)
23 LoadAndBindTexture(state, texture_unit, Pica::DebugUtils::TextureInfo::FromPicaRegister(config.config, config.format)); 199 CachedSurface* GetSurfaceRect(const CachedSurface& params, bool match_res_scale, bool load_if_create, MathUtil::Rectangle<int>& out_rect);
24 }
25 200
26 /// Invalidate any cached resource intersecting the specified region. 201 /// Gets a surface based on the texture configuration
27 void InvalidateInRange(PAddr addr, u32 size, bool ignore_hash = false); 202 CachedSurface* GetTextureSurface(const Pica::Regs::FullTextureConfig& config);
28 203
29 /// Invalidate all cached OpenGL resources tracked by this cache manager 204 /// Gets the color and depth surfaces and rect (resolution scaled) based on the framebuffer configuration
30 void InvalidateAll(); 205 std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> GetFramebufferSurfaces(const Pica::Regs::FramebufferConfig& config);
31 206
32private: 207 /// Attempt to get a surface that exactly matches the fill region and format
33 struct CachedTexture { 208 CachedSurface* TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config);
34 OGLTexture texture; 209
35 GLuint width; 210 /// Write the surface back to memory
36 GLuint height; 211 void FlushSurface(CachedSurface* surface);
37 u32 size;
38 u64 hash;
39 PAddr addr;
40 };
41 212
42 std::map<PAddr, std::unique_ptr<CachedTexture>> texture_cache; 213 /// Write any cached resources overlapping the region back to memory (if dirty) and optionally invalidate them in the cache
214 void FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, bool invalidate);
215
216 /// Flush all cached resources tracked by this cache manager
217 void FlushAll();
218
219private:
220 SurfaceCache surface_cache;
221 OGLFramebuffer transfer_framebuffers[2];
43}; 222};
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index ee4b54ab9..9011caa39 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -2,9 +2,17 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <array>
6#include <cstddef>
7
8#include "common/assert.h"
9#include "common/bit_field.h"
10#include "common/logging/log.h"
11
5#include "video_core/pica.h" 12#include "video_core/pica.h"
6#include "video_core/renderer_opengl/gl_rasterizer.h" 13#include "video_core/renderer_opengl/gl_rasterizer.h"
7#include "video_core/renderer_opengl/gl_shader_gen.h" 14#include "video_core/renderer_opengl/gl_shader_gen.h"
15#include "video_core/renderer_opengl/gl_shader_util.h"
8 16
9using Pica::Regs; 17using Pica::Regs;
10using TevStageConfig = Regs::TevStageConfig; 18using TevStageConfig = Regs::TevStageConfig;
@@ -198,6 +206,9 @@ static void AppendColorCombiner(std::string& out, TevStageConfig::Operation oper
198 case Operation::AddThenMultiply: 206 case Operation::AddThenMultiply:
199 out += "min(" + variable_name + "[0] + " + variable_name + "[1], vec3(1.0)) * " + variable_name + "[2]"; 207 out += "min(" + variable_name + "[0] + " + variable_name + "[1], vec3(1.0)) * " + variable_name + "[2]";
200 break; 208 break;
209 case Operation::Dot3_RGB:
210 out += "vec3(dot(" + variable_name + "[0] - vec3(0.5), " + variable_name + "[1] - vec3(0.5)) * 4.0)";
211 break;
201 default: 212 default:
202 out += "vec3(0.0)"; 213 out += "vec3(0.0)";
203 LOG_CRITICAL(Render_OpenGL, "Unknown color combiner operation: %u", operation); 214 LOG_CRITICAL(Render_OpenGL, "Unknown color combiner operation: %u", operation);
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index 0ca9d2879..3eb07d57a 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -6,7 +6,7 @@
6 6
7#include <string> 7#include <string>
8 8
9#include "video_core/renderer_opengl/gl_rasterizer.h" 9struct PicaShaderConfig;
10 10
11namespace GLShader { 11namespace GLShader {
12 12
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp
index e3f7a5868..dded3db46 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_util.cpp
@@ -2,9 +2,10 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
6#include <vector> 5#include <vector>
7 6
7#include <glad/glad.h>
8
8#include "common/logging/log.h" 9#include "common/logging/log.h"
9#include "video_core/renderer_opengl/gl_shader_util.h" 10#include "video_core/renderer_opengl/gl_shader_util.h"
10 11
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 08e4d0b54..02cd9f417 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -2,7 +2,11 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "video_core/pica.h" 5#include <glad/glad.h>
6
7#include "common/common_funcs.h"
8#include "common/logging/log.h"
9
6#include "video_core/renderer_opengl/gl_state.h" 10#include "video_core/renderer_opengl/gl_state.h"
7 11
8OpenGLState OpenGLState::cur_state; 12OpenGLState OpenGLState::cur_state;
@@ -48,17 +52,19 @@ OpenGLState::OpenGLState() {
48 texture_unit.sampler = 0; 52 texture_unit.sampler = 0;
49 } 53 }
50 54
51 for (auto& lut : lighting_lut) { 55 for (auto& lut : lighting_luts) {
52 lut.texture_1d = 0; 56 lut.texture_1d = 0;
53 } 57 }
54 58
55 draw.framebuffer = 0; 59 draw.read_framebuffer = 0;
60 draw.draw_framebuffer = 0;
56 draw.vertex_array = 0; 61 draw.vertex_array = 0;
57 draw.vertex_buffer = 0; 62 draw.vertex_buffer = 0;
63 draw.uniform_buffer = 0;
58 draw.shader_program = 0; 64 draw.shader_program = 0;
59} 65}
60 66
61void OpenGLState::Apply() { 67void OpenGLState::Apply() const {
62 // Culling 68 // Culling
63 if (cull.enabled != cur_state.cull.enabled) { 69 if (cull.enabled != cur_state.cull.enabled) {
64 if (cull.enabled) { 70 if (cull.enabled) {
@@ -175,16 +181,19 @@ void OpenGLState::Apply() {
175 } 181 }
176 182
177 // Lighting LUTs 183 // Lighting LUTs
178 for (unsigned i = 0; i < ARRAY_SIZE(lighting_lut); ++i) { 184 for (unsigned i = 0; i < ARRAY_SIZE(lighting_luts); ++i) {
179 if (lighting_lut[i].texture_1d != cur_state.lighting_lut[i].texture_1d) { 185 if (lighting_luts[i].texture_1d != cur_state.lighting_luts[i].texture_1d) {
180 glActiveTexture(GL_TEXTURE3 + i); 186 glActiveTexture(GL_TEXTURE3 + i);
181 glBindTexture(GL_TEXTURE_1D, lighting_lut[i].texture_1d); 187 glBindTexture(GL_TEXTURE_1D, lighting_luts[i].texture_1d);
182 } 188 }
183 } 189 }
184 190
185 // Framebuffer 191 // Framebuffer
186 if (draw.framebuffer != cur_state.draw.framebuffer) { 192 if (draw.read_framebuffer != cur_state.draw.read_framebuffer) {
187 glBindFramebuffer(GL_FRAMEBUFFER, draw.framebuffer); 193 glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
194 }
195 if (draw.draw_framebuffer != cur_state.draw.draw_framebuffer) {
196 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer);
188 } 197 }
189 198
190 // Vertex array 199 // Vertex array
@@ -210,45 +219,58 @@ void OpenGLState::Apply() {
210 cur_state = *this; 219 cur_state = *this;
211} 220}
212 221
213void OpenGLState::ResetTexture(GLuint id) { 222GLenum OpenGLState::CheckFBStatus(GLenum target) {
223 GLenum fb_status = glCheckFramebufferStatus(target);
224 if (fb_status != GL_FRAMEBUFFER_COMPLETE) {
225 const char* fb_description = (target == GL_READ_FRAMEBUFFER ? "READ" : (target == GL_DRAW_FRAMEBUFFER ? "DRAW" : "UNK"));
226 LOG_CRITICAL(Render_OpenGL, "OpenGL %s framebuffer check failed, status %X", fb_description, fb_status);
227 }
228
229 return fb_status;
230}
231
232void OpenGLState::ResetTexture(GLuint handle) {
214 for (auto& unit : cur_state.texture_units) { 233 for (auto& unit : cur_state.texture_units) {
215 if (unit.texture_2d == id) { 234 if (unit.texture_2d == handle) {
216 unit.texture_2d = 0; 235 unit.texture_2d = 0;
217 } 236 }
218 } 237 }
219} 238}
220 239
221void OpenGLState::ResetSampler(GLuint id) { 240void OpenGLState::ResetSampler(GLuint handle) {
222 for (auto& unit : cur_state.texture_units) { 241 for (auto& unit : cur_state.texture_units) {
223 if (unit.sampler == id) { 242 if (unit.sampler == handle) {
224 unit.sampler = 0; 243 unit.sampler = 0;
225 } 244 }
226 } 245 }
227} 246}
228 247
229void OpenGLState::ResetProgram(GLuint id) { 248void OpenGLState::ResetProgram(GLuint handle) {
230 if (cur_state.draw.shader_program == id) { 249 if (cur_state.draw.shader_program == handle) {
231 cur_state.draw.shader_program = 0; 250 cur_state.draw.shader_program = 0;
232 } 251 }
233} 252}
234 253
235void OpenGLState::ResetBuffer(GLuint id) { 254void OpenGLState::ResetBuffer(GLuint handle) {
236 if (cur_state.draw.vertex_buffer == id) { 255 if (cur_state.draw.vertex_buffer == handle) {
237 cur_state.draw.vertex_buffer = 0; 256 cur_state.draw.vertex_buffer = 0;
238 } 257 }
239 if (cur_state.draw.uniform_buffer == id) { 258 if (cur_state.draw.uniform_buffer == handle) {
240 cur_state.draw.uniform_buffer = 0; 259 cur_state.draw.uniform_buffer = 0;
241 } 260 }
242} 261}
243 262
244void OpenGLState::ResetVertexArray(GLuint id) { 263void OpenGLState::ResetVertexArray(GLuint handle) {
245 if (cur_state.draw.vertex_array == id) { 264 if (cur_state.draw.vertex_array == handle) {
246 cur_state.draw.vertex_array = 0; 265 cur_state.draw.vertex_array = 0;
247 } 266 }
248} 267}
249 268
250void OpenGLState::ResetFramebuffer(GLuint id) { 269void OpenGLState::ResetFramebuffer(GLuint handle) {
251 if (cur_state.draw.framebuffer == id) { 270 if (cur_state.draw.read_framebuffer == handle) {
252 cur_state.draw.framebuffer = 0; 271 cur_state.draw.read_framebuffer = 0;
272 }
273 if (cur_state.draw.draw_framebuffer == handle) {
274 cur_state.draw.draw_framebuffer = 0;
253 } 275 }
254} 276}
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index e848058d7..24f20e47c 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -63,15 +63,15 @@ public:
63 63
64 struct { 64 struct {
65 GLuint texture_1d; // GL_TEXTURE_BINDING_1D 65 GLuint texture_1d; // GL_TEXTURE_BINDING_1D
66 } lighting_lut[6]; 66 } lighting_luts[6];
67 67
68 struct { 68 struct {
69 GLuint framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING 69 GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING
70 GLuint draw_framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING
70 GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING 71 GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING
71 GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING 72 GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING
72 GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING 73 GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING
73 GLuint shader_program; // GL_CURRENT_PROGRAM 74 GLuint shader_program; // GL_CURRENT_PROGRAM
74 bool shader_dirty;
75 } draw; 75 } draw;
76 76
77 OpenGLState(); 77 OpenGLState();
@@ -82,14 +82,18 @@ public:
82 } 82 }
83 83
84 /// Apply this state as the current OpenGL state 84 /// Apply this state as the current OpenGL state
85 void Apply(); 85 void Apply() const;
86 86
87 static void ResetTexture(GLuint id); 87 /// Check the status of the current OpenGL read or draw framebuffer configuration
88 static void ResetSampler(GLuint id); 88 static GLenum CheckFBStatus(GLenum target);
89 static void ResetProgram(GLuint id); 89
90 static void ResetBuffer(GLuint id); 90 /// Resets and unbinds any references to the given resource in the current OpenGL state
91 static void ResetVertexArray(GLuint id); 91 static void ResetTexture(GLuint handle);
92 static void ResetFramebuffer(GLuint id); 92 static void ResetSampler(GLuint handle);
93 static void ResetProgram(GLuint handle);
94 static void ResetBuffer(GLuint handle);
95 static void ResetVertexArray(GLuint handle);
96 static void ResetFramebuffer(GLuint handle);
93 97
94private: 98private:
95 static OpenGLState cur_state; 99 static OpenGLState cur_state;
diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h
index fd3617d77..976d1f364 100644
--- a/src/video_core/renderer_opengl/pica_to_gl.h
+++ b/src/video_core/renderer_opengl/pica_to_gl.h
@@ -4,9 +4,16 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
8#include <cstddef>
9
7#include <glad/glad.h> 10#include <glad/glad.h>
8 11
12#include "common/assert.h"
13#include "common/bit_field.h"
14#include "common/common_funcs.h"
9#include "common/common_types.h" 15#include "common/common_types.h"
16#include "common/logging/log.h"
10 17
11#include "video_core/pica.h" 18#include "video_core/pica.h"
12 19
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 11c4d0daf..8f424a435 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -5,23 +5,28 @@
5#include <algorithm> 5#include <algorithm>
6#include <cstddef> 6#include <cstddef>
7#include <cstdlib> 7#include <cstdlib>
8#include <memory>
9
10#include <glad/glad.h>
8 11
9#include "common/assert.h" 12#include "common/assert.h"
13#include "common/bit_field.h"
10#include "common/emu_window.h" 14#include "common/emu_window.h"
11#include "common/logging/log.h" 15#include "common/logging/log.h"
12#include "common/profiler_reporting.h" 16#include "common/profiler_reporting.h"
17#include "common/synchronized_wrapper.h"
13 18
14#include "core/memory.h"
15#include "core/settings.h"
16#include "core/hw/gpu.h" 19#include "core/hw/gpu.h"
17#include "core/hw/hw.h" 20#include "core/hw/hw.h"
18#include "core/hw/lcd.h" 21#include "core/hw/lcd.h"
22#include "core/memory.h"
23#include "core/settings.h"
24#include "core/tracer/recorder.h"
19 25
20#include "video_core/video_core.h"
21#include "video_core/debug_utils/debug_utils.h" 26#include "video_core/debug_utils/debug_utils.h"
22#include "video_core/renderer_opengl/gl_rasterizer.h" 27#include "video_core/rasterizer_interface.h"
23#include "video_core/renderer_opengl/gl_shader_util.h"
24#include "video_core/renderer_opengl/renderer_opengl.h" 28#include "video_core/renderer_opengl/renderer_opengl.h"
29#include "video_core/video_core.h"
25 30
26static const char vertex_shader[] = R"( 31static const char vertex_shader[] = R"(
27#version 150 core 32#version 150 core
@@ -107,7 +112,7 @@ void RendererOpenGL::SwapBuffers() {
107 OpenGLState prev_state = OpenGLState::GetCurState(); 112 OpenGLState prev_state = OpenGLState::GetCurState();
108 state.Apply(); 113 state.Apply();
109 114
110 for(int i : {0, 1}) { 115 for (int i : {0, 1}) {
111 const auto& framebuffer = GPU::g_regs.framebuffer_config[i]; 116 const auto& framebuffer = GPU::g_regs.framebuffer_config[i];
112 117
113 // Main LCD (0): 0x1ED02204, Sub LCD (1): 0x1ED02A04 118 // Main LCD (0): 0x1ED02204, Sub LCD (1): 0x1ED02A04
@@ -117,25 +122,25 @@ void RendererOpenGL::SwapBuffers() {
117 LCD::Read(color_fill.raw, lcd_color_addr); 122 LCD::Read(color_fill.raw, lcd_color_addr);
118 123
119 if (color_fill.is_enabled) { 124 if (color_fill.is_enabled) {
120 LoadColorToActiveGLTexture(color_fill.color_r, color_fill.color_g, color_fill.color_b, textures[i]); 125 LoadColorToActiveGLTexture(color_fill.color_r, color_fill.color_g, color_fill.color_b, screen_infos[i].texture);
121 126
122 // Resize the texture in case the framebuffer size has changed 127 // Resize the texture in case the framebuffer size has changed
123 textures[i].width = 1; 128 screen_infos[i].texture.width = 1;
124 textures[i].height = 1; 129 screen_infos[i].texture.height = 1;
125 } else { 130 } else {
126 if (textures[i].width != (GLsizei)framebuffer.width || 131 if (screen_infos[i].texture.width != (GLsizei)framebuffer.width ||
127 textures[i].height != (GLsizei)framebuffer.height || 132 screen_infos[i].texture.height != (GLsizei)framebuffer.height ||
128 textures[i].format != framebuffer.color_format) { 133 screen_infos[i].texture.format != framebuffer.color_format) {
129 // Reallocate texture if the framebuffer size has changed. 134 // Reallocate texture if the framebuffer size has changed.
130 // This is expected to not happen very often and hence should not be a 135 // This is expected to not happen very often and hence should not be a
131 // performance problem. 136 // performance problem.
132 ConfigureFramebufferTexture(textures[i], framebuffer); 137 ConfigureFramebufferTexture(screen_infos[i].texture, framebuffer);
133 } 138 }
134 LoadFBToActiveGLTexture(framebuffer, textures[i]); 139 LoadFBToScreenInfo(framebuffer, screen_infos[i]);
135 140
136 // Resize the texture in case the framebuffer size has changed 141 // Resize the texture in case the framebuffer size has changed
137 textures[i].width = framebuffer.width; 142 screen_infos[i].texture.width = framebuffer.width;
138 textures[i].height = framebuffer.height; 143 screen_infos[i].texture.height = framebuffer.height;
139 } 144 }
140 } 145 }
141 146
@@ -166,8 +171,8 @@ void RendererOpenGL::SwapBuffers() {
166/** 171/**
167 * Loads framebuffer from emulated memory into the active OpenGL texture. 172 * Loads framebuffer from emulated memory into the active OpenGL texture.
168 */ 173 */
169void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig& framebuffer, 174void RendererOpenGL::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer,
170 const TextureInfo& texture) { 175 ScreenInfo& screen_info) {
171 176
172 const PAddr framebuffer_addr = framebuffer.active_fb == 0 ? 177 const PAddr framebuffer_addr = framebuffer.active_fb == 0 ?
173 framebuffer.address_left1 : framebuffer.address_left2; 178 framebuffer.address_left1 : framebuffer.address_left2;
@@ -177,8 +182,6 @@ void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig&
177 framebuffer_addr, (int)framebuffer.width, 182 framebuffer_addr, (int)framebuffer.width,
178 (int)framebuffer.height, (int)framebuffer.format); 183 (int)framebuffer.height, (int)framebuffer.format);
179 184
180 const u8* framebuffer_data = Memory::GetPhysicalPointer(framebuffer_addr);
181
182 int bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format); 185 int bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format);
183 size_t pixel_stride = framebuffer.stride / bpp; 186 size_t pixel_stride = framebuffer.stride / bpp;
184 187
@@ -189,24 +192,34 @@ void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig&
189 // only allows rows to have a memory alignement of 4. 192 // only allows rows to have a memory alignement of 4.
190 ASSERT(pixel_stride % 4 == 0); 193 ASSERT(pixel_stride % 4 == 0);
191 194
192 state.texture_units[0].texture_2d = texture.handle; 195 if (!Rasterizer()->AccelerateDisplay(framebuffer, framebuffer_addr, static_cast<u32>(pixel_stride), screen_info)) {
193 state.Apply(); 196 // Reset the screen info's display texture to its own permanent texture
197 screen_info.display_texture = screen_info.texture.resource.handle;
198 screen_info.display_texcoords = MathUtil::Rectangle<float>(0.f, 0.f, 1.f, 1.f);
194 199
195 glActiveTexture(GL_TEXTURE0); 200 Memory::RasterizerFlushRegion(framebuffer_addr, framebuffer.stride * framebuffer.height);
196 glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)pixel_stride);
197 201
198 // Update existing texture 202 const u8* framebuffer_data = Memory::GetPhysicalPointer(framebuffer_addr);
199 // TODO: Test what happens on hardware when you change the framebuffer dimensions so that they
200 // differ from the LCD resolution.
201 // TODO: Applications could theoretically crash Citra here by specifying too large
202 // framebuffer sizes. We should make sure that this cannot happen.
203 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height,
204 texture.gl_format, texture.gl_type, framebuffer_data);
205 203
206 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); 204 state.texture_units[0].texture_2d = screen_info.texture.resource.handle;
205 state.Apply();
207 206
208 state.texture_units[0].texture_2d = 0; 207 glActiveTexture(GL_TEXTURE0);
209 state.Apply(); 208 glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)pixel_stride);
209
210 // Update existing texture
211 // TODO: Test what happens on hardware when you change the framebuffer dimensions so that they
212 // differ from the LCD resolution.
213 // TODO: Applications could theoretically crash Citra here by specifying too large
214 // framebuffer sizes. We should make sure that this cannot happen.
215 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height,
216 screen_info.texture.gl_format, screen_info.texture.gl_type, framebuffer_data);
217
218 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
219
220 state.texture_units[0].texture_2d = 0;
221 state.Apply();
222 }
210} 223}
211 224
212/** 225/**
@@ -216,7 +229,7 @@ void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig&
216 */ 229 */
217void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, 230void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b,
218 const TextureInfo& texture) { 231 const TextureInfo& texture) {
219 state.texture_units[0].texture_2d = texture.handle; 232 state.texture_units[0].texture_2d = texture.resource.handle;
220 state.Apply(); 233 state.Apply();
221 234
222 glActiveTexture(GL_TEXTURE0); 235 glActiveTexture(GL_TEXTURE0);
@@ -224,6 +237,9 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color
224 237
225 // Update existing texture 238 // Update existing texture
226 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, framebuffer_data); 239 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, framebuffer_data);
240
241 state.texture_units[0].texture_2d = 0;
242 state.Apply();
227} 243}
228 244
229/** 245/**
@@ -233,20 +249,22 @@ void RendererOpenGL::InitOpenGLObjects() {
233 glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, 0.0f); 249 glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, 0.0f);
234 250
235 // Link shaders and get variable locations 251 // Link shaders and get variable locations
236 program_id = GLShader::LoadProgram(vertex_shader, fragment_shader); 252 shader.Create(vertex_shader, fragment_shader);
237 uniform_modelview_matrix = glGetUniformLocation(program_id, "modelview_matrix"); 253 state.draw.shader_program = shader.handle;
238 uniform_color_texture = glGetUniformLocation(program_id, "color_texture"); 254 state.Apply();
239 attrib_position = glGetAttribLocation(program_id, "vert_position"); 255 uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix");
240 attrib_tex_coord = glGetAttribLocation(program_id, "vert_tex_coord"); 256 uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture");
257 attrib_position = glGetAttribLocation(shader.handle, "vert_position");
258 attrib_tex_coord = glGetAttribLocation(shader.handle, "vert_tex_coord");
241 259
242 // Generate VBO handle for drawing 260 // Generate VBO handle for drawing
243 glGenBuffers(1, &vertex_buffer_handle); 261 vertex_buffer.Create();
244 262
245 // Generate VAO 263 // Generate VAO
246 glGenVertexArrays(1, &vertex_array_handle); 264 vertex_array.Create();
247 265
248 state.draw.vertex_array = vertex_array_handle; 266 state.draw.vertex_array = vertex_array.handle;
249 state.draw.vertex_buffer = vertex_buffer_handle; 267 state.draw.vertex_buffer = vertex_buffer.handle;
250 state.draw.uniform_buffer = 0; 268 state.draw.uniform_buffer = 0;
251 state.Apply(); 269 state.Apply();
252 270
@@ -258,13 +276,13 @@ void RendererOpenGL::InitOpenGLObjects() {
258 glEnableVertexAttribArray(attrib_tex_coord); 276 glEnableVertexAttribArray(attrib_tex_coord);
259 277
260 // Allocate textures for each screen 278 // Allocate textures for each screen
261 for (auto& texture : textures) { 279 for (auto& screen_info : screen_infos) {
262 glGenTextures(1, &texture.handle); 280 screen_info.texture.resource.Create();
263 281
264 // Allocation of storage is deferred until the first frame, when we 282 // Allocation of storage is deferred until the first frame, when we
265 // know the framebuffer size. 283 // know the framebuffer size.
266 284
267 state.texture_units[0].texture_2d = texture.handle; 285 state.texture_units[0].texture_2d = screen_info.texture.resource.handle;
268 state.Apply(); 286 state.Apply();
269 287
270 glActiveTexture(GL_TEXTURE0); 288 glActiveTexture(GL_TEXTURE0);
@@ -273,6 +291,8 @@ void RendererOpenGL::InitOpenGLObjects() {
273 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 291 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
274 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); 292 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
275 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); 293 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
294
295 screen_info.display_texture = screen_info.texture.resource.handle;
276 } 296 }
277 297
278 state.texture_units[0].texture_2d = 0; 298 state.texture_units[0].texture_2d = 0;
@@ -327,30 +347,38 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
327 UNIMPLEMENTED(); 347 UNIMPLEMENTED();
328 } 348 }
329 349
330 state.texture_units[0].texture_2d = texture.handle; 350 state.texture_units[0].texture_2d = texture.resource.handle;
331 state.Apply(); 351 state.Apply();
332 352
333 glActiveTexture(GL_TEXTURE0); 353 glActiveTexture(GL_TEXTURE0);
334 glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0, 354 glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0,
335 texture.gl_format, texture.gl_type, nullptr); 355 texture.gl_format, texture.gl_type, nullptr);
356
357 state.texture_units[0].texture_2d = 0;
358 state.Apply();
336} 359}
337 360
338/** 361/**
339 * Draws a single texture to the emulator window, rotating the texture to correct for the 3DS's LCD rotation. 362 * Draws a single texture to the emulator window, rotating the texture to correct for the 3DS's LCD rotation.
340 */ 363 */
341void RendererOpenGL::DrawSingleScreenRotated(const TextureInfo& texture, float x, float y, float w, float h) { 364void RendererOpenGL::DrawSingleScreenRotated(const ScreenInfo& screen_info, float x, float y, float w, float h) {
365 auto& texcoords = screen_info.display_texcoords;
366
342 std::array<ScreenRectVertex, 4> vertices = {{ 367 std::array<ScreenRectVertex, 4> vertices = {{
343 ScreenRectVertex(x, y, 1.f, 0.f), 368 ScreenRectVertex(x, y, texcoords.bottom, texcoords.left),
344 ScreenRectVertex(x+w, y, 1.f, 1.f), 369 ScreenRectVertex(x+w, y, texcoords.bottom, texcoords.right),
345 ScreenRectVertex(x, y+h, 0.f, 0.f), 370 ScreenRectVertex(x, y+h, texcoords.top, texcoords.left),
346 ScreenRectVertex(x+w, y+h, 0.f, 1.f), 371 ScreenRectVertex(x+w, y+h, texcoords.top, texcoords.right),
347 }}; 372 }};
348 373
349 state.texture_units[0].texture_2d = texture.handle; 374 state.texture_units[0].texture_2d = screen_info.display_texture;
350 state.Apply(); 375 state.Apply();
351 376
352 glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices.data()); 377 glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices.data());
353 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); 378 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
379
380 state.texture_units[0].texture_2d = 0;
381 state.Apply();
354} 382}
355 383
356/** 384/**
@@ -362,9 +390,6 @@ void RendererOpenGL::DrawScreens() {
362 glViewport(0, 0, layout.width, layout.height); 390 glViewport(0, 0, layout.width, layout.height);
363 glClear(GL_COLOR_BUFFER_BIT); 391 glClear(GL_COLOR_BUFFER_BIT);
364 392
365 state.draw.shader_program = program_id;
366 state.Apply();
367
368 // Set projection matrix 393 // Set projection matrix
369 std::array<GLfloat, 3 * 2> ortho_matrix = MakeOrthographicMatrix((float)layout.width, 394 std::array<GLfloat, 3 * 2> ortho_matrix = MakeOrthographicMatrix((float)layout.width,
370 (float)layout.height); 395 (float)layout.height);
@@ -374,9 +399,9 @@ void RendererOpenGL::DrawScreens() {
374 glActiveTexture(GL_TEXTURE0); 399 glActiveTexture(GL_TEXTURE0);
375 glUniform1i(uniform_color_texture, 0); 400 glUniform1i(uniform_color_texture, 0);
376 401
377 DrawSingleScreenRotated(textures[0], (float)layout.top_screen.left, (float)layout.top_screen.top, 402 DrawSingleScreenRotated(screen_infos[0], (float)layout.top_screen.left, (float)layout.top_screen.top,
378 (float)layout.top_screen.GetWidth(), (float)layout.top_screen.GetHeight()); 403 (float)layout.top_screen.GetWidth(), (float)layout.top_screen.GetHeight());
379 DrawSingleScreenRotated(textures[1], (float)layout.bottom_screen.left,(float)layout.bottom_screen.top, 404 DrawSingleScreenRotated(screen_infos[1], (float)layout.bottom_screen.left,(float)layout.bottom_screen.top,
380 (float)layout.bottom_screen.GetWidth(), (float)layout.bottom_screen.GetHeight()); 405 (float)layout.bottom_screen.GetWidth(), (float)layout.bottom_screen.GetHeight());
381 406
382 m_current_frame++; 407 m_current_frame++;
@@ -448,12 +473,6 @@ static void DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity,
448bool RendererOpenGL::Init() { 473bool RendererOpenGL::Init() {
449 render_window->MakeCurrent(); 474 render_window->MakeCurrent();
450 475
451 // TODO: Make frontends initialize this, so they can use gladLoadGLLoader with their own loaders
452 if (!gladLoadGL()) {
453 LOG_CRITICAL(Render_OpenGL, "Failed to initialize GL functions! Exiting...");
454 exit(-1);
455 }
456
457 if (GLAD_GL_KHR_debug) { 476 if (GLAD_GL_KHR_debug) {
458 glEnable(GL_DEBUG_OUTPUT); 477 glEnable(GL_DEBUG_OUTPUT);
459 glDebugMessageCallback(DebugHandler, nullptr); 478 glDebugMessageCallback(DebugHandler, nullptr);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index fe4d142a5..00e1044ab 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -8,13 +8,34 @@
8 8
9#include <glad/glad.h> 9#include <glad/glad.h>
10 10
11#include "common/common_types.h"
12#include "common/math_util.h"
13
11#include "core/hw/gpu.h" 14#include "core/hw/gpu.h"
12 15
13#include "video_core/renderer_base.h" 16#include "video_core/renderer_base.h"
17#include "video_core/renderer_opengl/gl_resource_manager.h"
14#include "video_core/renderer_opengl/gl_state.h" 18#include "video_core/renderer_opengl/gl_state.h"
15 19
16class EmuWindow; 20class EmuWindow;
17 21
22/// Structure used for storing information about the textures for each 3DS screen
23struct TextureInfo {
24 OGLTexture resource;
25 GLsizei width;
26 GLsizei height;
27 GPU::Regs::PixelFormat format;
28 GLenum gl_format;
29 GLenum gl_type;
30};
31
32/// Structure used for storing information about the display target for each 3DS screen
33struct ScreenInfo {
34 GLuint display_texture;
35 MathUtil::Rectangle<float> display_texcoords;
36 TextureInfo texture;
37};
38
18class RendererOpenGL : public RendererBase { 39class RendererOpenGL : public RendererBase {
19public: 40public:
20 41
@@ -37,26 +58,16 @@ public:
37 void ShutDown() override; 58 void ShutDown() override;
38 59
39private: 60private:
40 /// Structure used for storing information about the textures for each 3DS screen
41 struct TextureInfo {
42 GLuint handle;
43 GLsizei width;
44 GLsizei height;
45 GPU::Regs::PixelFormat format;
46 GLenum gl_format;
47 GLenum gl_type;
48 };
49
50 void InitOpenGLObjects(); 61 void InitOpenGLObjects();
51 void ConfigureFramebufferTexture(TextureInfo& texture, 62 void ConfigureFramebufferTexture(TextureInfo& texture,
52 const GPU::Regs::FramebufferConfig& framebuffer); 63 const GPU::Regs::FramebufferConfig& framebuffer);
53 void DrawScreens(); 64 void DrawScreens();
54 void DrawSingleScreenRotated(const TextureInfo& texture, float x, float y, float w, float h); 65 void DrawSingleScreenRotated(const ScreenInfo& screen_info, float x, float y, float w, float h);
55 void UpdateFramerate(); 66 void UpdateFramerate();
56 67
57 // Loads framebuffer from emulated memory into the active OpenGL texture. 68 // Loads framebuffer from emulated memory into the display information structure
58 void LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig& framebuffer, 69 void LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer,
59 const TextureInfo& texture); 70 ScreenInfo& screen_info);
60 // Fills active OpenGL texture with the given RGB color. 71 // Fills active OpenGL texture with the given RGB color.
61 void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, 72 void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b,
62 const TextureInfo& texture); 73 const TextureInfo& texture);
@@ -69,10 +80,10 @@ private:
69 OpenGLState state; 80 OpenGLState state;
70 81
71 // OpenGL object IDs 82 // OpenGL object IDs
72 GLuint vertex_array_handle; 83 OGLVertexArray vertex_array;
73 GLuint vertex_buffer_handle; 84 OGLBuffer vertex_buffer;
74 GLuint program_id; 85 OGLShader shader;
75 std::array<TextureInfo, 2> textures; ///< Textures for top and bottom screens respectively 86 std::array<ScreenInfo, 2> screen_infos; ///< Display information for top and bottom screens respectively
76 // Shader uniform location indices 87 // Shader uniform location indices
77 GLuint uniform_modelview_matrix; 88 GLuint uniform_modelview_matrix;
78 GLuint uniform_color_texture; 89 GLuint uniform_color_texture;
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index 75301accd..65dcc9156 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -2,27 +2,30 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <memory> 5#include <atomic>
6#include <cmath>
7#include <cstring>
6#include <unordered_map> 8#include <unordered_map>
9#include <utility>
7 10
8#include <boost/range/algorithm/fill.hpp> 11#include <boost/range/algorithm/fill.hpp>
9 12
13#include "common/bit_field.h"
10#include "common/hash.h" 14#include "common/hash.h"
15#include "common/logging/log.h"
11#include "common/microprofile.h" 16#include "common/microprofile.h"
12#include "common/profiler.h"
13 17
14#include "video_core/debug_utils/debug_utils.h"
15#include "video_core/pica.h" 18#include "video_core/pica.h"
16#include "video_core/pica_state.h" 19#include "video_core/pica_state.h"
17#include "video_core/video_core.h" 20#include "video_core/shader/shader.h"
18 21#include "video_core/shader/shader_interpreter.h"
19#include "shader.h"
20#include "shader_interpreter.h"
21 22
22#ifdef ARCHITECTURE_x86_64 23#ifdef ARCHITECTURE_x86_64
23#include "shader_jit_x64.h" 24#include "video_core/shader/shader_jit_x64.h"
24#endif // ARCHITECTURE_x86_64 25#endif // ARCHITECTURE_x86_64
25 26
27#include "video_core/video_core.h"
28
26namespace Pica { 29namespace Pica {
27 30
28namespace Shader { 31namespace Shader {
@@ -57,13 +60,11 @@ void Shutdown() {
57#endif // ARCHITECTURE_x86_64 60#endif // ARCHITECTURE_x86_64
58} 61}
59 62
60static Common::Profiling::TimingCategory shader_category("Vertex Shader");
61MICROPROFILE_DEFINE(GPU_VertexShader, "GPU", "Vertex Shader", MP_RGB(50, 50, 240)); 63MICROPROFILE_DEFINE(GPU_VertexShader, "GPU", "Vertex Shader", MP_RGB(50, 50, 240));
62 64
63OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes) { 65OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes) {
64 auto& config = g_state.regs.vs; 66 auto& config = g_state.regs.vs;
65 67
66 Common::Profiling::ScopeTimer timer(shader_category);
67 MICROPROFILE_SCOPE(GPU_VertexShader); 68 MICROPROFILE_SCOPE(GPU_VertexShader);
68 69
69 state.program_counter = config.main_offset; 70 state.program_counter = config.main_offset;
@@ -73,24 +74,8 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr
73 // Setup input register table 74 // Setup input register table
74 const auto& attribute_register_map = config.input_register_map; 75 const auto& attribute_register_map = config.input_register_map;
75 76
76 // TODO: Instead of this cumbersome logic, just load the input data directly like 77 for (unsigned i = 0; i < num_attributes; i++)
77 // for (int attr = 0; attr < num_attributes; ++attr) { input_attr[0] = state.registers.input[attribute_register_map.attribute0_register]; } 78 state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i];
78 if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = input.attr[0];
79 if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = input.attr[1];
80 if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = input.attr[2];
81 if (num_attributes > 3) state.registers.input[attribute_register_map.attribute3_register] = input.attr[3];
82 if (num_attributes > 4) state.registers.input[attribute_register_map.attribute4_register] = input.attr[4];
83 if (num_attributes > 5) state.registers.input[attribute_register_map.attribute5_register] = input.attr[5];
84 if (num_attributes > 6) state.registers.input[attribute_register_map.attribute6_register] = input.attr[6];
85 if (num_attributes > 7) state.registers.input[attribute_register_map.attribute7_register] = input.attr[7];
86 if (num_attributes > 8) state.registers.input[attribute_register_map.attribute8_register] = input.attr[8];
87 if (num_attributes > 9) state.registers.input[attribute_register_map.attribute9_register] = input.attr[9];
88 if (num_attributes > 10) state.registers.input[attribute_register_map.attribute10_register] = input.attr[10];
89 if (num_attributes > 11) state.registers.input[attribute_register_map.attribute11_register] = input.attr[11];
90 if (num_attributes > 12) state.registers.input[attribute_register_map.attribute12_register] = input.attr[12];
91 if (num_attributes > 13) state.registers.input[attribute_register_map.attribute13_register] = input.attr[13];
92 if (num_attributes > 14) state.registers.input[attribute_register_map.attribute14_register] = input.attr[14];
93 if (num_attributes > 15) state.registers.input[attribute_register_map.attribute15_register] = input.attr[15];
94 79
95 state.conditional_code[0] = false; 80 state.conditional_code[0] = false;
96 state.conditional_code[1] = false; 81 state.conditional_code[1] = false;
@@ -167,22 +152,8 @@ DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, c
167 float24 dummy_register; 152 float24 dummy_register;
168 boost::fill(state.registers.input, &dummy_register); 153 boost::fill(state.registers.input, &dummy_register);
169 154
170 if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = &input.attr[0].x; 155 for (unsigned i = 0; i < num_attributes; i++)
171 if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = &input.attr[1].x; 156 state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i];
172 if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = &input.attr[2].x;
173 if (num_attributes > 3) state.registers.input[attribute_register_map.attribute3_register] = &input.attr[3].x;
174 if (num_attributes > 4) state.registers.input[attribute_register_map.attribute4_register] = &input.attr[4].x;
175 if (num_attributes > 5) state.registers.input[attribute_register_map.attribute5_register] = &input.attr[5].x;
176 if (num_attributes > 6) state.registers.input[attribute_register_map.attribute6_register] = &input.attr[6].x;
177 if (num_attributes > 7) state.registers.input[attribute_register_map.attribute7_register] = &input.attr[7].x;
178 if (num_attributes > 8) state.registers.input[attribute_register_map.attribute8_register] = &input.attr[8].x;
179 if (num_attributes > 9) state.registers.input[attribute_register_map.attribute9_register] = &input.attr[9].x;
180 if (num_attributes > 10) state.registers.input[attribute_register_map.attribute10_register] = &input.attr[10].x;
181 if (num_attributes > 11) state.registers.input[attribute_register_map.attribute11_register] = &input.attr[11].x;
182 if (num_attributes > 12) state.registers.input[attribute_register_map.attribute12_register] = &input.attr[12].x;
183 if (num_attributes > 13) state.registers.input[attribute_register_map.attribute13_register] = &input.attr[13].x;
184 if (num_attributes > 14) state.registers.input[attribute_register_map.attribute14_register] = &input.attr[14].x;
185 if (num_attributes > 15) state.registers.input[attribute_register_map.attribute15_register] = &input.attr[15].x;
186 157
187 state.conditional_code[0] = false; 158 state.conditional_code[0] = false;
188 state.conditional_code[1] = false; 159 state.conditional_code[1] = false;
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index 9c5bd97bd..56b83bfeb 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -4,17 +4,23 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
8#include <cstddef>
9#include <memory>
10#include <type_traits>
7#include <vector> 11#include <vector>
8 12
9#include <boost/container/static_vector.hpp> 13#include <boost/container/static_vector.hpp>
10 14
11#include <nihstro/shader_binary.h> 15#include <nihstro/shader_bytecode.h>
12 16
17#include "common/assert.h"
13#include "common/common_funcs.h" 18#include "common/common_funcs.h"
14#include "common/common_types.h" 19#include "common/common_types.h"
15#include "common/vector_math.h" 20#include "common/vector_math.h"
16 21
17#include "video_core/pica.h" 22#include "video_core/pica.h"
23#include "video_core/pica_types.h"
18 24
19using nihstro::RegisterType; 25using nihstro::RegisterType;
20using nihstro::SourceRegister; 26using nihstro::SourceRegister;
@@ -25,7 +31,7 @@ namespace Pica {
25namespace Shader { 31namespace Shader {
26 32
27struct InputVertex { 33struct InputVertex {
28 Math::Vec4<float24> attr[16]; 34 alignas(16) Math::Vec4<float24> attr[16];
29}; 35};
30 36
31struct OutputVertex { 37struct OutputVertex {
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp
index 9b978583e..7710f7fbc 100644
--- a/src/video_core/shader/shader_interpreter.cpp
+++ b/src/video_core/shader/shader_interpreter.cpp
@@ -2,12 +2,20 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
6#include <array>
7#include <cmath>
5#include <numeric> 8#include <numeric>
9
6#include <nihstro/shader_bytecode.h> 10#include <nihstro/shader_bytecode.h>
7 11
8#include "common/file_util.h" 12#include "common/assert.h"
9#include "video_core/pica.h" 13#include "common/common_types.h"
14#include "common/logging/log.h"
15#include "common/vector_math.h"
16
10#include "video_core/pica_state.h" 17#include "video_core/pica_state.h"
18#include "video_core/pica_types.h"
11#include "video_core/shader/shader.h" 19#include "video_core/shader/shader.h"
12#include "video_core/shader/shader_interpreter.h" 20#include "video_core/shader/shader_interpreter.h"
13 21
diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h
index 294bca50e..6048cdf3a 100644
--- a/src/video_core/shader/shader_interpreter.h
+++ b/src/video_core/shader/shader_interpreter.h
@@ -4,12 +4,12 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "video_core/shader/shader.h"
8
9namespace Pica { 7namespace Pica {
10 8
11namespace Shader { 9namespace Shader {
12 10
11template <bool Debug> struct UnitState;
12
13template<bool Debug> 13template<bool Debug>
14void RunInterpreter(UnitState<Debug>& state); 14void RunInterpreter(UnitState<Debug>& state);
15 15
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index b47d3beda..99f6c51eb 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -3,8 +3,15 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm> 5#include <algorithm>
6#include <smmintrin.h> 6#include <cmath>
7#include <cstdint>
8#include <xmmintrin.h>
7 9
10#include <nihstro/shader_bytecode.h>
11
12#include "common/assert.h"
13#include "common/logging/log.h"
14#include "common/vector_math.h"
8#include "common/x64/abi.h" 15#include "common/x64/abi.h"
9#include "common/x64/cpu_detect.h" 16#include "common/x64/cpu_detect.h"
10#include "common/x64/emitter.h" 17#include "common/x64/emitter.h"
@@ -13,6 +20,7 @@
13#include "shader_jit_x64.h" 20#include "shader_jit_x64.h"
14 21
15#include "video_core/pica_state.h" 22#include "video_core/pica_state.h"
23#include "video_core/pica_types.h"
16 24
17namespace Pica { 25namespace Pica {
18 26
@@ -148,7 +156,7 @@ static Instruction GetVertexShaderInstruction(size_t offset) {
148} 156}
149 157
150static void LogCritical(const char* msg) { 158static void LogCritical(const char* msg) {
151 LOG_CRITICAL(HW_GPU, msg); 159 LOG_CRITICAL(HW_GPU, "%s", msg);
152} 160}
153 161
154void JitShader::Compile_Assert(bool condition, const char* msg) { 162void JitShader::Compile_Assert(bool condition, const char* msg) {
@@ -795,6 +803,8 @@ void JitShader::FindReturnOffsets() {
795 case OpCode::Id::CALLU: 803 case OpCode::Id::CALLU:
796 return_offsets.push_back(instr.flow_control.dest_offset + instr.flow_control.num_instructions); 804 return_offsets.push_back(instr.flow_control.dest_offset + instr.flow_control.num_instructions);
797 break; 805 break;
806 default:
807 break;
798 } 808 }
799 } 809 }
800 810
@@ -854,7 +864,7 @@ void JitShader::Compile() {
854 uintptr_t size = reinterpret_cast<uintptr_t>(GetCodePtr()) - reinterpret_cast<uintptr_t>(program); 864 uintptr_t size = reinterpret_cast<uintptr_t>(GetCodePtr()) - reinterpret_cast<uintptr_t>(program);
855 ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!"); 865 ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!");
856 866
857 LOG_DEBUG(HW_GPU, "Compiled shader size=%d", size); 867 LOG_DEBUG(HW_GPU, "Compiled shader size=%lu", size);
858} 868}
859 869
860JitShader::JitShader() { 870JitShader::JitShader() {
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index cd6280ade..30aa7ff30 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -4,14 +4,17 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
8#include <cstddef>
7#include <utility> 9#include <utility>
8#include <vector> 10#include <vector>
9 11
10#include <nihstro/shader_bytecode.h> 12#include <nihstro/shader_bytecode.h>
11 13
14#include "common/bit_set.h"
15#include "common/common_types.h"
12#include "common/x64/emitter.h" 16#include "common/x64/emitter.h"
13 17
14#include "video_core/pica.h"
15#include "video_core/shader/shader.h" 18#include "video_core/shader/shader.h"
16 19
17using nihstro::Instruction; 20using nihstro::Instruction;
diff --git a/src/video_core/swrasterizer.h b/src/video_core/swrasterizer.h
index 9a9a76d7a..0a028b774 100644
--- a/src/video_core/swrasterizer.h
+++ b/src/video_core/swrasterizer.h
@@ -8,19 +8,23 @@
8 8
9#include "video_core/rasterizer_interface.h" 9#include "video_core/rasterizer_interface.h"
10 10
11namespace Pica {
12namespace Shader {
13struct OutputVertex;
14}
15}
16
11namespace VideoCore { 17namespace VideoCore {
12 18
13class SWRasterizer : public RasterizerInterface { 19class SWRasterizer : public RasterizerInterface {
14 void InitObjects() override {}
15 void Reset() override {}
16 void AddTriangle(const Pica::Shader::OutputVertex& v0, 20 void AddTriangle(const Pica::Shader::OutputVertex& v0,
17 const Pica::Shader::OutputVertex& v1, 21 const Pica::Shader::OutputVertex& v1,
18 const Pica::Shader::OutputVertex& v2) override; 22 const Pica::Shader::OutputVertex& v2) override;
19 void DrawTriangles() override {} 23 void DrawTriangles() override {}
20 void FlushFramebuffer() override {}
21 void NotifyPicaRegisterChanged(u32 id) override {} 24 void NotifyPicaRegisterChanged(u32 id) override {}
25 void FlushAll() override {}
22 void FlushRegion(PAddr addr, u32 size) override {} 26 void FlushRegion(PAddr addr, u32 size) override {}
23 void InvalidateRegion(PAddr addr, u32 size) override {} 27 void FlushAndInvalidateRegion(PAddr addr, u32 size) override {}
24}; 28};
25 29
26} 30}
diff --git a/src/video_core/utils.cpp b/src/video_core/utils.cpp
deleted file mode 100644
index 6e1ff5cf4..000000000
--- a/src/video_core/utils.cpp
+++ /dev/null
@@ -1,36 +0,0 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstdio>
6#include <cstring>
7
8#include "video_core/utils.h"
9
10namespace VideoCore {
11
12/**
13 * Dumps a texture to TGA
14 * @param filename String filename to dump texture to
15 * @param width Width of texture in pixels
16 * @param height Height of texture in pixels
17 * @param raw_data Raw RGBA8 texture data to dump
18 * @todo This should be moved to some general purpose/common code
19 */
20void DumpTGA(std::string filename, short width, short height, u8* raw_data) {
21 TGAHeader hdr = {0, 0, 2, 0, 0, 0, 0, width, height, 24, 0};
22 FILE* fout = fopen(filename.c_str(), "wb");
23
24 fwrite(&hdr, sizeof(TGAHeader), 1, fout);
25
26 for (int y = 0; y < height; y++) {
27 for (int x = 0; x < width; x++) {
28 putc(raw_data[(3 * (y * width)) + (3 * x) + 0], fout); // b
29 putc(raw_data[(3 * (y * width)) + (3 * x) + 1], fout); // g
30 putc(raw_data[(3 * (y * width)) + (3 * x) + 2], fout); // r
31 }
32 }
33
34 fclose(fout);
35}
36} // namespace
diff --git a/src/video_core/utils.h b/src/video_core/utils.h
index 4fa60a10e..7ce83a055 100644
--- a/src/video_core/utils.h
+++ b/src/video_core/utils.h
@@ -4,37 +4,10 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <string>
8
9#include "common/common_types.h" 7#include "common/common_types.h"
10 8
11namespace VideoCore { 9namespace VideoCore {
12 10
13/// Structure for the TGA texture format (for dumping)
14struct TGAHeader {
15 char idlength;
16 char colormaptype;
17 char datatypecode;
18 short int colormaporigin;
19 short int colormaplength;
20 short int x_origin;
21 short int y_origin;
22 short width;
23 short height;
24 char bitsperpixel;
25 char imagedescriptor;
26};
27
28/**
29 * Dumps a texture to TGA
30 * @param filename String filename to dump texture to
31 * @param width Width of texture in pixels
32 * @param height Height of texture in pixels
33 * @param raw_data Raw RGBA8 texture data to dump
34 * @todo This should be moved to some general purpose/common code
35 */
36void DumpTGA(std::string filename, short width, short height, u8* raw_data);
37
38/** 11/**
39 * Interleave the lower 3 bits of each coordinate to get the intra-block offsets, which are 12 * Interleave the lower 3 bits of each coordinate to get the intra-block offsets, which are
40 * arranged in a Z-order curve. More details on the bit manipulation at: 13 * arranged in a Z-order curve. More details on the bit manipulation at:
diff --git a/src/video_core/vertex_loader.cpp b/src/video_core/vertex_loader.cpp
new file mode 100644
index 000000000..21ae52949
--- /dev/null
+++ b/src/video_core/vertex_loader.cpp
@@ -0,0 +1,140 @@
1#include <memory>
2
3#include <boost/range/algorithm/fill.hpp>
4
5#include "common/assert.h"
6#include "common/alignment.h"
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "common/logging/log.h"
10#include "common/vector_math.h"
11
12#include "core/memory.h"
13
14#include "video_core/debug_utils/debug_utils.h"
15#include "video_core/pica.h"
16#include "video_core/pica_state.h"
17#include "video_core/pica_types.h"
18#include "video_core/shader/shader.h"
19#include "video_core/vertex_loader.h"
20
21namespace Pica {
22
23void VertexLoader::Setup(const Pica::Regs& regs) {
24 const auto& attribute_config = regs.vertex_attributes;
25 num_total_attributes = attribute_config.GetNumTotalAttributes();
26
27 boost::fill(vertex_attribute_sources, 0xdeadbeef);
28
29 for (int i = 0; i < 16; i++) {
30 vertex_attribute_is_default[i] = attribute_config.IsDefaultAttribute(i);
31 }
32
33 // Setup attribute data from loaders
34 for (int loader = 0; loader < 12; ++loader) {
35 const auto& loader_config = attribute_config.attribute_loaders[loader];
36
37 u32 offset = 0;
38
39 // TODO: What happens if a loader overwrites a previous one's data?
40 for (unsigned component = 0; component < loader_config.component_count; ++component) {
41 if (component >= 12) {
42 LOG_ERROR(HW_GPU, "Overflow in the vertex attribute loader %u trying to load component %u", loader, component);
43 continue;
44 }
45
46 u32 attribute_index = loader_config.GetComponent(component);
47 if (attribute_index < 12) {
48 offset = Common::AlignUp(offset, attribute_config.GetElementSizeInBytes(attribute_index));
49 vertex_attribute_sources[attribute_index] = loader_config.data_offset + offset;
50 vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count);
51 vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index);
52 vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index);
53 offset += attribute_config.GetStride(attribute_index);
54 } else if (attribute_index < 16) {
55 // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively
56 offset = Common::AlignUp(offset, 4);
57 offset += (attribute_index - 11) * 4;
58 } else {
59 UNREACHABLE(); // This is truly unreachable due to the number of bits for each component
60 }
61 }
62 }
63}
64
65void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, DebugUtils::MemoryAccessTracker& memory_accesses) {
66 for (int i = 0; i < num_total_attributes; ++i) {
67 if (vertex_attribute_elements[i] != 0) {
68 // Load per-vertex data from the loader arrays
69 u32 source_addr = base_address + vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex;
70
71 if (g_debug_context && Pica::g_debug_context->recorder) {
72 memory_accesses.AddAccess(source_addr, vertex_attribute_elements[i] * (
73 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4
74 : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1));
75 }
76
77 switch (vertex_attribute_formats[i]) {
78 case Regs::VertexAttributeFormat::BYTE:
79 {
80 const s8* srcdata = reinterpret_cast<const s8*>(Memory::GetPhysicalPointer(source_addr));
81 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
82 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
83 }
84 break;
85 }
86 case Regs::VertexAttributeFormat::UBYTE:
87 {
88 const u8* srcdata = reinterpret_cast<const u8*>(Memory::GetPhysicalPointer(source_addr));
89 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
90 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
91 }
92 break;
93 }
94 case Regs::VertexAttributeFormat::SHORT:
95 {
96 const s16* srcdata = reinterpret_cast<const s16*>(Memory::GetPhysicalPointer(source_addr));
97 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
98 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
99 }
100 break;
101 }
102 case Regs::VertexAttributeFormat::FLOAT:
103 {
104 const float* srcdata = reinterpret_cast<const float*>(Memory::GetPhysicalPointer(source_addr));
105 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
106 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
107 }
108 break;
109 }
110 }
111
112 // Default attribute values set if array elements have < 4 components. This
113 // is *not* carried over from the default attribute settings even if they're
114 // enabled for this attribute.
115 for (unsigned int comp = vertex_attribute_elements[i]; comp < 4; ++comp) {
116 input.attr[i][comp] = comp == 3 ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f);
117 }
118
119 LOG_TRACE(HW_GPU, "Loaded %d components of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f %f %f %f",
120 vertex_attribute_elements[i], i, vertex, index,
121 base_address,
122 vertex_attribute_sources[i],
123 vertex_attribute_strides[i] * vertex,
124 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
125 } else if (vertex_attribute_is_default[i]) {
126 // Load the default attribute if we're configured to do so
127 input.attr[i] = g_state.vs.default_attributes[i];
128 LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)",
129 i, vertex, index,
130 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),
131 input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
132 } else {
133 // TODO(yuriks): In this case, no data gets loaded and the vertex
134 // remains with the last value it had. This isn't currently maintained
135 // as global state, however, and so won't work in Citra yet.
136 }
137 }
138}
139
140} // namespace Pica
diff --git a/src/video_core/vertex_loader.h b/src/video_core/vertex_loader.h
new file mode 100644
index 000000000..becf5a403
--- /dev/null
+++ b/src/video_core/vertex_loader.h
@@ -0,0 +1,33 @@
1#pragma once
2
3#include "common/common_types.h"
4
5#include "video_core/pica.h"
6
7namespace Pica {
8
9namespace DebugUtils {
10class MemoryAccessTracker;
11}
12
13namespace Shader {
14class InputVertex;
15}
16
17class VertexLoader {
18public:
19 void Setup(const Pica::Regs& regs);
20 void LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, DebugUtils::MemoryAccessTracker& memory_accesses);
21
22 int GetNumTotalAttributes() const { return num_total_attributes; }
23
24private:
25 u32 vertex_attribute_sources[16];
26 u32 vertex_attribute_strides[16] = {};
27 Regs::VertexAttributeFormat vertex_attribute_formats[16] = {};
28 u32 vertex_attribute_elements[16] = {};
29 bool vertex_attribute_is_default[16];
30 int num_total_attributes;
31};
32
33} // namespace Pica
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 256899c89..c9975876d 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -4,12 +4,8 @@
4 4
5#include <memory> 5#include <memory>
6 6
7#include "common/emu_window.h"
8#include "common/logging/log.h" 7#include "common/logging/log.h"
9 8
10#include "core/core.h"
11#include "core/settings.h"
12
13#include "video_core/pica.h" 9#include "video_core/pica.h"
14#include "video_core/renderer_base.h" 10#include "video_core/renderer_base.h"
15#include "video_core/video_core.h" 11#include "video_core/video_core.h"
@@ -25,6 +21,7 @@ std::unique_ptr<RendererBase> g_renderer; ///< Renderer plugin
25 21
26std::atomic<bool> g_hw_renderer_enabled; 22std::atomic<bool> g_hw_renderer_enabled;
27std::atomic<bool> g_shader_jit_enabled; 23std::atomic<bool> g_shader_jit_enabled;
24std::atomic<bool> g_scaled_resolution_enabled;
28 25
29/// Initialize the video core 26/// Initialize the video core
30bool Init(EmuWindow* emu_window) { 27bool Init(EmuWindow* emu_window) {
diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h
index bca67fb8c..30267489e 100644
--- a/src/video_core/video_core.h
+++ b/src/video_core/video_core.h
@@ -36,6 +36,7 @@ extern EmuWindow* g_emu_window; ///< Emu window
36// TODO: Wrap these in a user settings struct along with any other graphics settings (often set from qt ui) 36// TODO: Wrap these in a user settings struct along with any other graphics settings (often set from qt ui)
37extern std::atomic<bool> g_hw_renderer_enabled; 37extern std::atomic<bool> g_hw_renderer_enabled;
38extern std::atomic<bool> g_shader_jit_enabled; 38extern std::atomic<bool> g_shader_jit_enabled;
39extern std::atomic<bool> g_scaled_resolution_enabled;
39 40
40/// Start the video core 41/// Start the video core
41void Start(); 42void Start();