summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitmodules3
-rwxr-xr-x.travis-deps.sh5
-rw-r--r--CMakeLists.txt17
m---------externals/boost0
-rw-r--r--externals/microprofile/microprofileui.h7
m---------externals/soundtouch0
-rw-r--r--src/audio_core/CMakeLists.txt7
-rw-r--r--src/audio_core/audio_core.cpp13
-rw-r--r--src/audio_core/audio_core.h2
-rw-r--r--src/audio_core/hle/common.h9
-rw-r--r--src/audio_core/hle/dsp.cpp44
-rw-r--r--src/audio_core/hle/dsp.h21
-rw-r--r--src/audio_core/hle/pipe.cpp32
-rw-r--r--src/audio_core/hle/pipe.h4
-rw-r--r--src/audio_core/interpolate.cpp85
-rw-r--r--src/audio_core/interpolate.h41
-rw-r--r--src/citra/CMakeLists.txt2
-rw-r--r--src/citra/config.cpp1
-rw-r--r--src/citra/default_ini.h4
-rw-r--r--src/citra_qt/CMakeLists.txt2
-rw-r--r--src/citra_qt/bootmanager.cpp2
-rw-r--r--src/citra_qt/config.cpp2
-rw-r--r--src/citra_qt/configure_general.cpp2
-rw-r--r--src/citra_qt/configure_general.ui7
-rw-r--r--src/citra_qt/debugger/graphics_breakpoints.cpp4
-rw-r--r--src/citra_qt/debugger/graphics_framebuffer.cpp6
-rw-r--r--src/citra_qt/debugger/profiler.cpp39
-rw-r--r--src/citra_qt/debugger/profiler.h3
-rw-r--r--src/citra_qt/main.cpp9
-rw-r--r--src/common/CMakeLists.txt1
-rw-r--r--src/common/assert.h2
-rw-r--r--src/common/file_util.h4
-rw-r--r--src/common/microprofile.h4
-rw-r--r--src/common/microprofileui.h3
-rw-r--r--src/common/profiler.cpp82
-rw-r--r--src/common/profiler.h152
-rw-r--r--src/common/profiler_reporting.h27
-rw-r--r--src/core/arm/dyncom/arm_dyncom_interpreter.cpp7
-rw-r--r--src/core/gdbstub/gdbstub.cpp4
-rw-r--r--src/core/hle/result.h1
-rw-r--r--src/core/hle/service/am/am.cpp2
-rw-r--r--src/core/hle/service/dsp_dsp.cpp191
-rw-r--r--src/core/hle/service/dsp_dsp.h19
-rw-r--r--src/core/hle/service/fs/archive.cpp1
-rw-r--r--src/core/hle/service/fs/fs_user.cpp2
-rw-r--r--src/core/hle/service/gsp_gpu.cpp70
-rw-r--r--src/core/hle/service/y2r_u.cpp490
-rw-r--r--src/core/hle/service/y2r_u.h20
-rw-r--r--src/core/hle/svc.cpp4
-rw-r--r--src/core/hw/gpu.cpp327
-rw-r--r--src/core/hw/gpu.h4
-rw-r--r--src/core/loader/ncch.cpp2
-rw-r--r--src/core/memory.cpp140
-rw-r--r--src/core/memory.h16
-rw-r--r--src/core/settings.cpp2
-rw-r--r--src/core/settings.h1
-rw-r--r--src/video_core/CMakeLists.txt2
-rw-r--r--src/video_core/command_processor.cpp133
-rw-r--r--src/video_core/debug_utils/debug_utils.cpp9
-rw-r--r--src/video_core/debug_utils/debug_utils.h46
-rw-r--r--src/video_core/pica.h9
-rw-r--r--src/video_core/rasterizer.cpp3
-rw-r--r--src/video_core/rasterizer_interface.h31
-rw-r--r--src/video_core/renderer_base.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp842
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h94
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp699
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h209
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp3
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp63
-rw-r--r--src/video_core/renderer_opengl/gl_state.h27
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp128
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h44
-rw-r--r--src/video_core/shader/shader.cpp3
-rw-r--r--src/video_core/shader/shader.h2
-rw-r--r--src/video_core/shader/shader_jit_x64.cpp6
-rw-r--r--src/video_core/swrasterizer.h6
-rw-r--r--src/video_core/vertex_loader.cpp140
-rw-r--r--src/video_core/vertex_loader.h28
-rw-r--r--src/video_core/video_core.cpp1
-rw-r--r--src/video_core/video_core.h1
81 files changed, 2892 insertions, 1590 deletions
diff --git a/.gitmodules b/.gitmodules
index 598e4c64d..059512902 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -7,3 +7,6 @@
7[submodule "nihstro"] 7[submodule "nihstro"]
8 path = externals/nihstro 8 path = externals/nihstro
9 url = https://github.com/neobrain/nihstro.git 9 url = https://github.com/neobrain/nihstro.git
10[submodule "soundtouch"]
11 path = externals/soundtouch
12 url = https://github.com/citra-emu/soundtouch.git
diff --git a/.travis-deps.sh b/.travis-deps.sh
index c7bb7e785..4a79feb70 100755
--- a/.travis-deps.sh
+++ b/.travis-deps.sh
@@ -9,7 +9,7 @@ if [ "$TRAVIS_OS_NAME" = "linux" -o -z "$TRAVIS_OS_NAME" ]; then
9 export CXX=g++-5 9 export CXX=g++-5
10 mkdir -p $HOME/.local 10 mkdir -p $HOME/.local
11 11
12 curl -L http://www.cmake.org/files/v2.8/cmake-2.8.11-Linux-i386.tar.gz \ 12 curl -L http://www.cmake.org/files/v3.1/cmake-3.1.0-Linux-i386.tar.gz \
13 | tar -xz -C $HOME/.local --strip-components=1 13 | tar -xz -C $HOME/.local --strip-components=1
14 14
15 ( 15 (
@@ -20,6 +20,7 @@ if [ "$TRAVIS_OS_NAME" = "linux" -o -z "$TRAVIS_OS_NAME" ]; then
20 ) 20 )
21elif [ "$TRAVIS_OS_NAME" = "osx" ]; then 21elif [ "$TRAVIS_OS_NAME" = "osx" ]; then
22 brew update > /dev/null # silence the very verbose output 22 brew update > /dev/null # silence the very verbose output
23 brew install qt5 sdl2 dylibbundler 23 brew unlink cmake
24 brew install cmake31 qt5 sdl2 dylibbundler
24 gem install xcpretty 25 gem install xcpretty
25fi 26fi
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3a0a161e7..d628ecc50 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,6 @@
1# CMake 2.8.11 required for Qt5 settings to be applied automatically on 1# CMake 3.1 required for Qt5 settings to be applied automatically on
2# dependent libraries. 2# dependent libraries and IMPORTED targets.
3cmake_minimum_required(VERSION 2.8.11) 3cmake_minimum_required(VERSION 3.1)
4 4
5function(download_bundled_external remote_path lib_name prefix_var) 5function(download_bundled_external remote_path lib_name prefix_var)
6 set(prefix "${CMAKE_BINARY_DIR}/externals/${lib_name}") 6 set(prefix "${CMAKE_BINARY_DIR}/externals/${lib_name}")
@@ -65,8 +65,8 @@ endif()
65message(STATUS "Target architecture: ${ARCHITECTURE}") 65message(STATUS "Target architecture: ${ARCHITECTURE}")
66 66
67if (NOT MSVC) 67if (NOT MSVC)
68 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++1y -Wno-attributes -pthread") 68 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++1y -Wno-attributes")
69 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread") 69 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
70 70
71 if (ARCHITECTURE_x86_64) 71 if (ARCHITECTURE_x86_64)
72 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1") 72 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1")
@@ -135,6 +135,10 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/externals/cmake-modules")
135find_package(OpenGL REQUIRED) 135find_package(OpenGL REQUIRED)
136include_directories(${OPENGL_INCLUDE_DIR}) 136include_directories(${OPENGL_INCLUDE_DIR})
137 137
138# Prefer the -pthread flag on Linux.
139set (THREADS_PREFER_PTHREAD_FLAG ON)
140find_package(Threads REQUIRED)
141
138if (ENABLE_SDL2) 142if (ENABLE_SDL2)
139 if (CITRA_USE_BUNDLED_SDL2) 143 if (CITRA_USE_BUNDLED_SDL2)
140 # Detect toolchain and platform 144 # Detect toolchain and platform
@@ -245,6 +249,9 @@ if(ENABLE_QT)
245 include_directories(externals/qhexedit) 249 include_directories(externals/qhexedit)
246 add_subdirectory(externals/qhexedit) 250 add_subdirectory(externals/qhexedit)
247endif() 251endif()
252
253add_subdirectory(externals/soundtouch)
254
248add_subdirectory(src) 255add_subdirectory(src)
249 256
250# Install freedesktop.org metadata files, following those specifications: 257# Install freedesktop.org metadata files, following those specifications:
diff --git a/externals/boost b/externals/boost
Subproject d81b9269900ae183d0dc98403eea4c971590a80 Subproject 2dcb9d979665b6aabb1635c617973e02914e60e
diff --git a/externals/microprofile/microprofileui.h b/externals/microprofile/microprofileui.h
index eac1119a4..45bec8af6 100644
--- a/externals/microprofile/microprofileui.h
+++ b/externals/microprofile/microprofileui.h
@@ -879,7 +879,7 @@ void MicroProfileDrawDetailedBars(uint32_t nWidth, uint32_t nHeight, int nBaseY,
879 static int64_t nRefCpu = 0, nRefGpu = 0; 879 static int64_t nRefCpu = 0, nRefGpu = 0;
880 if(MicroProfileGetGpuTickReference(&nTickReferenceCpu, &nTickReferenceGpu)) 880 if(MicroProfileGetGpuTickReference(&nTickReferenceCpu, &nTickReferenceGpu))
881 { 881 {
882 if(0 == nRefCpu || abs(nRefCpu-nBaseTicksCpu) > abs(nTickReferenceCpu-nBaseTicksCpu)) 882 if(0 == nRefCpu || std::abs(nRefCpu-nBaseTicksCpu) > std::abs(nTickReferenceCpu-nBaseTicksCpu))
883 { 883 {
884 nRefCpu = nTickReferenceCpu; 884 nRefCpu = nTickReferenceCpu;
885 nRefGpu = nTickReferenceGpu; 885 nRefGpu = nTickReferenceGpu;
@@ -1230,7 +1230,12 @@ void MicroProfileDrawDetailedBars(uint32_t nWidth, uint32_t nHeight, int nBaseY,
1230 char ThreadName[MicroProfileThreadLog::THREAD_MAX_LEN + 16]; 1230 char ThreadName[MicroProfileThreadLog::THREAD_MAX_LEN + 16];
1231 const char* cLocal = MicroProfileIsLocalThread(nThreadId) ? "*": " "; 1231 const char* cLocal = MicroProfileIsLocalThread(nThreadId) ? "*": " ";
1232 1232
1233#if defined(WIN32)
1234 // nThreadId is 32-bit on Windows
1233 int nStrLen = snprintf(ThreadName, sizeof(ThreadName)-1, "%04x: %s%s", nThreadId, cLocal, i < nNumThreadsBase ? &S.Pool[i]->ThreadName[0] : MICROPROFILE_THREAD_NAME_FROM_ID(nThreadId) ); 1235 int nStrLen = snprintf(ThreadName, sizeof(ThreadName)-1, "%04x: %s%s", nThreadId, cLocal, i < nNumThreadsBase ? &S.Pool[i]->ThreadName[0] : MICROPROFILE_THREAD_NAME_FROM_ID(nThreadId) );
1236#else
1237 int nStrLen = snprintf(ThreadName, sizeof(ThreadName)-1, "%04llx: %s%s", nThreadId, cLocal, i < nNumThreadsBase ? &S.Pool[i]->ThreadName[0] : MICROPROFILE_THREAD_NAME_FROM_ID(nThreadId) );
1238#endif
1234 uint32_t nThreadColor = -1; 1239 uint32_t nThreadColor = -1;
1235 if(nThreadId == nContextSwitchHoverThreadAfter || nThreadId == nContextSwitchHoverThreadBefore) 1240 if(nThreadId == nContextSwitchHoverThreadAfter || nThreadId == nContextSwitchHoverThreadBefore)
1236 nThreadColor = UI.nHoverColorShared|0x906060; 1241 nThreadColor = UI.nHoverColorShared|0x906060;
diff --git a/externals/soundtouch b/externals/soundtouch
new file mode 160000
Subproject 5274ec4dec498bd88ccbcd28862a0f78a3b95ef
diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt
index 869da5e83..a965af291 100644
--- a/src/audio_core/CMakeLists.txt
+++ b/src/audio_core/CMakeLists.txt
@@ -4,6 +4,7 @@ set(SRCS
4 hle/dsp.cpp 4 hle/dsp.cpp
5 hle/filter.cpp 5 hle/filter.cpp
6 hle/pipe.cpp 6 hle/pipe.cpp
7 interpolate.cpp
7 ) 8 )
8 9
9set(HEADERS 10set(HEADERS
@@ -13,9 +14,13 @@ set(HEADERS
13 hle/dsp.h 14 hle/dsp.h
14 hle/filter.h 15 hle/filter.h
15 hle/pipe.h 16 hle/pipe.h
17 interpolate.h
16 sink.h 18 sink.h
17 ) 19 )
18 20
21include_directories(../../externals/soundtouch/include)
22
19create_directory_groups(${SRCS} ${HEADERS}) 23create_directory_groups(${SRCS} ${HEADERS})
20 24
21add_library(audio_core STATIC ${SRCS} ${HEADERS}) \ No newline at end of file 25add_library(audio_core STATIC ${SRCS} ${HEADERS})
26target_link_libraries(audio_core SoundTouch)
diff --git a/src/audio_core/audio_core.cpp b/src/audio_core/audio_core.cpp
index 894f46990..cbe869a04 100644
--- a/src/audio_core/audio_core.cpp
+++ b/src/audio_core/audio_core.cpp
@@ -4,6 +4,7 @@
4 4
5#include "audio_core/audio_core.h" 5#include "audio_core/audio_core.h"
6#include "audio_core/hle/dsp.h" 6#include "audio_core/hle/dsp.h"
7#include "audio_core/hle/pipe.h"
7 8
8#include "core/core_timing.h" 9#include "core/core_timing.h"
9#include "core/hle/kernel/vm_manager.h" 10#include "core/hle/kernel/vm_manager.h"
@@ -17,10 +18,10 @@ static constexpr u64 audio_frame_ticks = 1310252ull; ///< Units: ARM11 cycles
17 18
18static void AudioTickCallback(u64 /*userdata*/, int cycles_late) { 19static void AudioTickCallback(u64 /*userdata*/, int cycles_late) {
19 if (DSP::HLE::Tick()) { 20 if (DSP::HLE::Tick()) {
20 // HACK: We're not signaling the interrups when they should be, but just firing them all off together. 21 // TODO(merry): Signal all the other interrupts as appropriate.
21 // It should be only (interrupt_id = 2, channel_id = 2) that's signalled here. 22 DSP_DSP::SignalPipeInterrupt(DSP::HLE::DspPipe::Audio);
22 // TODO(merry): Understand when the other interrupts are fired. 23 // HACK(merry): Added to prevent regressions. Will remove soon.
23 DSP_DSP::SignalAllInterrupts(); 24 DSP_DSP::SignalPipeInterrupt(DSP::HLE::DspPipe::Binary);
24 } 25 }
25 26
26 // Reschedule recurrent event 27 // Reschedule recurrent event
@@ -37,10 +38,10 @@ void Init() {
37 38
38/// Add DSP address spaces to Process's address space. 39/// Add DSP address spaces to Process's address space.
39void AddAddressSpace(Kernel::VMManager& address_space) { 40void AddAddressSpace(Kernel::VMManager& address_space) {
40 auto r0_vma = address_space.MapBackingMemory(DSP::HLE::region0_base, reinterpret_cast<u8*>(&DSP::HLE::g_region0), sizeof(DSP::HLE::SharedMemory), Kernel::MemoryState::IO).MoveFrom(); 41 auto r0_vma = address_space.MapBackingMemory(DSP::HLE::region0_base, reinterpret_cast<u8*>(&DSP::HLE::g_regions[0]), sizeof(DSP::HLE::SharedMemory), Kernel::MemoryState::IO).MoveFrom();
41 address_space.Reprotect(r0_vma, Kernel::VMAPermission::ReadWrite); 42 address_space.Reprotect(r0_vma, Kernel::VMAPermission::ReadWrite);
42 43
43 auto r1_vma = address_space.MapBackingMemory(DSP::HLE::region1_base, reinterpret_cast<u8*>(&DSP::HLE::g_region1), sizeof(DSP::HLE::SharedMemory), Kernel::MemoryState::IO).MoveFrom(); 44 auto r1_vma = address_space.MapBackingMemory(DSP::HLE::region1_base, reinterpret_cast<u8*>(&DSP::HLE::g_regions[1]), sizeof(DSP::HLE::SharedMemory), Kernel::MemoryState::IO).MoveFrom();
44 address_space.Reprotect(r1_vma, Kernel::VMAPermission::ReadWrite); 45 address_space.Reprotect(r1_vma, Kernel::VMAPermission::ReadWrite);
45} 46}
46 47
diff --git a/src/audio_core/audio_core.h b/src/audio_core/audio_core.h
index 64c330914..b349895ea 100644
--- a/src/audio_core/audio_core.h
+++ b/src/audio_core/audio_core.h
@@ -10,8 +10,6 @@ class VMManager;
10 10
11namespace AudioCore { 11namespace AudioCore {
12 12
13constexpr int num_sources = 24;
14constexpr int samples_per_frame = 160; ///< Samples per audio frame at native sample rate
15constexpr int native_sample_rate = 32728; ///< 32kHz 13constexpr int native_sample_rate = 32728; ///< 32kHz
16 14
17/// Initialise Audio Core 15/// Initialise Audio Core
diff --git a/src/audio_core/hle/common.h b/src/audio_core/hle/common.h
index 37d441eb2..7910f42ae 100644
--- a/src/audio_core/hle/common.h
+++ b/src/audio_core/hle/common.h
@@ -7,18 +7,19 @@
7#include <algorithm> 7#include <algorithm>
8#include <array> 8#include <array>
9 9
10#include "audio_core/audio_core.h"
11
12#include "common/common_types.h" 10#include "common/common_types.h"
13 11
14namespace DSP { 12namespace DSP {
15namespace HLE { 13namespace HLE {
16 14
15constexpr int num_sources = 24;
16constexpr int samples_per_frame = 160; ///< Samples per audio frame at native sample rate
17
17/// The final output to the speakers is stereo. Preprocessing output in Source is also stereo. 18/// The final output to the speakers is stereo. Preprocessing output in Source is also stereo.
18using StereoFrame16 = std::array<std::array<s16, 2>, AudioCore::samples_per_frame>; 19using StereoFrame16 = std::array<std::array<s16, 2>, samples_per_frame>;
19 20
20/// The DSP is quadraphonic internally. 21/// The DSP is quadraphonic internally.
21using QuadFrame32 = std::array<std::array<s32, 4>, AudioCore::samples_per_frame>; 22using QuadFrame32 = std::array<std::array<s32, 4>, samples_per_frame>;
22 23
23/** 24/**
24 * This performs the filter operation defined by FilterT::ProcessSample on the frame in-place. 25 * This performs the filter operation defined by FilterT::ProcessSample on the frame in-place.
diff --git a/src/audio_core/hle/dsp.cpp b/src/audio_core/hle/dsp.cpp
index c89356edc..5759a5b9e 100644
--- a/src/audio_core/hle/dsp.cpp
+++ b/src/audio_core/hle/dsp.cpp
@@ -8,8 +8,32 @@
8namespace DSP { 8namespace DSP {
9namespace HLE { 9namespace HLE {
10 10
11SharedMemory g_region0; 11std::array<SharedMemory, 2> g_regions;
12SharedMemory g_region1; 12
13static size_t CurrentRegionIndex() {
14 // The region with the higher frame counter is chosen unless there is wraparound.
15 // This function only returns a 0 or 1.
16
17 if (g_regions[0].frame_counter == 0xFFFFu && g_regions[1].frame_counter != 0xFFFEu) {
18 // Wraparound has occured.
19 return 1;
20 }
21
22 if (g_regions[1].frame_counter == 0xFFFFu && g_regions[0].frame_counter != 0xFFFEu) {
23 // Wraparound has occured.
24 return 0;
25 }
26
27 return (g_regions[0].frame_counter > g_regions[1].frame_counter) ? 0 : 1;
28}
29
30static SharedMemory& ReadRegion() {
31 return g_regions[CurrentRegionIndex()];
32}
33
34static SharedMemory& WriteRegion() {
35 return g_regions[1 - CurrentRegionIndex()];
36}
13 37
14void Init() { 38void Init() {
15 DSP::HLE::ResetPipes(); 39 DSP::HLE::ResetPipes();
@@ -22,21 +46,5 @@ bool Tick() {
22 return true; 46 return true;
23} 47}
24 48
25SharedMemory& CurrentRegion() {
26 // The region with the higher frame counter is chosen unless there is wraparound.
27
28 if (g_region0.frame_counter == 0xFFFFu && g_region1.frame_counter != 0xFFFEu) {
29 // Wraparound has occured.
30 return g_region1;
31 }
32
33 if (g_region1.frame_counter == 0xFFFFu && g_region0.frame_counter != 0xFFFEu) {
34 // Wraparound has occured.
35 return g_region0;
36 }
37
38 return (g_region0.frame_counter > g_region1.frame_counter) ? g_region0 : g_region1;
39}
40
41} // namespace HLE 49} // namespace HLE
42} // namespace DSP 50} // namespace DSP
diff --git a/src/audio_core/hle/dsp.h b/src/audio_core/hle/dsp.h
index c15ef0b7a..f0f125284 100644
--- a/src/audio_core/hle/dsp.h
+++ b/src/audio_core/hle/dsp.h
@@ -4,10 +4,11 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
7#include <cstddef> 8#include <cstddef>
8#include <type_traits> 9#include <type_traits>
9 10
10#include "audio_core/audio_core.h" 11#include "audio_core/hle/common.h"
11 12
12#include "common/bit_field.h" 13#include "common/bit_field.h"
13#include "common/common_funcs.h" 14#include "common/common_funcs.h"
@@ -30,10 +31,9 @@ namespace HLE {
30struct SharedMemory; 31struct SharedMemory;
31 32
32constexpr VAddr region0_base = 0x1FF50000; 33constexpr VAddr region0_base = 0x1FF50000;
33extern SharedMemory g_region0;
34
35constexpr VAddr region1_base = 0x1FF70000; 34constexpr VAddr region1_base = 0x1FF70000;
36extern SharedMemory g_region1; 35
36extern std::array<SharedMemory, 2> g_regions;
37 37
38/** 38/**
39 * The DSP is native 16-bit. The DSP also appears to be big-endian. When reading 32-bit numbers from 39 * The DSP is native 16-bit. The DSP also appears to be big-endian. When reading 32-bit numbers from
@@ -305,7 +305,7 @@ struct SourceConfiguration {
305 u16_le buffer_id; 305 u16_le buffer_id;
306 }; 306 };
307 307
308 Configuration config[AudioCore::num_sources]; 308 Configuration config[num_sources];
309}; 309};
310ASSERT_DSP_STRUCT(SourceConfiguration::Configuration, 192); 310ASSERT_DSP_STRUCT(SourceConfiguration::Configuration, 192);
311ASSERT_DSP_STRUCT(SourceConfiguration::Configuration::Buffer, 20); 311ASSERT_DSP_STRUCT(SourceConfiguration::Configuration::Buffer, 20);
@@ -320,7 +320,7 @@ struct SourceStatus {
320 INSERT_PADDING_DSPWORDS(1); 320 INSERT_PADDING_DSPWORDS(1);
321 }; 321 };
322 322
323 Status status[AudioCore::num_sources]; 323 Status status[num_sources];
324}; 324};
325ASSERT_DSP_STRUCT(SourceStatus::Status, 12); 325ASSERT_DSP_STRUCT(SourceStatus::Status, 12);
326 326
@@ -413,7 +413,7 @@ ASSERT_DSP_STRUCT(DspConfiguration::ReverbEffect, 52);
413struct AdpcmCoefficients { 413struct AdpcmCoefficients {
414 /// Coefficients are signed fixed point with 11 fractional bits. 414 /// Coefficients are signed fixed point with 11 fractional bits.
415 /// Each source has 16 coefficients associated with it. 415 /// Each source has 16 coefficients associated with it.
416 s16_le coeff[AudioCore::num_sources][16]; 416 s16_le coeff[num_sources][16];
417}; 417};
418ASSERT_DSP_STRUCT(AdpcmCoefficients, 768); 418ASSERT_DSP_STRUCT(AdpcmCoefficients, 768);
419 419
@@ -427,7 +427,7 @@ ASSERT_DSP_STRUCT(DspStatus, 32);
427/// Final mixed output in PCM16 stereo format, what you hear out of the speakers. 427/// Final mixed output in PCM16 stereo format, what you hear out of the speakers.
428/// When the application writes to this region it has no effect. 428/// When the application writes to this region it has no effect.
429struct FinalMixSamples { 429struct FinalMixSamples {
430 s16_le pcm16[2 * AudioCore::samples_per_frame]; 430 s16_le pcm16[2 * samples_per_frame];
431}; 431};
432ASSERT_DSP_STRUCT(FinalMixSamples, 640); 432ASSERT_DSP_STRUCT(FinalMixSamples, 640);
433 433
@@ -437,7 +437,7 @@ ASSERT_DSP_STRUCT(FinalMixSamples, 640);
437/// Values that exceed s16 range will be clipped by the DSP after further processing. 437/// Values that exceed s16 range will be clipped by the DSP after further processing.
438struct IntermediateMixSamples { 438struct IntermediateMixSamples {
439 struct Samples { 439 struct Samples {
440 s32_le pcm32[4][AudioCore::samples_per_frame]; ///< Little-endian as opposed to DSP middle-endian. 440 s32_le pcm32[4][samples_per_frame]; ///< Little-endian as opposed to DSP middle-endian.
441 }; 441 };
442 442
443 Samples mix1; 443 Samples mix1;
@@ -535,8 +535,5 @@ void Shutdown();
535 */ 535 */
536bool Tick(); 536bool Tick();
537 537
538/// Returns a mutable reference to the current region. Current region is selected based on the frame counter.
539SharedMemory& CurrentRegion();
540
541} // namespace HLE 538} // namespace HLE
542} // namespace DSP 539} // namespace DSP
diff --git a/src/audio_core/hle/pipe.cpp b/src/audio_core/hle/pipe.cpp
index 9381883b4..03280780f 100644
--- a/src/audio_core/hle/pipe.cpp
+++ b/src/audio_core/hle/pipe.cpp
@@ -12,12 +12,14 @@
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "common/logging/log.h" 13#include "common/logging/log.h"
14 14
15#include "core/hle/service/dsp_dsp.h"
16
15namespace DSP { 17namespace DSP {
16namespace HLE { 18namespace HLE {
17 19
18static DspState dsp_state = DspState::Off; 20static DspState dsp_state = DspState::Off;
19 21
20static std::array<std::vector<u8>, static_cast<size_t>(DspPipe::DspPipe_MAX)> pipe_data; 22static std::array<std::vector<u8>, NUM_DSP_PIPE> pipe_data;
21 23
22void ResetPipes() { 24void ResetPipes() {
23 for (auto& data : pipe_data) { 25 for (auto& data : pipe_data) {
@@ -27,16 +29,18 @@ void ResetPipes() {
27} 29}
28 30
29std::vector<u8> PipeRead(DspPipe pipe_number, u32 length) { 31std::vector<u8> PipeRead(DspPipe pipe_number, u32 length) {
30 if (pipe_number >= DspPipe::DspPipe_MAX) { 32 const size_t pipe_index = static_cast<size_t>(pipe_number);
31 LOG_ERROR(Audio_DSP, "pipe_number = %u invalid", pipe_number); 33
34 if (pipe_index >= NUM_DSP_PIPE) {
35 LOG_ERROR(Audio_DSP, "pipe_number = %zu invalid", pipe_index);
32 return {}; 36 return {};
33 } 37 }
34 38
35 std::vector<u8>& data = pipe_data[static_cast<size_t>(pipe_number)]; 39 std::vector<u8>& data = pipe_data[pipe_index];
36 40
37 if (length > data.size()) { 41 if (length > data.size()) {
38 LOG_WARNING(Audio_DSP, "pipe_number = %u is out of data, application requested read of %u but %zu remain", 42 LOG_WARNING(Audio_DSP, "pipe_number = %zu is out of data, application requested read of %u but %zu remain",
39 pipe_number, length, data.size()); 43 pipe_index, length, data.size());
40 length = data.size(); 44 length = data.size();
41 } 45 }
42 46
@@ -49,16 +53,20 @@ std::vector<u8> PipeRead(DspPipe pipe_number, u32 length) {
49} 53}
50 54
51size_t GetPipeReadableSize(DspPipe pipe_number) { 55size_t GetPipeReadableSize(DspPipe pipe_number) {
52 if (pipe_number >= DspPipe::DspPipe_MAX) { 56 const size_t pipe_index = static_cast<size_t>(pipe_number);
53 LOG_ERROR(Audio_DSP, "pipe_number = %u invalid", pipe_number); 57
58 if (pipe_index >= NUM_DSP_PIPE) {
59 LOG_ERROR(Audio_DSP, "pipe_number = %zu invalid", pipe_index);
54 return 0; 60 return 0;
55 } 61 }
56 62
57 return pipe_data[static_cast<size_t>(pipe_number)].size(); 63 return pipe_data[pipe_index].size();
58} 64}
59 65
60static void WriteU16(DspPipe pipe_number, u16 value) { 66static void WriteU16(DspPipe pipe_number, u16 value) {
61 std::vector<u8>& data = pipe_data[static_cast<size_t>(pipe_number)]; 67 const size_t pipe_index = static_cast<size_t>(pipe_number);
68
69 std::vector<u8>& data = pipe_data.at(pipe_index);
62 // Little endian 70 // Little endian
63 data.emplace_back(value & 0xFF); 71 data.emplace_back(value & 0xFF);
64 data.emplace_back(value >> 8); 72 data.emplace_back(value >> 8);
@@ -91,6 +99,8 @@ static void AudioPipeWriteStructAddresses() {
91 for (u16 addr : struct_addresses) { 99 for (u16 addr : struct_addresses) {
92 WriteU16(DspPipe::Audio, addr); 100 WriteU16(DspPipe::Audio, addr);
93 } 101 }
102 // Signal that we have data on this pipe.
103 DSP_DSP::SignalPipeInterrupt(DspPipe::Audio);
94} 104}
95 105
96void PipeWrite(DspPipe pipe_number, const std::vector<u8>& buffer) { 106void PipeWrite(DspPipe pipe_number, const std::vector<u8>& buffer) {
@@ -145,7 +155,7 @@ void PipeWrite(DspPipe pipe_number, const std::vector<u8>& buffer) {
145 return; 155 return;
146 } 156 }
147 default: 157 default:
148 LOG_CRITICAL(Audio_DSP, "pipe_number = %u unimplemented", pipe_number); 158 LOG_CRITICAL(Audio_DSP, "pipe_number = %zu unimplemented", static_cast<size_t>(pipe_number));
149 UNIMPLEMENTED(); 159 UNIMPLEMENTED();
150 return; 160 return;
151 } 161 }
diff --git a/src/audio_core/hle/pipe.h b/src/audio_core/hle/pipe.h
index 382d35e87..64d97f8ba 100644
--- a/src/audio_core/hle/pipe.h
+++ b/src/audio_core/hle/pipe.h
@@ -19,9 +19,9 @@ enum class DspPipe {
19 Debug = 0, 19 Debug = 0,
20 Dma = 1, 20 Dma = 1,
21 Audio = 2, 21 Audio = 2,
22 Binary = 3, 22 Binary = 3
23 DspPipe_MAX
24}; 23};
24constexpr size_t NUM_DSP_PIPE = 8;
25 25
26/** 26/**
27 * Read a DSP pipe. 27 * Read a DSP pipe.
diff --git a/src/audio_core/interpolate.cpp b/src/audio_core/interpolate.cpp
new file mode 100644
index 000000000..fcd3aa066
--- /dev/null
+++ b/src/audio_core/interpolate.cpp
@@ -0,0 +1,85 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "audio_core/interpolate.h"
6
7#include "common/assert.h"
8#include "common/math_util.h"
9
10namespace AudioInterp {
11
12// Calculations are done in fixed point with 24 fractional bits.
13// (This is not verified. This was chosen for minimal error.)
14constexpr u64 scale_factor = 1 << 24;
15constexpr u64 scale_mask = scale_factor - 1;
16
17/// Here we step over the input in steps of rate_multiplier, until we consume all of the input.
18/// Three adjacent samples are passed to fn each step.
19template <typename Function>
20static StereoBuffer16 StepOverSamples(State& state, const StereoBuffer16& input, float rate_multiplier, Function fn) {
21 ASSERT(rate_multiplier > 0);
22
23 if (input.size() < 2)
24 return {};
25
26 StereoBuffer16 output;
27 output.reserve(static_cast<size_t>(input.size() / rate_multiplier));
28
29 u64 step_size = static_cast<u64>(rate_multiplier * scale_factor);
30
31 u64 fposition = 0;
32 const u64 max_fposition = input.size() * scale_factor;
33
34 while (fposition < 1 * scale_factor) {
35 u64 fraction = fposition & scale_mask;
36
37 output.push_back(fn(fraction, state.xn2, state.xn1, input[0]));
38
39 fposition += step_size;
40 }
41
42 while (fposition < 2 * scale_factor) {
43 u64 fraction = fposition & scale_mask;
44
45 output.push_back(fn(fraction, state.xn1, input[0], input[1]));
46
47 fposition += step_size;
48 }
49
50 while (fposition < max_fposition) {
51 u64 fraction = fposition & scale_mask;
52
53 size_t index = static_cast<size_t>(fposition / scale_factor);
54 output.push_back(fn(fraction, input[index - 2], input[index - 1], input[index]));
55
56 fposition += step_size;
57 }
58
59 state.xn2 = input[input.size() - 2];
60 state.xn1 = input[input.size() - 1];
61
62 return output;
63}
64
65StereoBuffer16 None(State& state, const StereoBuffer16& input, float rate_multiplier) {
66 return StepOverSamples(state, input, rate_multiplier, [](u64 fraction, const auto& x0, const auto& x1, const auto& x2) {
67 return x0;
68 });
69}
70
71StereoBuffer16 Linear(State& state, const StereoBuffer16& input, float rate_multiplier) {
72 // Note on accuracy: Some values that this produces are +/- 1 from the actual firmware.
73 return StepOverSamples(state, input, rate_multiplier, [](u64 fraction, const auto& x0, const auto& x1, const auto& x2) {
74 // This is a saturated subtraction. (Verified by black-box fuzzing.)
75 s64 delta0 = MathUtil::Clamp<s64>(x1[0] - x0[0], -32768, 32767);
76 s64 delta1 = MathUtil::Clamp<s64>(x1[1] - x0[1], -32768, 32767);
77
78 return std::array<s16, 2> {
79 static_cast<s16>(x0[0] + fraction * delta0 / scale_factor),
80 static_cast<s16>(x0[1] + fraction * delta1 / scale_factor)
81 };
82 });
83}
84
85} // namespace AudioInterp
diff --git a/src/audio_core/interpolate.h b/src/audio_core/interpolate.h
new file mode 100644
index 000000000..a4c0a453d
--- /dev/null
+++ b/src/audio_core/interpolate.h
@@ -0,0 +1,41 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <vector>
9
10#include "common/common_types.h"
11
12namespace AudioInterp {
13
14/// A variable length buffer of signed PCM16 stereo samples.
15using StereoBuffer16 = std::vector<std::array<s16, 2>>;
16
17struct State {
18 // Two historical samples.
19 std::array<s16, 2> xn1 = {}; ///< x[n-1]
20 std::array<s16, 2> xn2 = {}; ///< x[n-2]
21};
22
23/**
24 * No interpolation. This is equivalent to a zero-order hold. There is a two-sample predelay.
25 * @param input Input buffer.
26 * @param rate_multiplier Stretch factor. Must be a positive non-zero value.
27 * rate_multiplier > 1.0 performs decimation and rate_multipler < 1.0 performs upsampling.
28 * @return The resampled audio buffer.
29 */
30StereoBuffer16 None(State& state, const StereoBuffer16& input, float rate_multiplier);
31
32/**
33 * Linear interpolation. This is equivalent to a first-order hold. There is a two-sample predelay.
34 * @param input Input buffer.
35 * @param rate_multiplier Stretch factor. Must be a positive non-zero value.
36 * rate_multiplier > 1.0 performs decimation and rate_multipler < 1.0 performs upsampling.
37 * @return The resampled audio buffer.
38 */
39StereoBuffer16 Linear(State& state, const StereoBuffer16& input, float rate_multiplier);
40
41} // namespace AudioInterp
diff --git a/src/citra/CMakeLists.txt b/src/citra/CMakeLists.txt
index fa615deb9..43fa06b4e 100644
--- a/src/citra/CMakeLists.txt
+++ b/src/citra/CMakeLists.txt
@@ -21,7 +21,7 @@ target_link_libraries(citra ${SDL2_LIBRARY} ${OPENGL_gl_LIBRARY} inih glad)
21if (MSVC) 21if (MSVC)
22 target_link_libraries(citra getopt) 22 target_link_libraries(citra getopt)
23endif() 23endif()
24target_link_libraries(citra ${PLATFORM_LIBRARIES}) 24target_link_libraries(citra ${PLATFORM_LIBRARIES} Threads::Threads)
25 25
26if(${CMAKE_SYSTEM_NAME} MATCHES "Linux|FreeBSD|OpenBSD|NetBSD") 26if(${CMAKE_SYSTEM_NAME} MATCHES "Linux|FreeBSD|OpenBSD|NetBSD")
27 install(TARGETS citra RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/bin") 27 install(TARGETS citra RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/bin")
diff --git a/src/citra/config.cpp b/src/citra/config.cpp
index 6b6617352..9e2ecd307 100644
--- a/src/citra/config.cpp
+++ b/src/citra/config.cpp
@@ -65,6 +65,7 @@ void Config::ReadValues() {
65 // Renderer 65 // Renderer
66 Settings::values.use_hw_renderer = sdl2_config->GetBoolean("Renderer", "use_hw_renderer", false); 66 Settings::values.use_hw_renderer = sdl2_config->GetBoolean("Renderer", "use_hw_renderer", false);
67 Settings::values.use_shader_jit = sdl2_config->GetBoolean("Renderer", "use_shader_jit", true); 67 Settings::values.use_shader_jit = sdl2_config->GetBoolean("Renderer", "use_shader_jit", true);
68 Settings::values.use_scaled_resolution = sdl2_config->GetBoolean("Renderer", "use_scaled_resolution", false);
68 69
69 Settings::values.bg_red = (float)sdl2_config->GetReal("Renderer", "bg_red", 1.0); 70 Settings::values.bg_red = (float)sdl2_config->GetReal("Renderer", "bg_red", 1.0);
70 Settings::values.bg_green = (float)sdl2_config->GetReal("Renderer", "bg_green", 1.0); 71 Settings::values.bg_green = (float)sdl2_config->GetReal("Renderer", "bg_green", 1.0);
diff --git a/src/citra/default_ini.h b/src/citra/default_ini.h
index c9b490a00..1f1aa716b 100644
--- a/src/citra/default_ini.h
+++ b/src/citra/default_ini.h
@@ -46,6 +46,10 @@ use_hw_renderer =
46# 0 : Interpreter (slow), 1 (default): JIT (fast) 46# 0 : Interpreter (slow), 1 (default): JIT (fast)
47use_shader_jit = 47use_shader_jit =
48 48
49# Whether to use native 3DS screen resolution or to scale rendering resolution to the displayed screen size.
50# 0 (default): Native, 1: Scaled
51use_scaled_resolution =
52
49# The clear color for the renderer. What shows up on the sides of the bottom screen. 53# The clear color for the renderer. What shows up on the sides of the bottom screen.
50# Must be in range of 0.0-1.0. Defaults to 1.0 for all. 54# Must be in range of 0.0-1.0. Defaults to 1.0 for all.
51bg_red = 55bg_red =
diff --git a/src/citra_qt/CMakeLists.txt b/src/citra_qt/CMakeLists.txt
index 6660d9879..cc9e0c624 100644
--- a/src/citra_qt/CMakeLists.txt
+++ b/src/citra_qt/CMakeLists.txt
@@ -92,7 +92,7 @@ else()
92endif() 92endif()
93target_link_libraries(citra-qt core video_core audio_core common qhexedit) 93target_link_libraries(citra-qt core video_core audio_core common qhexedit)
94target_link_libraries(citra-qt ${OPENGL_gl_LIBRARY} ${CITRA_QT_LIBS}) 94target_link_libraries(citra-qt ${OPENGL_gl_LIBRARY} ${CITRA_QT_LIBS})
95target_link_libraries(citra-qt ${PLATFORM_LIBRARIES}) 95target_link_libraries(citra-qt ${PLATFORM_LIBRARIES} Threads::Threads)
96 96
97if(${CMAKE_SYSTEM_NAME} MATCHES "Linux|FreeBSD|OpenBSD|NetBSD") 97if(${CMAKE_SYSTEM_NAME} MATCHES "Linux|FreeBSD|OpenBSD|NetBSD")
98 install(TARGETS citra-qt RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/bin") 98 install(TARGETS citra-qt RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/bin")
diff --git a/src/citra_qt/bootmanager.cpp b/src/citra_qt/bootmanager.cpp
index 8e60b9cad..01b81c11c 100644
--- a/src/citra_qt/bootmanager.cpp
+++ b/src/citra_qt/bootmanager.cpp
@@ -71,7 +71,9 @@ void EmuThread::run() {
71 // Shutdown the core emulation 71 // Shutdown the core emulation
72 System::Shutdown(); 72 System::Shutdown();
73 73
74#if MICROPROFILE_ENABLED
74 MicroProfileOnThreadExit(); 75 MicroProfileOnThreadExit();
76#endif
75 77
76 render_window->moveContext(); 78 render_window->moveContext();
77} 79}
diff --git a/src/citra_qt/config.cpp b/src/citra_qt/config.cpp
index e363be38a..7dc61fe40 100644
--- a/src/citra_qt/config.cpp
+++ b/src/citra_qt/config.cpp
@@ -45,6 +45,7 @@ void Config::ReadValues() {
45 qt_config->beginGroup("Renderer"); 45 qt_config->beginGroup("Renderer");
46 Settings::values.use_hw_renderer = qt_config->value("use_hw_renderer", false).toBool(); 46 Settings::values.use_hw_renderer = qt_config->value("use_hw_renderer", false).toBool();
47 Settings::values.use_shader_jit = qt_config->value("use_shader_jit", true).toBool(); 47 Settings::values.use_shader_jit = qt_config->value("use_shader_jit", true).toBool();
48 Settings::values.use_scaled_resolution = qt_config->value("use_scaled_resolution", false).toBool();
48 49
49 Settings::values.bg_red = qt_config->value("bg_red", 1.0).toFloat(); 50 Settings::values.bg_red = qt_config->value("bg_red", 1.0).toFloat();
50 Settings::values.bg_green = qt_config->value("bg_green", 1.0).toFloat(); 51 Settings::values.bg_green = qt_config->value("bg_green", 1.0).toFloat();
@@ -129,6 +130,7 @@ void Config::SaveValues() {
129 qt_config->beginGroup("Renderer"); 130 qt_config->beginGroup("Renderer");
130 qt_config->setValue("use_hw_renderer", Settings::values.use_hw_renderer); 131 qt_config->setValue("use_hw_renderer", Settings::values.use_hw_renderer);
131 qt_config->setValue("use_shader_jit", Settings::values.use_shader_jit); 132 qt_config->setValue("use_shader_jit", Settings::values.use_shader_jit);
133 qt_config->setValue("use_scaled_resolution", Settings::values.use_scaled_resolution);
132 134
133 // Cast to double because Qt's written float values are not human-readable 135 // Cast to double because Qt's written float values are not human-readable
134 qt_config->setValue("bg_red", (double)Settings::values.bg_red); 136 qt_config->setValue("bg_red", (double)Settings::values.bg_red);
diff --git a/src/citra_qt/configure_general.cpp b/src/citra_qt/configure_general.cpp
index a27d0d26c..62648e665 100644
--- a/src/citra_qt/configure_general.cpp
+++ b/src/citra_qt/configure_general.cpp
@@ -25,6 +25,7 @@ void ConfigureGeneral::setConfiguration() {
25 ui->region_combobox->setCurrentIndex(Settings::values.region_value); 25 ui->region_combobox->setCurrentIndex(Settings::values.region_value);
26 ui->toogle_hw_renderer->setChecked(Settings::values.use_hw_renderer); 26 ui->toogle_hw_renderer->setChecked(Settings::values.use_hw_renderer);
27 ui->toogle_shader_jit->setChecked(Settings::values.use_shader_jit); 27 ui->toogle_shader_jit->setChecked(Settings::values.use_shader_jit);
28 ui->toogle_scaled_resolution->setChecked(Settings::values.use_scaled_resolution);
28} 29}
29 30
30void ConfigureGeneral::applyConfiguration() { 31void ConfigureGeneral::applyConfiguration() {
@@ -33,5 +34,6 @@ void ConfigureGeneral::applyConfiguration() {
33 Settings::values.region_value = ui->region_combobox->currentIndex(); 34 Settings::values.region_value = ui->region_combobox->currentIndex();
34 Settings::values.use_hw_renderer = ui->toogle_hw_renderer->isChecked(); 35 Settings::values.use_hw_renderer = ui->toogle_hw_renderer->isChecked();
35 Settings::values.use_shader_jit = ui->toogle_shader_jit->isChecked(); 36 Settings::values.use_shader_jit = ui->toogle_shader_jit->isChecked();
37 Settings::values.use_scaled_resolution = ui->toogle_scaled_resolution->isChecked();
36 Settings::Apply(); 38 Settings::Apply();
37} 39}
diff --git a/src/citra_qt/configure_general.ui b/src/citra_qt/configure_general.ui
index 47184c5c6..5eb309793 100644
--- a/src/citra_qt/configure_general.ui
+++ b/src/citra_qt/configure_general.ui
@@ -128,6 +128,13 @@
128 </property> 128 </property>
129 </widget> 129 </widget>
130 </item> 130 </item>
131 <item>
132 <widget class="QCheckBox" name="toogle_scaled_resolution">
133 <property name="text">
134 <string>Enable scaled resolution</string>
135 </property>
136 </widget>
137 </item>
131 </layout> 138 </layout>
132 </item> 139 </item>
133 </layout> 140 </layout>
diff --git a/src/citra_qt/debugger/graphics_breakpoints.cpp b/src/citra_qt/debugger/graphics_breakpoints.cpp
index 819ec7707..c8510128a 100644
--- a/src/citra_qt/debugger/graphics_breakpoints.cpp
+++ b/src/citra_qt/debugger/graphics_breakpoints.cpp
@@ -75,7 +75,7 @@ QVariant BreakPointModel::data(const QModelIndex& index, int role) const
75 case Role_IsEnabled: 75 case Role_IsEnabled:
76 { 76 {
77 auto context = context_weak.lock(); 77 auto context = context_weak.lock();
78 return context && context->breakpoints[event].enabled; 78 return context && context->breakpoints[(int)event].enabled;
79 } 79 }
80 80
81 default: 81 default:
@@ -110,7 +110,7 @@ bool BreakPointModel::setData(const QModelIndex& index, const QVariant& value, i
110 if (!context) 110 if (!context)
111 return false; 111 return false;
112 112
113 context->breakpoints[event].enabled = value == Qt::Checked; 113 context->breakpoints[(int)event].enabled = value == Qt::Checked;
114 QModelIndex changed_index = createIndex(index.row(), 0); 114 QModelIndex changed_index = createIndex(index.row(), 0);
115 emit dataChanged(changed_index, changed_index); 115 emit dataChanged(changed_index, changed_index);
116 return true; 116 return true;
diff --git a/src/citra_qt/debugger/graphics_framebuffer.cpp b/src/citra_qt/debugger/graphics_framebuffer.cpp
index c30e75933..68cff78b2 100644
--- a/src/citra_qt/debugger/graphics_framebuffer.cpp
+++ b/src/citra_qt/debugger/graphics_framebuffer.cpp
@@ -346,5 +346,11 @@ u32 GraphicsFramebufferWidget::BytesPerPixel(GraphicsFramebufferWidget::Format f
346 case Format::RGBA4: 346 case Format::RGBA4:
347 case Format::D16: 347 case Format::D16:
348 return 2; 348 return 2;
349 default:
350 UNREACHABLE_MSG("GraphicsFramebufferWidget::BytesPerPixel: this "
351 "should not be reached as this function should "
352 "be given a format which is in "
353 "GraphicsFramebufferWidget::Format. Instead got %i",
354 static_cast<int>(format));
349 } 355 }
350} 356}
diff --git a/src/citra_qt/debugger/profiler.cpp b/src/citra_qt/debugger/profiler.cpp
index 4f6ba0e1f..7bb010f77 100644
--- a/src/citra_qt/debugger/profiler.cpp
+++ b/src/citra_qt/debugger/profiler.cpp
@@ -9,13 +9,16 @@
9#include "citra_qt/debugger/profiler.h" 9#include "citra_qt/debugger/profiler.h"
10#include "citra_qt/util/util.h" 10#include "citra_qt/util/util.h"
11 11
12#include "common/common_types.h"
12#include "common/microprofile.h" 13#include "common/microprofile.h"
13#include "common/profiler_reporting.h" 14#include "common/profiler_reporting.h"
14 15
15// Include the implementation of the UI in this file. This isn't in microprofile.cpp because the 16// Include the implementation of the UI in this file. This isn't in microprofile.cpp because the
16// non-Qt frontends don't need it (and don't implement the UI drawing hooks either). 17// non-Qt frontends don't need it (and don't implement the UI drawing hooks either).
18#if MICROPROFILE_ENABLED
17#define MICROPROFILEUI_IMPL 1 19#define MICROPROFILEUI_IMPL 1
18#include "common/microprofileui.h" 20#include "common/microprofileui.h"
21#endif
19 22
20using namespace Common::Profiling; 23using namespace Common::Profiling;
21 24
@@ -34,21 +37,9 @@ static QVariant GetDataForColumn(int col, const AggregatedDuration& duration)
34 } 37 }
35} 38}
36 39
37static const TimingCategoryInfo* GetCategoryInfo(int id)
38{
39 const auto& categories = GetProfilingManager().GetTimingCategoriesInfo();
40 if ((size_t)id >= categories.size()) {
41 return nullptr;
42 } else {
43 return &categories[id];
44 }
45}
46
47ProfilerModel::ProfilerModel(QObject* parent) : QAbstractItemModel(parent) 40ProfilerModel::ProfilerModel(QObject* parent) : QAbstractItemModel(parent)
48{ 41{
49 updateProfilingInfo(); 42 updateProfilingInfo();
50 const auto& categories = GetProfilingManager().GetTimingCategoriesInfo();
51 results.time_per_category.resize(categories.size());
52} 43}
53 44
54QVariant ProfilerModel::headerData(int section, Qt::Orientation orientation, int role) const 45QVariant ProfilerModel::headerData(int section, Qt::Orientation orientation, int role) const
@@ -85,7 +76,7 @@ int ProfilerModel::rowCount(const QModelIndex& parent) const
85 if (parent.isValid()) { 76 if (parent.isValid()) {
86 return 0; 77 return 0;
87 } else { 78 } else {
88 return static_cast<int>(results.time_per_category.size() + 2); 79 return 2;
89 } 80 }
90} 81}
91 82
@@ -104,17 +95,6 @@ QVariant ProfilerModel::data(const QModelIndex& index, int role) const
104 } else { 95 } else {
105 return GetDataForColumn(index.column(), results.interframe_time); 96 return GetDataForColumn(index.column(), results.interframe_time);
106 } 97 }
107 } else {
108 if (index.column() == 0) {
109 const TimingCategoryInfo* info = GetCategoryInfo(index.row() - 2);
110 return info != nullptr ? QString(info->name) : QVariant();
111 } else {
112 if (index.row() - 2 < (int)results.time_per_category.size()) {
113 return GetDataForColumn(index.column(), results.time_per_category[index.row() - 2]);
114 } else {
115 return QVariant();
116 }
117 }
118 } 98 }
119 } 99 }
120 100
@@ -148,6 +128,8 @@ void ProfilerWidget::setProfilingInfoUpdateEnabled(bool enable)
148 } 128 }
149} 129}
150 130
131#if MICROPROFILE_ENABLED
132
151class MicroProfileWidget : public QWidget { 133class MicroProfileWidget : public QWidget {
152public: 134public:
153 MicroProfileWidget(QWidget* parent = nullptr); 135 MicroProfileWidget(QWidget* parent = nullptr);
@@ -171,6 +153,8 @@ private:
171 QTimer update_timer; 153 QTimer update_timer;
172}; 154};
173 155
156#endif
157
174MicroProfileDialog::MicroProfileDialog(QWidget* parent) 158MicroProfileDialog::MicroProfileDialog(QWidget* parent)
175 : QWidget(parent, Qt::Dialog) 159 : QWidget(parent, Qt::Dialog)
176{ 160{
@@ -180,6 +164,8 @@ MicroProfileDialog::MicroProfileDialog(QWidget* parent)
180 // Remove the "?" button from the titlebar and enable the maximize button 164 // Remove the "?" button from the titlebar and enable the maximize button
181 setWindowFlags(windowFlags() & ~Qt::WindowContextHelpButtonHint | Qt::WindowMaximizeButtonHint); 165 setWindowFlags(windowFlags() & ~Qt::WindowContextHelpButtonHint | Qt::WindowMaximizeButtonHint);
182 166
167#if MICROPROFILE_ENABLED
168
183 MicroProfileWidget* widget = new MicroProfileWidget(this); 169 MicroProfileWidget* widget = new MicroProfileWidget(this);
184 170
185 QLayout* layout = new QVBoxLayout(this); 171 QLayout* layout = new QVBoxLayout(this);
@@ -191,6 +177,7 @@ MicroProfileDialog::MicroProfileDialog(QWidget* parent)
191 setFocusProxy(widget); 177 setFocusProxy(widget);
192 widget->setFocusPolicy(Qt::StrongFocus); 178 widget->setFocusPolicy(Qt::StrongFocus);
193 widget->setFocus(); 179 widget->setFocus();
180#endif
194} 181}
195 182
196QAction* MicroProfileDialog::toggleViewAction() { 183QAction* MicroProfileDialog::toggleViewAction() {
@@ -218,6 +205,9 @@ void MicroProfileDialog::hideEvent(QHideEvent* ev) {
218 QWidget::hideEvent(ev); 205 QWidget::hideEvent(ev);
219} 206}
220 207
208
209#if MICROPROFILE_ENABLED
210
221/// There's no way to pass a user pointer to MicroProfile, so this variable is used to make the 211/// There's no way to pass a user pointer to MicroProfile, so this variable is used to make the
222/// QPainter available inside the drawing callbacks. 212/// QPainter available inside the drawing callbacks.
223static QPainter* mp_painter = nullptr; 213static QPainter* mp_painter = nullptr;
@@ -337,3 +327,4 @@ void MicroProfileDrawLine2D(u32 vertices_length, float* vertices, u32 hex_color)
337 mp_painter->drawPolyline(point_buf.data(), vertices_length); 327 mp_painter->drawPolyline(point_buf.data(), vertices_length);
338 point_buf.clear(); 328 point_buf.clear();
339} 329}
330#endif
diff --git a/src/citra_qt/debugger/profiler.h b/src/citra_qt/debugger/profiler.h
index 036054740..3b38ed8ec 100644
--- a/src/citra_qt/debugger/profiler.h
+++ b/src/citra_qt/debugger/profiler.h
@@ -7,8 +7,10 @@
7#include <QAbstractItemModel> 7#include <QAbstractItemModel>
8#include <QDockWidget> 8#include <QDockWidget>
9#include <QTimer> 9#include <QTimer>
10
10#include "ui_profiler.h" 11#include "ui_profiler.h"
11 12
13#include "common/microprofile.h"
12#include "common/profiler_reporting.h" 14#include "common/profiler_reporting.h"
13 15
14class ProfilerModel : public QAbstractItemModel 16class ProfilerModel : public QAbstractItemModel
@@ -49,6 +51,7 @@ private:
49 QTimer update_timer; 51 QTimer update_timer;
50}; 52};
51 53
54
52class MicroProfileDialog : public QWidget { 55class MicroProfileDialog : public QWidget {
53 Q_OBJECT 56 Q_OBJECT
54 57
diff --git a/src/citra_qt/main.cpp b/src/citra_qt/main.cpp
index 2ca1e51f6..f1ab29755 100644
--- a/src/citra_qt/main.cpp
+++ b/src/citra_qt/main.cpp
@@ -69,8 +69,10 @@ GMainWindow::GMainWindow() : config(new Config()), emu_thread(nullptr)
69 addDockWidget(Qt::BottomDockWidgetArea, profilerWidget); 69 addDockWidget(Qt::BottomDockWidgetArea, profilerWidget);
70 profilerWidget->hide(); 70 profilerWidget->hide();
71 71
72#if MICROPROFILE_ENABLED
72 microProfileDialog = new MicroProfileDialog(this); 73 microProfileDialog = new MicroProfileDialog(this);
73 microProfileDialog->hide(); 74 microProfileDialog->hide();
75#endif
74 76
75 disasmWidget = new DisassemblerWidget(this, emu_thread.get()); 77 disasmWidget = new DisassemblerWidget(this, emu_thread.get());
76 addDockWidget(Qt::BottomDockWidgetArea, disasmWidget); 78 addDockWidget(Qt::BottomDockWidgetArea, disasmWidget);
@@ -110,7 +112,9 @@ GMainWindow::GMainWindow() : config(new Config()), emu_thread(nullptr)
110 112
111 QMenu* debug_menu = ui.menu_View->addMenu(tr("Debugging")); 113 QMenu* debug_menu = ui.menu_View->addMenu(tr("Debugging"));
112 debug_menu->addAction(profilerWidget->toggleViewAction()); 114 debug_menu->addAction(profilerWidget->toggleViewAction());
115#if MICROPROFILE_ENABLED
113 debug_menu->addAction(microProfileDialog->toggleViewAction()); 116 debug_menu->addAction(microProfileDialog->toggleViewAction());
117#endif
114 debug_menu->addAction(disasmWidget->toggleViewAction()); 118 debug_menu->addAction(disasmWidget->toggleViewAction());
115 debug_menu->addAction(registersWidget->toggleViewAction()); 119 debug_menu->addAction(registersWidget->toggleViewAction());
116 debug_menu->addAction(callstackWidget->toggleViewAction()); 120 debug_menu->addAction(callstackWidget->toggleViewAction());
@@ -136,8 +140,10 @@ GMainWindow::GMainWindow() : config(new Config()), emu_thread(nullptr)
136 restoreGeometry(UISettings::values.geometry); 140 restoreGeometry(UISettings::values.geometry);
137 restoreState(UISettings::values.state); 141 restoreState(UISettings::values.state);
138 render_window->restoreGeometry(UISettings::values.renderwindow_geometry); 142 render_window->restoreGeometry(UISettings::values.renderwindow_geometry);
143#if MICROPROFILE_ENABLED
139 microProfileDialog->restoreGeometry(UISettings::values.microprofile_geometry); 144 microProfileDialog->restoreGeometry(UISettings::values.microprofile_geometry);
140 microProfileDialog->setVisible(UISettings::values.microprofile_visible); 145 microProfileDialog->setVisible(UISettings::values.microprofile_visible);
146#endif
141 147
142 game_list->LoadInterfaceLayout(); 148 game_list->LoadInterfaceLayout();
143 149
@@ -511,9 +517,10 @@ void GMainWindow::closeEvent(QCloseEvent* event) {
511 UISettings::values.geometry = saveGeometry(); 517 UISettings::values.geometry = saveGeometry();
512 UISettings::values.state = saveState(); 518 UISettings::values.state = saveState();
513 UISettings::values.renderwindow_geometry = render_window->saveGeometry(); 519 UISettings::values.renderwindow_geometry = render_window->saveGeometry();
520#if MICROPROFILE_ENABLED
514 UISettings::values.microprofile_geometry = microProfileDialog->saveGeometry(); 521 UISettings::values.microprofile_geometry = microProfileDialog->saveGeometry();
515 UISettings::values.microprofile_visible = microProfileDialog->isVisible(); 522 UISettings::values.microprofile_visible = microProfileDialog->isVisible();
516 523#endif
517 UISettings::values.single_window_mode = ui.action_Single_Window_Mode->isChecked(); 524 UISettings::values.single_window_mode = ui.action_Single_Window_Mode->isChecked();
518 UISettings::values.display_titlebar = ui.actionDisplay_widget_title_bars->isChecked(); 525 UISettings::values.display_titlebar = ui.actionDisplay_widget_title_bars->isChecked();
519 UISettings::values.first_start = false; 526 UISettings::values.first_start = false;
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index c839ce173..aa6eee2a3 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -47,7 +47,6 @@ set(HEADERS
47 microprofile.h 47 microprofile.h
48 microprofileui.h 48 microprofileui.h
49 platform.h 49 platform.h
50 profiler.h
51 profiler_reporting.h 50 profiler_reporting.h
52 scm_rev.h 51 scm_rev.h
53 scope_exit.h 52 scope_exit.h
diff --git a/src/common/assert.h b/src/common/assert.h
index 6849778b7..cd9b819a9 100644
--- a/src/common/assert.h
+++ b/src/common/assert.h
@@ -39,6 +39,7 @@ static void assert_noinline_call(const Fn& fn) {
39 }); } while (0) 39 }); } while (0)
40 40
41#define UNREACHABLE() ASSERT_MSG(false, "Unreachable code!") 41#define UNREACHABLE() ASSERT_MSG(false, "Unreachable code!")
42#define UNREACHABLE_MSG(...) ASSERT_MSG(false, __VA_ARGS__)
42 43
43#ifdef _DEBUG 44#ifdef _DEBUG
44#define DEBUG_ASSERT(_a_) ASSERT(_a_) 45#define DEBUG_ASSERT(_a_) ASSERT(_a_)
@@ -49,3 +50,4 @@ static void assert_noinline_call(const Fn& fn) {
49#endif 50#endif
50 51
51#define UNIMPLEMENTED() DEBUG_ASSERT_MSG(false, "Unimplemented code!") 52#define UNIMPLEMENTED() DEBUG_ASSERT_MSG(false, "Unimplemented code!")
53#define UNIMPLEMENTED_MSG(_a_, ...) ASSERT_MSG(false, _a_, __VA_ARGS__) \ No newline at end of file
diff --git a/src/common/file_util.h b/src/common/file_util.h
index b54a9fb72..3aac4fa46 100644
--- a/src/common/file_util.h
+++ b/src/common/file_util.h
@@ -192,7 +192,9 @@ public:
192 size_t ReadArray(T* data, size_t length) 192 size_t ReadArray(T* data, size_t length)
193 { 193 {
194 static_assert(std::is_standard_layout<T>(), "Given array does not consist of standard layout objects"); 194 static_assert(std::is_standard_layout<T>(), "Given array does not consist of standard layout objects");
195#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER)
195 static_assert(std::is_trivially_copyable<T>(), "Given array does not consist of trivially copyable objects"); 196 static_assert(std::is_trivially_copyable<T>(), "Given array does not consist of trivially copyable objects");
197#endif
196 198
197 if (!IsOpen()) { 199 if (!IsOpen()) {
198 m_good = false; 200 m_good = false;
@@ -210,7 +212,9 @@ public:
210 size_t WriteArray(const T* data, size_t length) 212 size_t WriteArray(const T* data, size_t length)
211 { 213 {
212 static_assert(std::is_standard_layout<T>(), "Given array does not consist of standard layout objects"); 214 static_assert(std::is_standard_layout<T>(), "Given array does not consist of standard layout objects");
215#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER)
213 static_assert(std::is_trivially_copyable<T>(), "Given array does not consist of trivially copyable objects"); 216 static_assert(std::is_trivially_copyable<T>(), "Given array does not consist of trivially copyable objects");
217#endif
214 218
215 if (!IsOpen()) { 219 if (!IsOpen()) {
216 m_good = false; 220 m_good = false;
diff --git a/src/common/microprofile.h b/src/common/microprofile.h
index d3b6cb97c..ef312c6e1 100644
--- a/src/common/microprofile.h
+++ b/src/common/microprofile.h
@@ -4,6 +4,10 @@
4 4
5#pragma once 5#pragma once
6 6
7// Uncomment this to disable microprofile. This will get you cleaner profiles when using
8// external sampling profilers like "Very Sleepy", and will improve performance somewhat.
9// #define MICROPROFILE_ENABLED 0
10
7// Customized Citra settings. 11// Customized Citra settings.
8// This file wraps the MicroProfile header so that these are consistent everywhere. 12// This file wraps the MicroProfile header so that these are consistent everywhere.
9#define MICROPROFILE_WEBSERVER 0 13#define MICROPROFILE_WEBSERVER 0
diff --git a/src/common/microprofileui.h b/src/common/microprofileui.h
index 97c369bd9..41abe6b75 100644
--- a/src/common/microprofileui.h
+++ b/src/common/microprofileui.h
@@ -13,4 +13,7 @@
13#define MICROPROFILE_HELP_ALT "Right-Click" 13#define MICROPROFILE_HELP_ALT "Right-Click"
14#define MICROPROFILE_HELP_MOD "Ctrl" 14#define MICROPROFILE_HELP_MOD "Ctrl"
15 15
16// This isn't included by microprofileui.h :(
17#include <cstdlib> // For std::abs
18
16#include <microprofileui.h> 19#include <microprofileui.h>
diff --git a/src/common/profiler.cpp b/src/common/profiler.cpp
index 7792edd2f..49eb3f40c 100644
--- a/src/common/profiler.cpp
+++ b/src/common/profiler.cpp
@@ -7,71 +7,16 @@
7#include <vector> 7#include <vector>
8 8
9#include "common/assert.h" 9#include "common/assert.h"
10#include "common/profiler.h"
11#include "common/profiler_reporting.h" 10#include "common/profiler_reporting.h"
12#include "common/synchronized_wrapper.h" 11#include "common/synchronized_wrapper.h"
13 12
14#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013.
15 #define WIN32_LEAN_AND_MEAN
16 #include <Windows.h> // For QueryPerformanceCounter/Frequency
17#endif
18
19namespace Common { 13namespace Common {
20namespace Profiling { 14namespace Profiling {
21 15
22#if ENABLE_PROFILING
23thread_local Timer* Timer::current_timer = nullptr;
24#endif
25
26#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013
27QPCClock::time_point QPCClock::now() {
28 static LARGE_INTEGER freq;
29 // Use this dummy local static to ensure this gets initialized once.
30 static BOOL dummy = QueryPerformanceFrequency(&freq);
31
32 LARGE_INTEGER ticks;
33 QueryPerformanceCounter(&ticks);
34
35 // This is prone to overflow when multiplying, which is why I'm using micro instead of nano. The
36 // correct way to approach this would be to just return ticks as a time_point and then subtract
37 // and do this conversion when creating a duration from two time_points, however, as far as I
38 // could tell the C++ requirements for these types are incompatible with this approach.
39 return time_point(duration(ticks.QuadPart * std::micro::den / freq.QuadPart));
40}
41#endif
42
43TimingCategory::TimingCategory(const char* name, TimingCategory* parent)
44 : accumulated_duration(0) {
45
46 ProfilingManager& manager = GetProfilingManager();
47 category_id = manager.RegisterTimingCategory(this, name);
48 if (parent != nullptr)
49 manager.SetTimingCategoryParent(category_id, parent->category_id);
50}
51
52ProfilingManager::ProfilingManager() 16ProfilingManager::ProfilingManager()
53 : last_frame_end(Clock::now()), this_frame_start(Clock::now()) { 17 : last_frame_end(Clock::now()), this_frame_start(Clock::now()) {
54} 18}
55 19
56unsigned int ProfilingManager::RegisterTimingCategory(TimingCategory* category, const char* name) {
57 TimingCategoryInfo info;
58 info.category = category;
59 info.name = name;
60 info.parent = TimingCategoryInfo::NO_PARENT;
61
62 unsigned int id = (unsigned int)timing_categories.size();
63 timing_categories.push_back(std::move(info));
64
65 return id;
66}
67
68void ProfilingManager::SetTimingCategoryParent(unsigned int category, unsigned int parent) {
69 ASSERT(category < timing_categories.size());
70 ASSERT(parent < timing_categories.size());
71
72 timing_categories[category].parent = parent;
73}
74
75void ProfilingManager::BeginFrame() { 20void ProfilingManager::BeginFrame() {
76 this_frame_start = Clock::now(); 21 this_frame_start = Clock::now();
77} 22}
@@ -82,11 +27,6 @@ void ProfilingManager::FinishFrame() {
82 results.interframe_time = now - last_frame_end; 27 results.interframe_time = now - last_frame_end;
83 results.frame_time = now - this_frame_start; 28 results.frame_time = now - this_frame_start;
84 29
85 results.time_per_category.resize(timing_categories.size());
86 for (size_t i = 0; i < timing_categories.size(); ++i) {
87 results.time_per_category[i] = timing_categories[i].category->GetAccumulatedTime();
88 }
89
90 last_frame_end = now; 30 last_frame_end = now;
91} 31}
92 32
@@ -100,26 +40,9 @@ void TimingResultsAggregator::Clear() {
100 window_size = cursor = 0; 40 window_size = cursor = 0;
101} 41}
102 42
103void TimingResultsAggregator::SetNumberOfCategories(size_t n) {
104 size_t old_size = times_per_category.size();
105 if (n == old_size)
106 return;
107
108 times_per_category.resize(n);
109
110 for (size_t i = old_size; i < n; ++i) {
111 times_per_category[i].resize(max_window_size, Duration::zero());
112 }
113}
114
115void TimingResultsAggregator::AddFrame(const ProfilingFrameResult& frame_result) { 43void TimingResultsAggregator::AddFrame(const ProfilingFrameResult& frame_result) {
116 SetNumberOfCategories(frame_result.time_per_category.size());
117
118 interframe_times[cursor] = frame_result.interframe_time; 44 interframe_times[cursor] = frame_result.interframe_time;
119 frame_times[cursor] = frame_result.frame_time; 45 frame_times[cursor] = frame_result.frame_time;
120 for (size_t i = 0; i < frame_result.time_per_category.size(); ++i) {
121 times_per_category[i][cursor] = frame_result.time_per_category[i];
122 }
123 46
124 ++cursor; 47 ++cursor;
125 if (cursor == max_window_size) 48 if (cursor == max_window_size)
@@ -162,11 +85,6 @@ AggregatedFrameResult TimingResultsAggregator::GetAggregatedResults() const {
162 result.fps = 0.0f; 85 result.fps = 0.0f;
163 } 86 }
164 87
165 result.time_per_category.resize(times_per_category.size());
166 for (size_t i = 0; i < times_per_category.size(); ++i) {
167 result.time_per_category[i] = AggregateField(times_per_category[i], window_size);
168 }
169
170 return result; 88 return result;
171} 89}
172 90
diff --git a/src/common/profiler.h b/src/common/profiler.h
deleted file mode 100644
index 3e967b4bc..000000000
--- a/src/common/profiler.h
+++ /dev/null
@@ -1,152 +0,0 @@
1// Copyright 2015 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <atomic>
8#include <chrono>
9
10#include "common/assert.h"
11#include "common/thread.h"
12
13namespace Common {
14namespace Profiling {
15
16// If this is defined to 0, it turns all Timers into no-ops.
17#ifndef ENABLE_PROFILING
18#define ENABLE_PROFILING 1
19#endif
20
21#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013
22// MSVC up to 2013 doesn't use QueryPerformanceCounter for high_resolution_clock, so it has bad
23// precision. We manually implement a clock based on QPC to get good results.
24
25struct QPCClock {
26 using duration = std::chrono::microseconds;
27 using time_point = std::chrono::time_point<QPCClock>;
28 using rep = duration::rep;
29 using period = duration::period;
30 static const bool is_steady = false;
31
32 static time_point now();
33};
34
35using Clock = QPCClock;
36#else
37using Clock = std::chrono::high_resolution_clock;
38#endif
39
40using Duration = Clock::duration;
41
42/**
43 * Represents a timing category that measured time can be accounted towards. Should be declared as a
44 * global variable and passed to Timers.
45 */
46class TimingCategory final {
47public:
48 TimingCategory(const char* name, TimingCategory* parent = nullptr);
49
50 unsigned int GetCategoryId() const {
51 return category_id;
52 }
53
54 /// Adds some time to this category. Can safely be called from multiple threads at the same time.
55 void AddTime(Duration amount) {
56 std::atomic_fetch_add_explicit(
57 &accumulated_duration, amount.count(),
58 std::memory_order_relaxed);
59 }
60
61 /**
62 * Atomically retrieves the accumulated measured time for this category and resets the counter
63 * to zero. Can be safely called concurrently with AddTime.
64 */
65 Duration GetAccumulatedTime() {
66 return Duration(std::atomic_exchange_explicit(
67 &accumulated_duration, (Duration::rep)0,
68 std::memory_order_relaxed));
69 }
70
71private:
72 unsigned int category_id;
73 std::atomic<Duration::rep> accumulated_duration;
74};
75
76/**
77 * Measures time elapsed between a call to Start and a call to Stop and attributes it to the given
78 * TimingCategory. Start/Stop can be called multiple times on the same timer, but each call must be
79 * appropriately paired.
80 *
81 * When a Timer is started, it automatically pauses a previously running timer on the same thread,
82 * which is resumed when it is stopped. As such, no special action needs to be taken to avoid
83 * double-accounting of time on two categories.
84 */
85class Timer {
86public:
87 Timer(TimingCategory& category) : category(category) {
88 }
89
90 void Start() {
91#if ENABLE_PROFILING
92 ASSERT(!running);
93 previous_timer = current_timer;
94 current_timer = this;
95 if (previous_timer != nullptr)
96 previous_timer->StopTiming();
97
98 StartTiming();
99#endif
100 }
101
102 void Stop() {
103#if ENABLE_PROFILING
104 ASSERT(running);
105 StopTiming();
106
107 if (previous_timer != nullptr)
108 previous_timer->StartTiming();
109 current_timer = previous_timer;
110#endif
111 }
112
113private:
114#if ENABLE_PROFILING
115 void StartTiming() {
116 start = Clock::now();
117 running = true;
118 }
119
120 void StopTiming() {
121 auto duration = Clock::now() - start;
122 running = false;
123 category.AddTime(std::chrono::duration_cast<Duration>(duration));
124 }
125
126 Clock::time_point start;
127 bool running = false;
128
129 Timer* previous_timer;
130 static thread_local Timer* current_timer;
131#endif
132
133 TimingCategory& category;
134};
135
136/**
137 * A Timer that automatically starts timing when created and stops at the end of the scope. Should
138 * be used in the majority of cases.
139 */
140class ScopeTimer : public Timer {
141public:
142 ScopeTimer(TimingCategory& category) : Timer(category) {
143 Start();
144 }
145
146 ~ScopeTimer() {
147 Stop();
148 }
149};
150
151} // namespace Profiling
152} // namespace Common
diff --git a/src/common/profiler_reporting.h b/src/common/profiler_reporting.h
index df98e05b7..fa1ac883f 100644
--- a/src/common/profiler_reporting.h
+++ b/src/common/profiler_reporting.h
@@ -4,22 +4,17 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <chrono>
7#include <cstddef> 8#include <cstddef>
8#include <vector> 9#include <vector>
9 10
10#include "common/profiler.h"
11#include "common/synchronized_wrapper.h" 11#include "common/synchronized_wrapper.h"
12 12
13namespace Common { 13namespace Common {
14namespace Profiling { 14namespace Profiling {
15 15
16struct TimingCategoryInfo { 16using Clock = std::chrono::high_resolution_clock;
17 static const unsigned int NO_PARENT = -1; 17using Duration = Clock::duration;
18
19 TimingCategory* category;
20 const char* name;
21 unsigned int parent;
22};
23 18
24struct ProfilingFrameResult { 19struct ProfilingFrameResult {
25 /// Time since the last delivered frame 20 /// Time since the last delivered frame
@@ -27,22 +22,12 @@ struct ProfilingFrameResult {
27 22
28 /// Time spent processing a frame, excluding VSync 23 /// Time spent processing a frame, excluding VSync
29 Duration frame_time; 24 Duration frame_time;
30
31 /// Total amount of time spent inside each category in this frame. Indexed by the category id
32 std::vector<Duration> time_per_category;
33}; 25};
34 26
35class ProfilingManager final { 27class ProfilingManager final {
36public: 28public:
37 ProfilingManager(); 29 ProfilingManager();
38 30
39 unsigned int RegisterTimingCategory(TimingCategory* category, const char* name);
40 void SetTimingCategoryParent(unsigned int category, unsigned int parent);
41
42 const std::vector<TimingCategoryInfo>& GetTimingCategoriesInfo() const {
43 return timing_categories;
44 }
45
46 /// This should be called after swapping screen buffers. 31 /// This should be called after swapping screen buffers.
47 void BeginFrame(); 32 void BeginFrame();
48 /// This should be called before swapping screen buffers. 33 /// This should be called before swapping screen buffers.
@@ -54,7 +39,6 @@ public:
54 } 39 }
55 40
56private: 41private:
57 std::vector<TimingCategoryInfo> timing_categories;
58 Clock::time_point last_frame_end; 42 Clock::time_point last_frame_end;
59 Clock::time_point this_frame_start; 43 Clock::time_point this_frame_start;
60 44
@@ -73,9 +57,6 @@ struct AggregatedFrameResult {
73 AggregatedDuration frame_time; 57 AggregatedDuration frame_time;
74 58
75 float fps; 59 float fps;
76
77 /// Total amount of time spent inside each category in this frame. Indexed by the category id
78 std::vector<AggregatedDuration> time_per_category;
79}; 60};
80 61
81class TimingResultsAggregator final { 62class TimingResultsAggregator final {
@@ -83,7 +64,6 @@ public:
83 TimingResultsAggregator(size_t window_size); 64 TimingResultsAggregator(size_t window_size);
84 65
85 void Clear(); 66 void Clear();
86 void SetNumberOfCategories(size_t n);
87 67
88 void AddFrame(const ProfilingFrameResult& frame_result); 68 void AddFrame(const ProfilingFrameResult& frame_result);
89 69
@@ -95,7 +75,6 @@ public:
95 75
96 std::vector<Duration> interframe_times; 76 std::vector<Duration> interframe_times;
97 std::vector<Duration> frame_times; 77 std::vector<Duration> frame_times;
98 std::vector<std::vector<Duration>> times_per_category;
99}; 78};
100 79
101ProfilingManager& GetProfilingManager(); 80ProfilingManager& GetProfilingManager();
diff --git a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
index 647784208..8d4b26815 100644
--- a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
+++ b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
@@ -10,7 +10,6 @@
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "common/logging/log.h" 11#include "common/logging/log.h"
12#include "common/microprofile.h" 12#include "common/microprofile.h"
13#include "common/profiler.h"
14 13
15#include "core/memory.h" 14#include "core/memory.h"
16#include "core/hle/svc.h" 15#include "core/hle/svc.h"
@@ -25,9 +24,6 @@
25 24
26#include "core/gdbstub/gdbstub.h" 25#include "core/gdbstub/gdbstub.h"
27 26
28Common::Profiling::TimingCategory profile_execute("DynCom::Execute");
29Common::Profiling::TimingCategory profile_decode("DynCom::Decode");
30
31enum { 27enum {
32 COND = (1 << 0), 28 COND = (1 << 0),
33 NON_BRANCH = (1 << 1), 29 NON_BRANCH = (1 << 1),
@@ -3496,7 +3492,6 @@ static unsigned int InterpreterTranslateInstruction(const ARMul_State* cpu, cons
3496} 3492}
3497 3493
3498static int InterpreterTranslateBlock(ARMul_State* cpu, int& bb_start, u32 addr) { 3494static int InterpreterTranslateBlock(ARMul_State* cpu, int& bb_start, u32 addr) {
3499 Common::Profiling::ScopeTimer timer_decode(profile_decode);
3500 MICROPROFILE_SCOPE(DynCom_Decode); 3495 MICROPROFILE_SCOPE(DynCom_Decode);
3501 3496
3502 // Decode instruction, get index 3497 // Decode instruction, get index
@@ -3530,7 +3525,6 @@ static int InterpreterTranslateBlock(ARMul_State* cpu, int& bb_start, u32 addr)
3530} 3525}
3531 3526
3532static int InterpreterTranslateSingle(ARMul_State* cpu, int& bb_start, u32 addr) { 3527static int InterpreterTranslateSingle(ARMul_State* cpu, int& bb_start, u32 addr) {
3533 Common::Profiling::ScopeTimer timer_decode(profile_decode);
3534 MICROPROFILE_SCOPE(DynCom_Decode); 3528 MICROPROFILE_SCOPE(DynCom_Decode);
3535 3529
3536 ARM_INST_PTR inst_base = nullptr; 3530 ARM_INST_PTR inst_base = nullptr;
@@ -3565,7 +3559,6 @@ static int clz(unsigned int x) {
3565MICROPROFILE_DEFINE(DynCom_Execute, "DynCom", "Execute", MP_RGB(255, 0, 0)); 3559MICROPROFILE_DEFINE(DynCom_Execute, "DynCom", "Execute", MP_RGB(255, 0, 0));
3566 3560
3567unsigned InterpreterMainLoop(ARMul_State* cpu) { 3561unsigned InterpreterMainLoop(ARMul_State* cpu) {
3568 Common::Profiling::ScopeTimer timer_execute(profile_execute);
3569 MICROPROFILE_SCOPE(DynCom_Execute); 3562 MICROPROFILE_SCOPE(DynCom_Execute);
3570 3563
3571 GDBStub::BreakpointAddress breakpoint_data; 3564 GDBStub::BreakpointAddress breakpoint_data;
diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp
index c1a7ec5bf..ae0c116ef 100644
--- a/src/core/gdbstub/gdbstub.cpp
+++ b/src/core/gdbstub/gdbstub.cpp
@@ -529,7 +529,7 @@ static void ReadRegister() {
529 id |= HexCharToValue(command_buffer[2]); 529 id |= HexCharToValue(command_buffer[2]);
530 } 530 }
531 531
532 if (id >= R0_REGISTER && id <= R15_REGISTER) { 532 if (id <= R15_REGISTER) {
533 IntToGdbHex(reply, Core::g_app_core->GetReg(id)); 533 IntToGdbHex(reply, Core::g_app_core->GetReg(id));
534 } else if (id == CPSR_REGISTER) { 534 } else if (id == CPSR_REGISTER) {
535 IntToGdbHex(reply, Core::g_app_core->GetCPSR()); 535 IntToGdbHex(reply, Core::g_app_core->GetCPSR());
@@ -584,7 +584,7 @@ static void WriteRegister() {
584 id |= HexCharToValue(command_buffer[2]); 584 id |= HexCharToValue(command_buffer[2]);
585 } 585 }
586 586
587 if (id >= R0_REGISTER && id <= R15_REGISTER) { 587 if (id <= R15_REGISTER) {
588 Core::g_app_core->SetReg(id, GdbHexToInt(buffer_ptr)); 588 Core::g_app_core->SetReg(id, GdbHexToInt(buffer_ptr));
589 } else if (id == CPSR_REGISTER) { 589 } else if (id == CPSR_REGISTER) {
590 Core::g_app_core->SetCPSR(GdbHexToInt(buffer_ptr)); 590 Core::g_app_core->SetCPSR(GdbHexToInt(buffer_ptr));
diff --git a/src/core/hle/result.h b/src/core/hle/result.h
index 2d22652d9..53931a106 100644
--- a/src/core/hle/result.h
+++ b/src/core/hle/result.h
@@ -18,6 +18,7 @@
18/// Detailed description of the error. This listing is likely incomplete. 18/// Detailed description of the error. This listing is likely incomplete.
19enum class ErrorDescription : u32 { 19enum class ErrorDescription : u32 {
20 Success = 0, 20 Success = 0,
21 OS_InvalidBufferDescriptor = 48,
21 WrongAddress = 53, 22 WrongAddress = 53,
22 FS_NotFound = 120, 23 FS_NotFound = 120,
23 FS_AlreadyExists = 190, 24 FS_AlreadyExists = 190,
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index 9591522e5..3f71e7f2b 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -43,7 +43,7 @@ void FindContentInfos(Service::Interface* self) {
43 am_content_count[media_type] = cmd_buff[4]; 43 am_content_count[media_type] = cmd_buff[4];
44 44
45 cmd_buff[1] = RESULT_SUCCESS.raw; 45 cmd_buff[1] = RESULT_SUCCESS.raw;
46 LOG_WARNING(Service_AM, "(STUBBED) media_type=%u, title_id=0x%016lx, content_cound=%u, content_ids_pointer=0x%08x, content_info_pointer=0x%08x", 46 LOG_WARNING(Service_AM, "(STUBBED) media_type=%u, title_id=0x%016llx, content_cound=%u, content_ids_pointer=0x%08x, content_info_pointer=0x%08x",
47 media_type, title_id, am_content_count[media_type], content_ids_pointer, content_info_pointer); 47 media_type, title_id, am_content_count[media_type], content_ids_pointer, content_info_pointer);
48} 48}
49 49
diff --git a/src/core/hle/service/dsp_dsp.cpp b/src/core/hle/service/dsp_dsp.cpp
index 08e437125..995bee3f9 100644
--- a/src/core/hle/service/dsp_dsp.cpp
+++ b/src/core/hle/service/dsp_dsp.cpp
@@ -2,6 +2,7 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
5#include <cinttypes> 6#include <cinttypes>
6 7
7#include "audio_core/hle/pipe.h" 8#include "audio_core/hle/pipe.h"
@@ -12,37 +13,80 @@
12#include "core/hle/kernel/event.h" 13#include "core/hle/kernel/event.h"
13#include "core/hle/service/dsp_dsp.h" 14#include "core/hle/service/dsp_dsp.h"
14 15
16using DspPipe = DSP::HLE::DspPipe;
17
15//////////////////////////////////////////////////////////////////////////////////////////////////// 18////////////////////////////////////////////////////////////////////////////////////////////////////
16// Namespace DSP_DSP 19// Namespace DSP_DSP
17 20
18namespace DSP_DSP { 21namespace DSP_DSP {
19 22
20static u32 read_pipe_count;
21static Kernel::SharedPtr<Kernel::Event> semaphore_event; 23static Kernel::SharedPtr<Kernel::Event> semaphore_event;
22 24
23struct PairHash { 25/// There are three types of interrupts
24 template <typename T, typename U> 26enum class InterruptType {
25 std::size_t operator()(const std::pair<T, U> &x) const { 27 Zero, One, Pipe
26 // TODO(yuriks): Replace with better hash combining function. 28};
27 return std::hash<T>()(x.first) ^ std::hash<U>()(x.second); 29constexpr size_t NUM_INTERRUPT_TYPE = 3;
30
31class InterruptEvents final {
32public:
33 void Signal(InterruptType type, DspPipe pipe) {
34 Kernel::SharedPtr<Kernel::Event>& event = Get(type, pipe);
35 if (event) {
36 event->Signal();
37 }
28 } 38 }
39
40 Kernel::SharedPtr<Kernel::Event>& Get(InterruptType type, DspPipe dsp_pipe) {
41 switch (type) {
42 case InterruptType::Zero:
43 return zero;
44 case InterruptType::One:
45 return one;
46 case InterruptType::Pipe: {
47 const size_t pipe_index = static_cast<size_t>(dsp_pipe);
48 ASSERT(pipe_index < DSP::HLE::NUM_DSP_PIPE);
49 return pipe[pipe_index];
50 }
51 }
52
53 UNREACHABLE_MSG("Invalid interrupt type = %zu", static_cast<size_t>(type));
54 }
55
56 bool HasTooManyEventsRegistered() const {
57 // Actual service implementation only has 6 'slots' for interrupts.
58 constexpr size_t max_number_of_interrupt_events = 6;
59
60 size_t number = std::count_if(pipe.begin(), pipe.end(), [](const auto& evt) {
61 return evt != nullptr;
62 });
63
64 if (zero != nullptr)
65 number++;
66 if (one != nullptr)
67 number++;
68
69 return number >= max_number_of_interrupt_events;
70 }
71
72private:
73 /// Currently unknown purpose
74 Kernel::SharedPtr<Kernel::Event> zero = nullptr;
75 /// Currently unknown purpose
76 Kernel::SharedPtr<Kernel::Event> one = nullptr;
77 /// Each DSP pipe has an associated interrupt
78 std::array<Kernel::SharedPtr<Kernel::Event>, DSP::HLE::NUM_DSP_PIPE> pipe = {{}};
29}; 79};
30 80
31/// Map of (audio interrupt number, channel number) to Kernel::Events. See: RegisterInterruptEvents 81static InterruptEvents interrupt_events;
32static std::unordered_map<std::pair<u32, u32>, Kernel::SharedPtr<Kernel::Event>, PairHash> interrupt_events;
33 82
34// DSP Interrupts: 83// DSP Interrupts:
35// Interrupt #2 occurs every frame tick. Userland programs normally have a thread that's waiting 84// The audio-pipe interrupt occurs every frame tick. Userland programs normally have a thread
36// for an interrupt event. Immediately after this interrupt event, userland normally updates the 85// that's waiting for an interrupt event. Immediately after this interrupt event, userland
37// state in the next region and increments the relevant frame counter by two. 86// normally updates the state in the next region and increments the relevant frame counter by
38void SignalAllInterrupts() { 87// two.
39 // HACK: The other interrupts have currently unknown purpose, we trigger them each tick in any case. 88void SignalPipeInterrupt(DspPipe pipe) {
40 for (auto& interrupt_event : interrupt_events) 89 interrupt_events.Signal(InterruptType::Pipe, pipe);
41 interrupt_event.second->Signal();
42}
43
44void SignalInterrupt(u32 interrupt, u32 channel) {
45 interrupt_events[std::make_pair(interrupt, channel)]->Signal();
46} 90}
47 91
48/** 92/**
@@ -58,7 +102,10 @@ static void ConvertProcessAddressFromDspDram(Service::Interface* self) {
58 102
59 u32 addr = cmd_buff[1]; 103 u32 addr = cmd_buff[1];
60 104
105 cmd_buff[0] = IPC::MakeHeader(0xC, 2, 0);
61 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 106 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
107
108 // TODO(merry): There is a per-region offset missing in this calculation (that seems to be always zero).
62 cmd_buff[2] = (addr << 1) + (Memory::DSP_RAM_VADDR + 0x40000); 109 cmd_buff[2] = (addr << 1) + (Memory::DSP_RAM_VADDR + 0x40000);
63 110
64 LOG_DEBUG(Service_DSP, "addr=0x%08X", addr); 111 LOG_DEBUG(Service_DSP, "addr=0x%08X", addr);
@@ -113,7 +160,9 @@ static void LoadComponent(Service::Interface* self) {
113static void GetSemaphoreEventHandle(Service::Interface* self) { 160static void GetSemaphoreEventHandle(Service::Interface* self) {
114 u32* cmd_buff = Kernel::GetCommandBuffer(); 161 u32* cmd_buff = Kernel::GetCommandBuffer();
115 162
163 cmd_buff[0] = IPC::MakeHeader(0x16, 1, 2);
116 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 164 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
165 // cmd_buff[2] not set
117 cmd_buff[3] = Kernel::g_handle_table.Create(semaphore_event).MoveFrom(); // Event handle 166 cmd_buff[3] = Kernel::g_handle_table.Create(semaphore_event).MoveFrom(); // Event handle
118 167
119 LOG_WARNING(Service_DSP, "(STUBBED) called"); 168 LOG_WARNING(Service_DSP, "(STUBBED) called");
@@ -138,8 +187,7 @@ static void FlushDataCache(Service::Interface* self) {
138 u32 size = cmd_buff[2]; 187 u32 size = cmd_buff[2];
139 u32 process = cmd_buff[4]; 188 u32 process = cmd_buff[4];
140 189
141 // TODO(purpasmart96): Verify return header on HW 190 cmd_buff[0] = IPC::MakeHeader(0x13, 1, 0);
142
143 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 191 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
144 192
145 LOG_TRACE(Service_DSP, "called address=0x%08X, size=0x%X, process=0x%08X", address, size, process); 193 LOG_TRACE(Service_DSP, "called address=0x%08X, size=0x%X, process=0x%08X", address, size, process);
@@ -148,8 +196,8 @@ static void FlushDataCache(Service::Interface* self) {
148/** 196/**
149 * DSP_DSP::RegisterInterruptEvents service function 197 * DSP_DSP::RegisterInterruptEvents service function
150 * Inputs: 198 * Inputs:
151 * 1 : Interrupt Number 199 * 1 : Interrupt Type
152 * 2 : Channel Number 200 * 2 : Pipe Number
153 * 4 : Interrupt event handle 201 * 4 : Interrupt event handle
154 * Outputs: 202 * Outputs:
155 * 1 : Result of function, 0 on success, otherwise error code 203 * 1 : Result of function, 0 on success, otherwise error code
@@ -157,23 +205,40 @@ static void FlushDataCache(Service::Interface* self) {
157static void RegisterInterruptEvents(Service::Interface* self) { 205static void RegisterInterruptEvents(Service::Interface* self) {
158 u32* cmd_buff = Kernel::GetCommandBuffer(); 206 u32* cmd_buff = Kernel::GetCommandBuffer();
159 207
160 u32 interrupt = cmd_buff[1]; 208 u32 type_index = cmd_buff[1];
161 u32 channel = cmd_buff[2]; 209 u32 pipe_index = cmd_buff[2];
162 u32 event_handle = cmd_buff[4]; 210 u32 event_handle = cmd_buff[4];
163 211
212 ASSERT_MSG(type_index < NUM_INTERRUPT_TYPE && pipe_index < DSP::HLE::NUM_DSP_PIPE,
213 "Invalid type or pipe: type = %u, pipe = %u", type_index, pipe_index);
214
215 InterruptType type = static_cast<InterruptType>(cmd_buff[1]);
216 DspPipe pipe = static_cast<DspPipe>(cmd_buff[2]);
217
218 cmd_buff[0] = IPC::MakeHeader(0x15, 1, 0);
219
164 if (event_handle) { 220 if (event_handle) {
165 auto evt = Kernel::g_handle_table.Get<Kernel::Event>(cmd_buff[4]); 221 auto evt = Kernel::g_handle_table.Get<Kernel::Event>(cmd_buff[4]);
166 if (evt) { 222
167 interrupt_events[std::make_pair(interrupt, channel)] = evt; 223 if (!evt) {
168 cmd_buff[1] = RESULT_SUCCESS.raw; 224 LOG_INFO(Service_DSP, "Invalid event handle! type=%u, pipe=%u, event_handle=0x%08X", type_index, pipe_index, event_handle);
169 LOG_INFO(Service_DSP, "Registered interrupt=%u, channel=%u, event_handle=0x%08X", interrupt, channel, event_handle); 225 ASSERT(false); // TODO: This should really be handled at an IPC translation layer.
170 } else { 226 }
171 LOG_CRITICAL(Service_DSP, "Invalid event handle! interrupt=%u, channel=%u, event_handle=0x%08X", interrupt, channel, event_handle); 227
172 ASSERT(false); // This should really be handled at a IPC translation layer. 228 if (interrupt_events.HasTooManyEventsRegistered()) {
229 LOG_INFO(Service_DSP, "Ran out of space to register interrupts (Attempted to register type=%u, pipe=%u, event_handle=0x%08X)",
230 type_index, pipe_index, event_handle);
231 cmd_buff[1] = ResultCode(ErrorDescription::InvalidResultValue, ErrorModule::DSP, ErrorSummary::OutOfResource, ErrorLevel::Status).raw;
232 return;
173 } 233 }
234
235 interrupt_events.Get(type, pipe) = evt;
236 LOG_INFO(Service_DSP, "Registered type=%u, pipe=%u, event_handle=0x%08X", type_index, pipe_index, event_handle);
237 cmd_buff[1] = RESULT_SUCCESS.raw;
174 } else { 238 } else {
175 interrupt_events.erase(std::make_pair(interrupt, channel)); 239 interrupt_events.Get(type, pipe) = nullptr;
176 LOG_INFO(Service_DSP, "Unregistered interrupt=%u, channel=%u, event_handle=0x%08X", interrupt, channel, event_handle); 240 LOG_INFO(Service_DSP, "Unregistered interrupt=%u, channel=%u, event_handle=0x%08X", type_index, pipe_index, event_handle);
241 cmd_buff[1] = RESULT_SUCCESS.raw;
177 } 242 }
178} 243}
179 244
@@ -187,6 +252,7 @@ static void RegisterInterruptEvents(Service::Interface* self) {
187static void SetSemaphore(Service::Interface* self) { 252static void SetSemaphore(Service::Interface* self) {
188 u32* cmd_buff = Kernel::GetCommandBuffer(); 253 u32* cmd_buff = Kernel::GetCommandBuffer();
189 254
255 cmd_buff[0] = IPC::MakeHeader(0x7, 1, 0);
190 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 256 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
191 257
192 LOG_WARNING(Service_DSP, "(STUBBED) called"); 258 LOG_WARNING(Service_DSP, "(STUBBED) called");
@@ -195,7 +261,7 @@ static void SetSemaphore(Service::Interface* self) {
195/** 261/**
196 * DSP_DSP::WriteProcessPipe service function 262 * DSP_DSP::WriteProcessPipe service function
197 * Inputs: 263 * Inputs:
198 * 1 : Channel 264 * 1 : Pipe Number
199 * 2 : Size 265 * 2 : Size
200 * 3 : (size << 14) | 0x402 266 * 3 : (size << 14) | 0x402
201 * 4 : Buffer 267 * 4 : Buffer
@@ -206,24 +272,32 @@ static void SetSemaphore(Service::Interface* self) {
206static void WriteProcessPipe(Service::Interface* self) { 272static void WriteProcessPipe(Service::Interface* self) {
207 u32* cmd_buff = Kernel::GetCommandBuffer(); 273 u32* cmd_buff = Kernel::GetCommandBuffer();
208 274
209 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(cmd_buff[1]); 275 u32 pipe_index = cmd_buff[1];
210 u32 size = cmd_buff[2]; 276 u32 size = cmd_buff[2];
211 u32 buffer = cmd_buff[4]; 277 u32 buffer = cmd_buff[4];
212 278
213 ASSERT_MSG(IPC::StaticBufferDesc(size, 1) == cmd_buff[3], "IPC static buffer descriptor failed validation (0x%X). pipe=%u, size=0x%X, buffer=0x%08X", cmd_buff[3], pipe, size, buffer); 279 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(pipe_index);
214 ASSERT_MSG(Memory::GetPointer(buffer) != nullptr, "Invalid Buffer: pipe=%u, size=0x%X, buffer=0x%08X", pipe, size, buffer);
215 280
216 std::vector<u8> message(size); 281 if (IPC::StaticBufferDesc(size, 1) != cmd_buff[3]) {
282 LOG_ERROR(Service_DSP, "IPC static buffer descriptor failed validation (0x%X). pipe=%u, size=0x%X, buffer=0x%08X", cmd_buff[3], pipe_index, size, buffer);
283 cmd_buff[0] = IPC::MakeHeader(0, 1, 0);
284 cmd_buff[1] = ResultCode(ErrorDescription::OS_InvalidBufferDescriptor, ErrorModule::OS, ErrorSummary::WrongArgument, ErrorLevel::Permanent).raw;
285 return;
286 }
287
288 ASSERT_MSG(Memory::GetPointer(buffer) != nullptr, "Invalid Buffer: pipe=%u, size=0x%X, buffer=0x%08X", pipe_index, size, buffer);
217 289
290 std::vector<u8> message(size);
218 for (size_t i = 0; i < size; i++) { 291 for (size_t i = 0; i < size; i++) {
219 message[i] = Memory::Read8(buffer + i); 292 message[i] = Memory::Read8(buffer + i);
220 } 293 }
221 294
222 DSP::HLE::PipeWrite(pipe, message); 295 DSP::HLE::PipeWrite(pipe, message);
223 296
297 cmd_buff[0] = IPC::MakeHeader(0xD, 1, 0);
224 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 298 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
225 299
226 LOG_DEBUG(Service_DSP, "pipe=%u, size=0x%X, buffer=0x%08X", pipe, size, buffer); 300 LOG_DEBUG(Service_DSP, "pipe=%u, size=0x%X, buffer=0x%08X", pipe_index, size, buffer);
227} 301}
228 302
229/** 303/**
@@ -243,13 +317,16 @@ static void WriteProcessPipe(Service::Interface* self) {
243static void ReadPipeIfPossible(Service::Interface* self) { 317static void ReadPipeIfPossible(Service::Interface* self) {
244 u32* cmd_buff = Kernel::GetCommandBuffer(); 318 u32* cmd_buff = Kernel::GetCommandBuffer();
245 319
246 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(cmd_buff[1]); 320 u32 pipe_index = cmd_buff[1];
247 u32 unknown = cmd_buff[2]; 321 u32 unknown = cmd_buff[2];
248 u32 size = cmd_buff[3] & 0xFFFF; // Lower 16 bits are size 322 u32 size = cmd_buff[3] & 0xFFFF; // Lower 16 bits are size
249 VAddr addr = cmd_buff[0x41]; 323 VAddr addr = cmd_buff[0x41];
250 324
251 ASSERT_MSG(Memory::GetPointer(addr) != nullptr, "Invalid addr: pipe=0x%08X, unknown=0x%08X, size=0x%X, buffer=0x%08X", pipe, unknown, size, addr); 325 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(pipe_index);
326
327 ASSERT_MSG(Memory::GetPointer(addr) != nullptr, "Invalid addr: pipe=%u, unknown=0x%08X, size=0x%X, buffer=0x%08X", pipe_index, unknown, size, addr);
252 328
329 cmd_buff[0] = IPC::MakeHeader(0x10, 1, 2);
253 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 330 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
254 if (DSP::HLE::GetPipeReadableSize(pipe) >= size) { 331 if (DSP::HLE::GetPipeReadableSize(pipe) >= size) {
255 std::vector<u8> response = DSP::HLE::PipeRead(pipe, size); 332 std::vector<u8> response = DSP::HLE::PipeRead(pipe, size);
@@ -260,8 +337,10 @@ static void ReadPipeIfPossible(Service::Interface* self) {
260 } else { 337 } else {
261 cmd_buff[2] = 0; // Return no data 338 cmd_buff[2] = 0; // Return no data
262 } 339 }
340 cmd_buff[3] = IPC::StaticBufferDesc(size, 0);
341 cmd_buff[4] = addr;
263 342
264 LOG_DEBUG(Service_DSP, "pipe=0x%08X, unknown=0x%08X, size=0x%X, buffer=0x%08X, return cmd_buff[2]=0x%08X", pipe, unknown, size, addr, cmd_buff[2]); 343 LOG_DEBUG(Service_DSP, "pipe=%u, unknown=0x%08X, size=0x%X, buffer=0x%08X, return cmd_buff[2]=0x%08X", pipe_index, unknown, size, addr, cmd_buff[2]);
265} 344}
266 345
267/** 346/**
@@ -278,26 +357,31 @@ static void ReadPipeIfPossible(Service::Interface* self) {
278static void ReadPipe(Service::Interface* self) { 357static void ReadPipe(Service::Interface* self) {
279 u32* cmd_buff = Kernel::GetCommandBuffer(); 358 u32* cmd_buff = Kernel::GetCommandBuffer();
280 359
281 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(cmd_buff[1]); 360 u32 pipe_index = cmd_buff[1];
282 u32 unknown = cmd_buff[2]; 361 u32 unknown = cmd_buff[2];
283 u32 size = cmd_buff[3] & 0xFFFF; // Lower 16 bits are size 362 u32 size = cmd_buff[3] & 0xFFFF; // Lower 16 bits are size
284 VAddr addr = cmd_buff[0x41]; 363 VAddr addr = cmd_buff[0x41];
285 364
286 ASSERT_MSG(Memory::GetPointer(addr) != nullptr, "Invalid addr: pipe=0x%08X, unknown=0x%08X, size=0x%X, buffer=0x%08X", pipe, unknown, size, addr); 365 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(pipe_index);
366
367 ASSERT_MSG(Memory::GetPointer(addr) != nullptr, "Invalid addr: pipe=%u, unknown=0x%08X, size=0x%X, buffer=0x%08X", pipe_index, unknown, size, addr);
287 368
288 if (DSP::HLE::GetPipeReadableSize(pipe) >= size) { 369 if (DSP::HLE::GetPipeReadableSize(pipe) >= size) {
289 std::vector<u8> response = DSP::HLE::PipeRead(pipe, size); 370 std::vector<u8> response = DSP::HLE::PipeRead(pipe, size);
290 371
291 Memory::WriteBlock(addr, response.data(), response.size()); 372 Memory::WriteBlock(addr, response.data(), response.size());
292 373
374 cmd_buff[0] = IPC::MakeHeader(0xE, 2, 2);
293 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 375 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
294 cmd_buff[2] = static_cast<u32>(response.size()); 376 cmd_buff[2] = static_cast<u32>(response.size());
377 cmd_buff[3] = IPC::StaticBufferDesc(size, 0);
378 cmd_buff[4] = addr;
295 } else { 379 } else {
296 // No more data is in pipe. Hardware hangs in this case; this should never happen. 380 // No more data is in pipe. Hardware hangs in this case; this should never happen.
297 UNREACHABLE(); 381 UNREACHABLE();
298 } 382 }
299 383
300 LOG_DEBUG(Service_DSP, "pipe=0x%08X, unknown=0x%08X, size=0x%X, buffer=0x%08X, return cmd_buff[2]=0x%08X", pipe, unknown, size, addr, cmd_buff[2]); 384 LOG_DEBUG(Service_DSP, "pipe=%u, unknown=0x%08X, size=0x%X, buffer=0x%08X, return cmd_buff[2]=0x%08X", pipe_index, unknown, size, addr, cmd_buff[2]);
301} 385}
302 386
303/** 387/**
@@ -312,13 +396,16 @@ static void ReadPipe(Service::Interface* self) {
312static void GetPipeReadableSize(Service::Interface* self) { 396static void GetPipeReadableSize(Service::Interface* self) {
313 u32* cmd_buff = Kernel::GetCommandBuffer(); 397 u32* cmd_buff = Kernel::GetCommandBuffer();
314 398
315 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(cmd_buff[1]); 399 u32 pipe_index = cmd_buff[1];
316 u32 unknown = cmd_buff[2]; 400 u32 unknown = cmd_buff[2];
317 401
402 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(pipe_index);
403
404 cmd_buff[0] = IPC::MakeHeader(0xF, 2, 0);
318 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 405 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
319 cmd_buff[2] = DSP::HLE::GetPipeReadableSize(pipe); 406 cmd_buff[2] = DSP::HLE::GetPipeReadableSize(pipe);
320 407
321 LOG_DEBUG(Service_DSP, "pipe=0x%08X, unknown=0x%08X, return cmd_buff[2]=0x%08X", pipe, unknown, cmd_buff[2]); 408 LOG_DEBUG(Service_DSP, "pipe=%u, unknown=0x%08X, return cmd_buff[2]=0x%08X", pipe_index, unknown, cmd_buff[2]);
322} 409}
323 410
324/** 411/**
@@ -333,6 +420,7 @@ static void SetSemaphoreMask(Service::Interface* self) {
333 420
334 u32 mask = cmd_buff[1]; 421 u32 mask = cmd_buff[1];
335 422
423 cmd_buff[0] = IPC::MakeHeader(0x17, 1, 0);
336 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 424 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
337 425
338 LOG_WARNING(Service_DSP, "(STUBBED) called mask=0x%08X", mask); 426 LOG_WARNING(Service_DSP, "(STUBBED) called mask=0x%08X", mask);
@@ -350,6 +438,7 @@ static void SetSemaphoreMask(Service::Interface* self) {
350static void GetHeadphoneStatus(Service::Interface* self) { 438static void GetHeadphoneStatus(Service::Interface* self) {
351 u32* cmd_buff = Kernel::GetCommandBuffer(); 439 u32* cmd_buff = Kernel::GetCommandBuffer();
352 440
441 cmd_buff[0] = IPC::MakeHeader(0x1F, 2, 0);
353 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 442 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
354 cmd_buff[2] = 0; // Not using headphones? 443 cmd_buff[2] = 0; // Not using headphones?
355 444
@@ -376,6 +465,7 @@ static void RecvData(Service::Interface* self) {
376 465
377 // Application reads this after requesting DSP shutdown, to verify the DSP has indeed shutdown or slept. 466 // Application reads this after requesting DSP shutdown, to verify the DSP has indeed shutdown or slept.
378 467
468 cmd_buff[0] = IPC::MakeHeader(0x1, 2, 0);
379 cmd_buff[1] = RESULT_SUCCESS.raw; 469 cmd_buff[1] = RESULT_SUCCESS.raw;
380 switch (DSP::HLE::GetDspState()) { 470 switch (DSP::HLE::GetDspState()) {
381 case DSP::HLE::DspState::On: 471 case DSP::HLE::DspState::On:
@@ -411,6 +501,7 @@ static void RecvDataIsReady(Service::Interface* self) {
411 501
412 ASSERT_MSG(register_number == 0, "Unknown register_number %u", register_number); 502 ASSERT_MSG(register_number == 0, "Unknown register_number %u", register_number);
413 503
504 cmd_buff[0] = IPC::MakeHeader(0x2, 2, 0);
414 cmd_buff[1] = RESULT_SUCCESS.raw; 505 cmd_buff[1] = RESULT_SUCCESS.raw;
415 cmd_buff[2] = 1; // Ready to read 506 cmd_buff[2] = 1; // Ready to read
416 507
@@ -458,14 +549,14 @@ const Interface::FunctionInfo FunctionTable[] = {
458 549
459Interface::Interface() { 550Interface::Interface() {
460 semaphore_event = Kernel::Event::Create(Kernel::ResetType::OneShot, "DSP_DSP::semaphore_event"); 551 semaphore_event = Kernel::Event::Create(Kernel::ResetType::OneShot, "DSP_DSP::semaphore_event");
461 read_pipe_count = 0; 552 interrupt_events = {};
462 553
463 Register(FunctionTable); 554 Register(FunctionTable);
464} 555}
465 556
466Interface::~Interface() { 557Interface::~Interface() {
467 semaphore_event = nullptr; 558 semaphore_event = nullptr;
468 interrupt_events.clear(); 559 interrupt_events = {};
469} 560}
470 561
471} // namespace 562} // namespace
diff --git a/src/core/hle/service/dsp_dsp.h b/src/core/hle/service/dsp_dsp.h
index 32b89e9bb..22f6687cc 100644
--- a/src/core/hle/service/dsp_dsp.h
+++ b/src/core/hle/service/dsp_dsp.h
@@ -8,6 +8,12 @@
8 8
9#include "core/hle/service/service.h" 9#include "core/hle/service/service.h"
10 10
11namespace DSP {
12namespace HLE {
13enum class DspPipe;
14}
15}
16
11//////////////////////////////////////////////////////////////////////////////////////////////////// 17////////////////////////////////////////////////////////////////////////////////////////////////////
12// Namespace DSP_DSP 18// Namespace DSP_DSP
13 19
@@ -23,15 +29,10 @@ public:
23 } 29 }
24}; 30};
25 31
26/// Signal all audio related interrupts.
27void SignalAllInterrupts();
28
29/** 32/**
30 * Signal a specific audio related interrupt based on interrupt id and channel id. 33 * Signal a specific DSP related interrupt of type == InterruptType::Pipe, pipe == pipe.
31 * @param interrupt_id The interrupt id 34 * @param pipe The DSP pipe for which to signal an interrupt for.
32 * @param channel_id The channel id
33 * The significance of various values of interrupt_id and channel_id is not yet known.
34 */ 35 */
35void SignalInterrupt(u32 interrupt_id, u32 channel_id); 36void SignalPipeInterrupt(DSP::HLE::DspPipe pipe);
36 37
37} // namespace 38} // namespace DSP_DSP
diff --git a/src/core/hle/service/fs/archive.cpp b/src/core/hle/service/fs/archive.cpp
index e9588cb72..cc51ede0c 100644
--- a/src/core/hle/service/fs/archive.cpp
+++ b/src/core/hle/service/fs/archive.cpp
@@ -114,6 +114,7 @@ ResultVal<bool> File::SyncRequest() {
114 return read.Code(); 114 return read.Code();
115 } 115 }
116 cmd_buff[2] = static_cast<u32>(*read); 116 cmd_buff[2] = static_cast<u32>(*read);
117 Memory::RasterizerFlushAndInvalidateRegion(Memory::VirtualToPhysicalAddress(address), length);
117 break; 118 break;
118 } 119 }
119 120
diff --git a/src/core/hle/service/fs/fs_user.cpp b/src/core/hle/service/fs/fs_user.cpp
index 3ec7ceb30..7df7da5a4 100644
--- a/src/core/hle/service/fs/fs_user.cpp
+++ b/src/core/hle/service/fs/fs_user.cpp
@@ -250,7 +250,7 @@ static void CreateFile(Service::Interface* self) {
250 250
251 FileSys::Path file_path(filename_type, filename_size, filename_ptr); 251 FileSys::Path file_path(filename_type, filename_size, filename_ptr);
252 252
253 LOG_DEBUG(Service_FS, "type=%d size=%llu data=%s", filename_type, filename_size, file_path.DebugStr().c_str()); 253 LOG_DEBUG(Service_FS, "type=%d size=%llu data=%s", filename_type, file_size, file_path.DebugStr().c_str());
254 254
255 cmd_buff[1] = CreateFileInArchive(archive_handle, file_path, file_size).raw; 255 cmd_buff[1] = CreateFileInArchive(archive_handle, file_path, file_size).raw;
256} 256}
diff --git a/src/core/hle/service/gsp_gpu.cpp b/src/core/hle/service/gsp_gpu.cpp
index 0c655395e..b4c146e08 100644
--- a/src/core/hle/service/gsp_gpu.cpp
+++ b/src/core/hle/service/gsp_gpu.cpp
@@ -15,8 +15,6 @@
15 15
16#include "video_core/gpu_debugger.h" 16#include "video_core/gpu_debugger.h"
17#include "video_core/debug_utils/debug_utils.h" 17#include "video_core/debug_utils/debug_utils.h"
18#include "video_core/renderer_base.h"
19#include "video_core/video_core.h"
20 18
21#include "gsp_gpu.h" 19#include "gsp_gpu.h"
22 20
@@ -45,6 +43,8 @@ Kernel::SharedPtr<Kernel::SharedMemory> g_shared_memory;
45/// Thread index into interrupt relay queue 43/// Thread index into interrupt relay queue
46u32 g_thread_id = 0; 44u32 g_thread_id = 0;
47 45
46static bool gpu_right_acquired = false;
47
48/// Gets a pointer to a thread command buffer in GSP shared memory 48/// Gets a pointer to a thread command buffer in GSP shared memory
49static inline u8* GetCommandBuffer(u32 thread_id) { 49static inline u8* GetCommandBuffer(u32 thread_id) {
50 return g_shared_memory->GetPointer(0x800 + (thread_id * sizeof(CommandBuffer))); 50 return g_shared_memory->GetPointer(0x800 + (thread_id * sizeof(CommandBuffer)));
@@ -291,8 +291,6 @@ static void FlushDataCache(Service::Interface* self) {
291 u32 size = cmd_buff[2]; 291 u32 size = cmd_buff[2];
292 u32 process = cmd_buff[4]; 292 u32 process = cmd_buff[4];
293 293
294 VideoCore::g_renderer->Rasterizer()->InvalidateRegion(Memory::VirtualToPhysicalAddress(address), size);
295
296 // TODO(purpasmart96): Verify return header on HW 294 // TODO(purpasmart96): Verify return header on HW
297 295
298 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 296 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
@@ -374,6 +372,9 @@ static void UnregisterInterruptRelayQueue(Service::Interface* self) {
374 * @todo This probably does not belong in the GSP module, instead move to video_core 372 * @todo This probably does not belong in the GSP module, instead move to video_core
375 */ 373 */
376void SignalInterrupt(InterruptId interrupt_id) { 374void SignalInterrupt(InterruptId interrupt_id) {
375 if (!gpu_right_acquired) {
376 return;
377 }
377 if (nullptr == g_interrupt_event) { 378 if (nullptr == g_interrupt_event) {
378 LOG_WARNING(Service_GSP, "cannot synchronize until GSP event has been created!"); 379 LOG_WARNING(Service_GSP, "cannot synchronize until GSP event has been created!");
379 return; 380 return;
@@ -408,6 +409,8 @@ void SignalInterrupt(InterruptId interrupt_id) {
408 g_interrupt_event->Signal(); 409 g_interrupt_event->Signal();
409} 410}
410 411
412MICROPROFILE_DEFINE(GPU_GSP_DMA, "GPU", "GSP DMA", MP_RGB(100, 0, 255));
413
411/// Executes the next GSP command 414/// Executes the next GSP command
412static void ExecuteCommand(const Command& command, u32 thread_id) { 415static void ExecuteCommand(const Command& command, u32 thread_id) {
413 // Utility function to convert register ID to address 416 // Utility function to convert register ID to address
@@ -419,18 +422,21 @@ static void ExecuteCommand(const Command& command, u32 thread_id) {
419 422
420 // GX request DMA - typically used for copying memory from GSP heap to VRAM 423 // GX request DMA - typically used for copying memory from GSP heap to VRAM
421 case CommandId::REQUEST_DMA: 424 case CommandId::REQUEST_DMA:
422 VideoCore::g_renderer->Rasterizer()->FlushRegion(Memory::VirtualToPhysicalAddress(command.dma_request.source_address), 425 {
423 command.dma_request.size); 426 MICROPROFILE_SCOPE(GPU_GSP_DMA);
427
428 // TODO: Consider attempting rasterizer-accelerated surface blit if that usage is ever possible/likely
429 Memory::RasterizerFlushRegion(Memory::VirtualToPhysicalAddress(command.dma_request.source_address),
430 command.dma_request.size);
431 Memory::RasterizerFlushAndInvalidateRegion(Memory::VirtualToPhysicalAddress(command.dma_request.dest_address),
432 command.dma_request.size);
424 433
425 memcpy(Memory::GetPointer(command.dma_request.dest_address), 434 memcpy(Memory::GetPointer(command.dma_request.dest_address),
426 Memory::GetPointer(command.dma_request.source_address), 435 Memory::GetPointer(command.dma_request.source_address),
427 command.dma_request.size); 436 command.dma_request.size);
428 SignalInterrupt(InterruptId::DMA); 437 SignalInterrupt(InterruptId::DMA);
429
430 VideoCore::g_renderer->Rasterizer()->InvalidateRegion(Memory::VirtualToPhysicalAddress(command.dma_request.dest_address),
431 command.dma_request.size);
432 break; 438 break;
433 439 }
434 // TODO: This will need some rework in the future. (why?) 440 // TODO: This will need some rework in the future. (why?)
435 case CommandId::SUBMIT_GPU_CMDLIST: 441 case CommandId::SUBMIT_GPU_CMDLIST:
436 { 442 {
@@ -517,13 +523,8 @@ static void ExecuteCommand(const Command& command, u32 thread_id) {
517 523
518 case CommandId::CACHE_FLUSH: 524 case CommandId::CACHE_FLUSH:
519 { 525 {
520 for (auto& region : command.cache_flush.regions) { 526 // NOTE: Rasterizer flushing handled elsewhere in CPU read/write and other GPU handlers
521 if (region.size == 0) 527 // Use command.cache_flush.regions to implement this handler
522 break;
523
524 VideoCore::g_renderer->Rasterizer()->InvalidateRegion(
525 Memory::VirtualToPhysicalAddress(region.address), region.size);
526 }
527 break; 528 break;
528 } 529 }
529 530
@@ -628,6 +629,35 @@ static void ImportDisplayCaptureInfo(Service::Interface* self) {
628 LOG_WARNING(Service_GSP, "called"); 629 LOG_WARNING(Service_GSP, "called");
629} 630}
630 631
632/**
633 * GSP_GPU::AcquireRight service function
634 * Outputs:
635 * 1: Result code
636 */
637static void AcquireRight(Service::Interface* self) {
638 u32* cmd_buff = Kernel::GetCommandBuffer();
639
640 gpu_right_acquired = true;
641
642 cmd_buff[1] = RESULT_SUCCESS.raw;
643
644 LOG_WARNING(Service_GSP, "called");
645}
646
647/**
648 * GSP_GPU::ReleaseRight service function
649 * Outputs:
650 * 1: Result code
651 */
652static void ReleaseRight(Service::Interface* self) {
653 u32* cmd_buff = Kernel::GetCommandBuffer();
654
655 gpu_right_acquired = false;
656
657 cmd_buff[1] = RESULT_SUCCESS.raw;
658
659 LOG_WARNING(Service_GSP, "called");
660}
631 661
632const Interface::FunctionInfo FunctionTable[] = { 662const Interface::FunctionInfo FunctionTable[] = {
633 {0x00010082, WriteHWRegs, "WriteHWRegs"}, 663 {0x00010082, WriteHWRegs, "WriteHWRegs"},
@@ -651,8 +681,8 @@ const Interface::FunctionInfo FunctionTable[] = {
651 {0x00130042, RegisterInterruptRelayQueue, "RegisterInterruptRelayQueue"}, 681 {0x00130042, RegisterInterruptRelayQueue, "RegisterInterruptRelayQueue"},
652 {0x00140000, UnregisterInterruptRelayQueue, "UnregisterInterruptRelayQueue"}, 682 {0x00140000, UnregisterInterruptRelayQueue, "UnregisterInterruptRelayQueue"},
653 {0x00150002, nullptr, "TryAcquireRight"}, 683 {0x00150002, nullptr, "TryAcquireRight"},
654 {0x00160042, nullptr, "AcquireRight"}, 684 {0x00160042, AcquireRight, "AcquireRight"},
655 {0x00170000, nullptr, "ReleaseRight"}, 685 {0x00170000, ReleaseRight, "ReleaseRight"},
656 {0x00180000, ImportDisplayCaptureInfo, "ImportDisplayCaptureInfo"}, 686 {0x00180000, ImportDisplayCaptureInfo, "ImportDisplayCaptureInfo"},
657 {0x00190000, nullptr, "SaveVramSysArea"}, 687 {0x00190000, nullptr, "SaveVramSysArea"},
658 {0x001A0000, nullptr, "RestoreVramSysArea"}, 688 {0x001A0000, nullptr, "RestoreVramSysArea"},
@@ -673,11 +703,13 @@ Interface::Interface() {
673 g_shared_memory = nullptr; 703 g_shared_memory = nullptr;
674 704
675 g_thread_id = 0; 705 g_thread_id = 0;
706 gpu_right_acquired = false;
676} 707}
677 708
678Interface::~Interface() { 709Interface::~Interface() {
679 g_interrupt_event = nullptr; 710 g_interrupt_event = nullptr;
680 g_shared_memory = nullptr; 711 g_shared_memory = nullptr;
712 gpu_right_acquired = false;
681} 713}
682 714
683} // namespace 715} // namespace
diff --git a/src/core/hle/service/y2r_u.cpp b/src/core/hle/service/y2r_u.cpp
index 22f373adf..d16578f87 100644
--- a/src/core/hle/service/y2r_u.cpp
+++ b/src/core/hle/service/y2r_u.cpp
@@ -4,6 +4,7 @@
4 4
5#include <cstring> 5#include <cstring>
6 6
7#include "common/common_funcs.h"
7#include "common/common_types.h" 8#include "common/common_types.h"
8#include "common/logging/log.h" 9#include "common/logging/log.h"
9 10
@@ -12,9 +13,6 @@
12#include "core/hle/service/y2r_u.h" 13#include "core/hle/service/y2r_u.h"
13#include "core/hw/y2r.h" 14#include "core/hw/y2r.h"
14 15
15#include "video_core/renderer_base.h"
16#include "video_core/video_core.h"
17
18//////////////////////////////////////////////////////////////////////////////////////////////////// 16////////////////////////////////////////////////////////////////////////////////////////////////////
19// Namespace Y2R_U 17// Namespace Y2R_U
20 18
@@ -28,13 +26,17 @@ struct ConversionParameters {
28 u16 input_line_width; 26 u16 input_line_width;
29 u16 input_lines; 27 u16 input_lines;
30 StandardCoefficient standard_coefficient; 28 StandardCoefficient standard_coefficient;
31 u8 reserved; 29 u8 padding;
32 u16 alpha; 30 u16 alpha;
33}; 31};
34static_assert(sizeof(ConversionParameters) == 12, "ConversionParameters struct has incorrect size"); 32static_assert(sizeof(ConversionParameters) == 12, "ConversionParameters struct has incorrect size");
35 33
36static Kernel::SharedPtr<Kernel::Event> completion_event; 34static Kernel::SharedPtr<Kernel::Event> completion_event;
37static ConversionConfiguration conversion; 35static ConversionConfiguration conversion;
36static DitheringWeightParams dithering_weight_params;
37static u32 temporal_dithering_enabled = 0;
38static u32 transfer_end_interrupt_enabled = 0;
39static u32 spacial_dithering_enabled = 0;
38 40
39static const CoefficientSet standard_coefficients[4] = { 41static const CoefficientSet standard_coefficients[4] = {
40 {{ 0x100, 0x166, 0xB6, 0x58, 0x1C5, -0x166F, 0x10EE, -0x1C5B }}, // ITU_Rec601 42 {{ 0x100, 0x166, 0xB6, 0x58, 0x1C5, -0x166F, 0x10EE, -0x1C5B }}, // ITU_Rec601
@@ -73,7 +75,7 @@ ResultCode ConversionConfiguration::SetInputLines(u16 lines) {
73 75
74ResultCode ConversionConfiguration::SetStandardCoefficient(StandardCoefficient standard_coefficient) { 76ResultCode ConversionConfiguration::SetStandardCoefficient(StandardCoefficient standard_coefficient) {
75 size_t index = static_cast<size_t>(standard_coefficient); 77 size_t index = static_cast<size_t>(standard_coefficient);
76 if (index >= 4) { 78 if (index >= ARRAY_SIZE(standard_coefficients)) {
77 return ResultCode(ErrorDescription::InvalidEnumValue, ErrorModule::CAM, 79 return ResultCode(ErrorDescription::InvalidEnumValue, ErrorModule::CAM,
78 ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E053ED 80 ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E053ED
79 } 81 }
@@ -86,44 +88,183 @@ static void SetInputFormat(Service::Interface* self) {
86 u32* cmd_buff = Kernel::GetCommandBuffer(); 88 u32* cmd_buff = Kernel::GetCommandBuffer();
87 89
88 conversion.input_format = static_cast<InputFormat>(cmd_buff[1]); 90 conversion.input_format = static_cast<InputFormat>(cmd_buff[1]);
91
92 cmd_buff[0] = IPC::MakeHeader(0x1, 1, 0);
93 cmd_buff[1] = RESULT_SUCCESS.raw;
94
89 LOG_DEBUG(Service_Y2R, "called input_format=%hhu", conversion.input_format); 95 LOG_DEBUG(Service_Y2R, "called input_format=%hhu", conversion.input_format);
96}
97
98static void GetInputFormat(Service::Interface* self) {
99 u32* cmd_buff = Kernel::GetCommandBuffer();
90 100
101 cmd_buff[0] = IPC::MakeHeader(0x2, 2, 0);
91 cmd_buff[1] = RESULT_SUCCESS.raw; 102 cmd_buff[1] = RESULT_SUCCESS.raw;
103 cmd_buff[2] = static_cast<u32>(conversion.input_format);
104
105 LOG_DEBUG(Service_Y2R, "called input_format=%hhu", conversion.input_format);
92} 106}
93 107
94static void SetOutputFormat(Service::Interface* self) { 108static void SetOutputFormat(Service::Interface* self) {
95 u32* cmd_buff = Kernel::GetCommandBuffer(); 109 u32* cmd_buff = Kernel::GetCommandBuffer();
96 110
97 conversion.output_format = static_cast<OutputFormat>(cmd_buff[1]); 111 conversion.output_format = static_cast<OutputFormat>(cmd_buff[1]);
112
113 cmd_buff[0] = IPC::MakeHeader(0x3, 1, 0);
114 cmd_buff[1] = RESULT_SUCCESS.raw;
115
98 LOG_DEBUG(Service_Y2R, "called output_format=%hhu", conversion.output_format); 116 LOG_DEBUG(Service_Y2R, "called output_format=%hhu", conversion.output_format);
117}
118
119static void GetOutputFormat(Service::Interface* self) {
120 u32* cmd_buff = Kernel::GetCommandBuffer();
99 121
122 cmd_buff[0] = IPC::MakeHeader(0x4, 2, 0);
100 cmd_buff[1] = RESULT_SUCCESS.raw; 123 cmd_buff[1] = RESULT_SUCCESS.raw;
124 cmd_buff[2] = static_cast<u32>(conversion.output_format);
125
126 LOG_DEBUG(Service_Y2R, "called output_format=%hhu", conversion.output_format);
101} 127}
102 128
103static void SetRotation(Service::Interface* self) { 129static void SetRotation(Service::Interface* self) {
104 u32* cmd_buff = Kernel::GetCommandBuffer(); 130 u32* cmd_buff = Kernel::GetCommandBuffer();
105 131
106 conversion.rotation = static_cast<Rotation>(cmd_buff[1]); 132 conversion.rotation = static_cast<Rotation>(cmd_buff[1]);
133
134 cmd_buff[0] = IPC::MakeHeader(0x5, 1, 0);
135 cmd_buff[1] = RESULT_SUCCESS.raw;
136
107 LOG_DEBUG(Service_Y2R, "called rotation=%hhu", conversion.rotation); 137 LOG_DEBUG(Service_Y2R, "called rotation=%hhu", conversion.rotation);
138}
139
140static void GetRotation(Service::Interface* self) {
141 u32* cmd_buff = Kernel::GetCommandBuffer();
108 142
143 cmd_buff[0] = IPC::MakeHeader(0x6, 2, 0);
109 cmd_buff[1] = RESULT_SUCCESS.raw; 144 cmd_buff[1] = RESULT_SUCCESS.raw;
145 cmd_buff[2] = static_cast<u32>(conversion.rotation);
146
147 LOG_DEBUG(Service_Y2R, "called rotation=%hhu", conversion.rotation);
110} 148}
111 149
112static void SetBlockAlignment(Service::Interface* self) { 150static void SetBlockAlignment(Service::Interface* self) {
113 u32* cmd_buff = Kernel::GetCommandBuffer(); 151 u32* cmd_buff = Kernel::GetCommandBuffer();
114 152
115 conversion.block_alignment = static_cast<BlockAlignment>(cmd_buff[1]); 153 conversion.block_alignment = static_cast<BlockAlignment>(cmd_buff[1]);
116 LOG_DEBUG(Service_Y2R, "called alignment=%hhu", conversion.block_alignment);
117 154
155 cmd_buff[0] = IPC::MakeHeader(0x7, 1, 0);
156 cmd_buff[1] = RESULT_SUCCESS.raw;
157
158 LOG_DEBUG(Service_Y2R, "called block_alignment=%hhu", conversion.block_alignment);
159}
160
161static void GetBlockAlignment(Service::Interface* self) {
162 u32* cmd_buff = Kernel::GetCommandBuffer();
163
164 cmd_buff[0] = IPC::MakeHeader(0x8, 2, 0);
165 cmd_buff[1] = RESULT_SUCCESS.raw;
166 cmd_buff[2] = static_cast<u32>(conversion.block_alignment);
167
168 LOG_DEBUG(Service_Y2R, "called block_alignment=%hhu", conversion.block_alignment);
169}
170
171/**
172 * Y2R_U::SetSpacialDithering service function
173 * Inputs:
174 * 1 : u8, 0 = Disabled, 1 = Enabled
175 * Outputs:
176 * 1 : Result of function, 0 on success, otherwise error code
177 */
178static void SetSpacialDithering(Service::Interface* self) {
179 u32* cmd_buff = Kernel::GetCommandBuffer();
180 spacial_dithering_enabled = cmd_buff[1] & 0xF;
181
182 cmd_buff[0] = IPC::MakeHeader(0x9, 1, 0);
183 cmd_buff[1] = RESULT_SUCCESS.raw;
184
185 LOG_WARNING(Service_Y2R, "(STUBBED) called");
186}
187
188/**
189 * Y2R_U::GetSpacialDithering service function
190 * Outputs:
191 * 1 : Result of function, 0 on success, otherwise error code
192 * 2 : u8, 0 = Disabled, 1 = Enabled
193 */
194static void GetSpacialDithering(Service::Interface* self) {
195 u32* cmd_buff = Kernel::GetCommandBuffer();
196
197 cmd_buff[0] = IPC::MakeHeader(0xA, 2, 0);
198 cmd_buff[1] = RESULT_SUCCESS.raw;
199 cmd_buff[2] = spacial_dithering_enabled;
200
201 LOG_WARNING(Service_Y2R, "(STUBBED) called");
202}
203
204/**
205 * Y2R_U::SetTemporalDithering service function
206 * Inputs:
207 * 1 : u8, 0 = Disabled, 1 = Enabled
208 * Outputs:
209 * 1 : Result of function, 0 on success, otherwise error code
210 */
211static void SetTemporalDithering(Service::Interface* self) {
212 u32* cmd_buff = Kernel::GetCommandBuffer();
213 temporal_dithering_enabled = cmd_buff[1] & 0xF;
214
215 cmd_buff[0] = IPC::MakeHeader(0xB, 1, 0);
118 cmd_buff[1] = RESULT_SUCCESS.raw; 216 cmd_buff[1] = RESULT_SUCCESS.raw;
217
218 LOG_WARNING(Service_Y2R, "(STUBBED) called");
119} 219}
120 220
221/**
222 * Y2R_U::GetTemporalDithering service function
223 * Outputs:
224 * 1 : Result of function, 0 on success, otherwise error code
225 * 2 : u8, 0 = Disabled, 1 = Enabled
226 */
227static void GetTemporalDithering(Service::Interface* self) {
228 u32* cmd_buff = Kernel::GetCommandBuffer();
229
230 cmd_buff[0] = IPC::MakeHeader(0xC, 2, 0);
231 cmd_buff[1] = RESULT_SUCCESS.raw;
232 cmd_buff[2] = temporal_dithering_enabled;
233
234 LOG_WARNING(Service_Y2R, "(STUBBED) called");
235}
236
237/**
238 * Y2R_U::SetTransferEndInterrupt service function
239 * Inputs:
240 * 1 : u8, 0 = Disabled, 1 = Enabled
241 * Outputs:
242 * 1 : Result of function, 0 on success, otherwise error code
243 */
121static void SetTransferEndInterrupt(Service::Interface* self) { 244static void SetTransferEndInterrupt(Service::Interface* self) {
122 u32* cmd_buff = Kernel::GetCommandBuffer(); 245 u32* cmd_buff = Kernel::GetCommandBuffer();
246 transfer_end_interrupt_enabled = cmd_buff[1] & 0xf;
123 247
124 cmd_buff[0] = IPC::MakeHeader(0xD, 1, 0); 248 cmd_buff[0] = IPC::MakeHeader(0xD, 1, 0);
125 cmd_buff[1] = RESULT_SUCCESS.raw; 249 cmd_buff[1] = RESULT_SUCCESS.raw;
126 LOG_DEBUG(Service_Y2R, "(STUBBED) called"); 250
251 LOG_WARNING(Service_Y2R, "(STUBBED) called");
252}
253
254/**
255 * Y2R_U::GetTransferEndInterrupt service function
256 * Outputs:
257 * 1 : Result of function, 0 on success, otherwise error code
258 * 2 : u8, 0 = Disabled, 1 = Enabled
259 */
260static void GetTransferEndInterrupt(Service::Interface* self) {
261 u32* cmd_buff = Kernel::GetCommandBuffer();
262
263 cmd_buff[0] = IPC::MakeHeader(0xE, 2, 0);
264 cmd_buff[1] = RESULT_SUCCESS.raw;
265 cmd_buff[2] = transfer_end_interrupt_enabled;
266
267 LOG_WARNING(Service_Y2R, "(STUBBED) called");
127} 268}
128 269
129/** 270/**
@@ -135,8 +276,10 @@ static void SetTransferEndInterrupt(Service::Interface* self) {
135static void GetTransferEndEvent(Service::Interface* self) { 276static void GetTransferEndEvent(Service::Interface* self) {
136 u32* cmd_buff = Kernel::GetCommandBuffer(); 277 u32* cmd_buff = Kernel::GetCommandBuffer();
137 278
279 cmd_buff[0] = IPC::MakeHeader(0xF, 2, 0);
138 cmd_buff[1] = RESULT_SUCCESS.raw; 280 cmd_buff[1] = RESULT_SUCCESS.raw;
139 cmd_buff[3] = Kernel::g_handle_table.Create(completion_event).MoveFrom(); 281 cmd_buff[3] = Kernel::g_handle_table.Create(completion_event).MoveFrom();
282
140 LOG_DEBUG(Service_Y2R, "called"); 283 LOG_DEBUG(Service_Y2R, "called");
141} 284}
142 285
@@ -147,12 +290,12 @@ static void SetSendingY(Service::Interface* self) {
147 conversion.src_Y.image_size = cmd_buff[2]; 290 conversion.src_Y.image_size = cmd_buff[2];
148 conversion.src_Y.transfer_unit = cmd_buff[3]; 291 conversion.src_Y.transfer_unit = cmd_buff[3];
149 conversion.src_Y.gap = cmd_buff[4]; 292 conversion.src_Y.gap = cmd_buff[4];
150 u32 src_process_handle = cmd_buff[6];
151 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
152 "src_process_handle=0x%08X", conversion.src_Y.image_size,
153 conversion.src_Y.transfer_unit, conversion.src_Y.gap, src_process_handle);
154 293
294 cmd_buff[0] = IPC::MakeHeader(0x10, 1, 0);
155 cmd_buff[1] = RESULT_SUCCESS.raw; 295 cmd_buff[1] = RESULT_SUCCESS.raw;
296
297 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, src_process_handle=0x%08X",
298 conversion.src_Y.image_size, conversion.src_Y.transfer_unit, conversion.src_Y.gap, cmd_buff[6]);
156} 299}
157 300
158static void SetSendingU(Service::Interface* self) { 301static void SetSendingU(Service::Interface* self) {
@@ -162,12 +305,12 @@ static void SetSendingU(Service::Interface* self) {
162 conversion.src_U.image_size = cmd_buff[2]; 305 conversion.src_U.image_size = cmd_buff[2];
163 conversion.src_U.transfer_unit = cmd_buff[3]; 306 conversion.src_U.transfer_unit = cmd_buff[3];
164 conversion.src_U.gap = cmd_buff[4]; 307 conversion.src_U.gap = cmd_buff[4];
165 u32 src_process_handle = cmd_buff[6];
166 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
167 "src_process_handle=0x%08X", conversion.src_U.image_size,
168 conversion.src_U.transfer_unit, conversion.src_U.gap, src_process_handle);
169 308
309 cmd_buff[0] = IPC::MakeHeader(0x11, 1, 0);
170 cmd_buff[1] = RESULT_SUCCESS.raw; 310 cmd_buff[1] = RESULT_SUCCESS.raw;
311
312 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, src_process_handle=0x%08X",
313 conversion.src_U.image_size, conversion.src_U.transfer_unit, conversion.src_U.gap, cmd_buff[6]);
171} 314}
172 315
173static void SetSendingV(Service::Interface* self) { 316static void SetSendingV(Service::Interface* self) {
@@ -177,12 +320,12 @@ static void SetSendingV(Service::Interface* self) {
177 conversion.src_V.image_size = cmd_buff[2]; 320 conversion.src_V.image_size = cmd_buff[2];
178 conversion.src_V.transfer_unit = cmd_buff[3]; 321 conversion.src_V.transfer_unit = cmd_buff[3];
179 conversion.src_V.gap = cmd_buff[4]; 322 conversion.src_V.gap = cmd_buff[4];
180 u32 src_process_handle = cmd_buff[6];
181 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
182 "src_process_handle=0x%08X", conversion.src_V.image_size,
183 conversion.src_V.transfer_unit, conversion.src_V.gap, src_process_handle);
184 323
324 cmd_buff[0] = IPC::MakeHeader(0x12, 1, 0);
185 cmd_buff[1] = RESULT_SUCCESS.raw; 325 cmd_buff[1] = RESULT_SUCCESS.raw;
326
327 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, src_process_handle=0x%08X",
328 conversion.src_V.image_size, conversion.src_V.transfer_unit, conversion.src_V.gap, cmd_buff[6]);
186} 329}
187 330
188static void SetSendingYUYV(Service::Interface* self) { 331static void SetSendingYUYV(Service::Interface* self) {
@@ -192,12 +335,76 @@ static void SetSendingYUYV(Service::Interface* self) {
192 conversion.src_YUYV.image_size = cmd_buff[2]; 335 conversion.src_YUYV.image_size = cmd_buff[2];
193 conversion.src_YUYV.transfer_unit = cmd_buff[3]; 336 conversion.src_YUYV.transfer_unit = cmd_buff[3];
194 conversion.src_YUYV.gap = cmd_buff[4]; 337 conversion.src_YUYV.gap = cmd_buff[4];
195 u32 src_process_handle = cmd_buff[6];
196 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
197 "src_process_handle=0x%08X", conversion.src_YUYV.image_size,
198 conversion.src_YUYV.transfer_unit, conversion.src_YUYV.gap, src_process_handle);
199 338
339 cmd_buff[0] = IPC::MakeHeader(0x13, 1, 0);
340 cmd_buff[1] = RESULT_SUCCESS.raw;
341
342 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, src_process_handle=0x%08X",
343 conversion.src_YUYV.image_size, conversion.src_YUYV.transfer_unit, conversion.src_YUYV.gap, cmd_buff[6]);
344}
345
346/**
347 * Y2R::IsFinishedSendingYuv service function
348 * Output:
349 * 1 : Result of the function, 0 on success, otherwise error code
350 * 2 : u8, 0 = Not Finished, 1 = Finished
351 */
352static void IsFinishedSendingYuv(Service::Interface* self) {
353 u32* cmd_buff = Kernel::GetCommandBuffer();
354
355 cmd_buff[0] = IPC::MakeHeader(0x14, 2, 0);
356 cmd_buff[1] = RESULT_SUCCESS.raw;
357 cmd_buff[2] = 1;
358
359 LOG_WARNING(Service_Y2R, "(STUBBED) called");
360}
361
362/**
363 * Y2R::IsFinishedSendingY service function
364 * Output:
365 * 1 : Result of the function, 0 on success, otherwise error code
366 * 2 : u8, 0 = Not Finished, 1 = Finished
367 */
368static void IsFinishedSendingY(Service::Interface* self) {
369 u32* cmd_buff = Kernel::GetCommandBuffer();
370
371 cmd_buff[0] = IPC::MakeHeader(0x15, 2, 0);
200 cmd_buff[1] = RESULT_SUCCESS.raw; 372 cmd_buff[1] = RESULT_SUCCESS.raw;
373 cmd_buff[2] = 1;
374
375 LOG_WARNING(Service_Y2R, "(STUBBED) called");
376}
377
378/**
379 * Y2R::IsFinishedSendingU service function
380 * Output:
381 * 1 : Result of the function, 0 on success, otherwise error code
382 * 2 : u8, 0 = Not Finished, 1 = Finished
383 */
384static void IsFinishedSendingU(Service::Interface* self) {
385 u32* cmd_buff = Kernel::GetCommandBuffer();
386
387 cmd_buff[0] = IPC::MakeHeader(0x16, 2, 0);
388 cmd_buff[1] = RESULT_SUCCESS.raw;
389 cmd_buff[2] = 1;
390
391 LOG_WARNING(Service_Y2R, "(STUBBED) called");
392}
393
394/**
395 * Y2R::IsFinishedSendingV service function
396 * Output:
397 * 1 : Result of the function, 0 on success, otherwise error code
398 * 2 : u8, 0 = Not Finished, 1 = Finished
399 */
400static void IsFinishedSendingV(Service::Interface* self) {
401 u32* cmd_buff = Kernel::GetCommandBuffer();
402
403 cmd_buff[0] = IPC::MakeHeader(0x17, 2, 0);
404 cmd_buff[1] = RESULT_SUCCESS.raw;
405 cmd_buff[2] = 1;
406
407 LOG_WARNING(Service_Y2R, "(STUBBED) called");
201} 408}
202 409
203static void SetReceiving(Service::Interface* self) { 410static void SetReceiving(Service::Interface* self) {
@@ -207,27 +414,66 @@ static void SetReceiving(Service::Interface* self) {
207 conversion.dst.image_size = cmd_buff[2]; 414 conversion.dst.image_size = cmd_buff[2];
208 conversion.dst.transfer_unit = cmd_buff[3]; 415 conversion.dst.transfer_unit = cmd_buff[3];
209 conversion.dst.gap = cmd_buff[4]; 416 conversion.dst.gap = cmd_buff[4];
210 u32 dst_process_handle = cmd_buff[6];
211 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
212 "dst_process_handle=0x%08X", conversion.dst.image_size,
213 conversion.dst.transfer_unit, conversion.dst.gap,
214 dst_process_handle);
215 417
418 cmd_buff[0] = IPC::MakeHeader(0x18, 1, 0);
216 cmd_buff[1] = RESULT_SUCCESS.raw; 419 cmd_buff[1] = RESULT_SUCCESS.raw;
420
421 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, dst_process_handle=0x%08X",
422 conversion.dst.image_size, conversion.dst.transfer_unit, conversion.dst.gap, cmd_buff[6]);
423}
424
425/**
426 * Y2R::IsFinishedReceiving service function
427 * Output:
428 * 1 : Result of the function, 0 on success, otherwise error code
429 * 2 : u8, 0 = Not Finished, 1 = Finished
430 */
431static void IsFinishedReceiving(Service::Interface* self) {
432 u32* cmd_buff = Kernel::GetCommandBuffer();
433
434 cmd_buff[0] = IPC::MakeHeader(0x19, 2, 0);
435 cmd_buff[1] = RESULT_SUCCESS.raw;
436 cmd_buff[2] = 1;
437
438 LOG_WARNING(Service_Y2R, "(STUBBED) called");
217} 439}
218 440
219static void SetInputLineWidth(Service::Interface* self) { 441static void SetInputLineWidth(Service::Interface* self) {
220 u32* cmd_buff = Kernel::GetCommandBuffer(); 442 u32* cmd_buff = Kernel::GetCommandBuffer();
221 443
222 LOG_DEBUG(Service_Y2R, "called input_line_width=%u", cmd_buff[1]); 444 cmd_buff[0] = IPC::MakeHeader(0x1A, 1, 0);
223 cmd_buff[1] = conversion.SetInputLineWidth(cmd_buff[1]).raw; 445 cmd_buff[1] = conversion.SetInputLineWidth(cmd_buff[1]).raw;
446
447 LOG_DEBUG(Service_Y2R, "called input_line_width=%u", cmd_buff[1]);
448}
449
450static void GetInputLineWidth(Service::Interface* self) {
451 u32* cmd_buff = Kernel::GetCommandBuffer();
452
453 cmd_buff[0] = IPC::MakeHeader(0x1B, 2, 0);
454 cmd_buff[1] = RESULT_SUCCESS.raw;
455 cmd_buff[2] = conversion.input_line_width;
456
457 LOG_DEBUG(Service_Y2R, "called input_line_width=%u", conversion.input_line_width);
224} 458}
225 459
226static void SetInputLines(Service::Interface* self) { 460static void SetInputLines(Service::Interface* self) {
227 u32* cmd_buff = Kernel::GetCommandBuffer(); 461 u32* cmd_buff = Kernel::GetCommandBuffer();
228 462
229 LOG_DEBUG(Service_Y2R, "called input_line_number=%u", cmd_buff[1]); 463 cmd_buff[0] = IPC::MakeHeader(0x1C, 1, 0);
230 cmd_buff[1] = conversion.SetInputLines(cmd_buff[1]).raw; 464 cmd_buff[1] = conversion.SetInputLines(cmd_buff[1]).raw;
465
466 LOG_DEBUG(Service_Y2R, "called input_lines=%u", cmd_buff[1]);
467}
468
469static void GetInputLines(Service::Interface* self) {
470 u32* cmd_buff = Kernel::GetCommandBuffer();
471
472 cmd_buff[0] = IPC::MakeHeader(0x1D, 2, 0);
473 cmd_buff[1] = RESULT_SUCCESS.raw;
474 cmd_buff[2] = static_cast<u32>(conversion.input_lines);
475
476 LOG_DEBUG(Service_Y2R, "called input_lines=%u", conversion.input_lines);
231} 477}
232 478
233static void SetCoefficient(Service::Interface* self) { 479static void SetCoefficient(Service::Interface* self) {
@@ -235,45 +481,111 @@ static void SetCoefficient(Service::Interface* self) {
235 481
236 const u16* coefficients = reinterpret_cast<const u16*>(&cmd_buff[1]); 482 const u16* coefficients = reinterpret_cast<const u16*>(&cmd_buff[1]);
237 std::memcpy(conversion.coefficients.data(), coefficients, sizeof(CoefficientSet)); 483 std::memcpy(conversion.coefficients.data(), coefficients, sizeof(CoefficientSet));
484
485 cmd_buff[0] = IPC::MakeHeader(0x1E, 1, 0);
486 cmd_buff[1] = RESULT_SUCCESS.raw;
487
238 LOG_DEBUG(Service_Y2R, "called coefficients=[%hX, %hX, %hX, %hX, %hX, %hX, %hX, %hX]", 488 LOG_DEBUG(Service_Y2R, "called coefficients=[%hX, %hX, %hX, %hX, %hX, %hX, %hX, %hX]",
239 coefficients[0], coefficients[1], coefficients[2], coefficients[3], 489 coefficients[0], coefficients[1], coefficients[2], coefficients[3],
240 coefficients[4], coefficients[5], coefficients[6], coefficients[7]); 490 coefficients[4], coefficients[5], coefficients[6], coefficients[7]);
491}
241 492
493static void GetCoefficient(Service::Interface* self) {
494 u32* cmd_buff = Kernel::GetCommandBuffer();
495
496 cmd_buff[0] = IPC::MakeHeader(0x1F, 5, 0);
242 cmd_buff[1] = RESULT_SUCCESS.raw; 497 cmd_buff[1] = RESULT_SUCCESS.raw;
498 std::memcpy(&cmd_buff[2], conversion.coefficients.data(), sizeof(CoefficientSet));
499
500 LOG_DEBUG(Service_Y2R, "called");
243} 501}
244 502
245static void SetStandardCoefficient(Service::Interface* self) { 503static void SetStandardCoefficient(Service::Interface* self) {
246 u32* cmd_buff = Kernel::GetCommandBuffer(); 504 u32* cmd_buff = Kernel::GetCommandBuffer();
247 505
248 LOG_DEBUG(Service_Y2R, "called standard_coefficient=%u", cmd_buff[1]); 506 u32 index = cmd_buff[1];
507
508 cmd_buff[0] = IPC::MakeHeader(0x20, 1, 0);
509 cmd_buff[1] = conversion.SetStandardCoefficient((StandardCoefficient)index).raw;
510
511 LOG_DEBUG(Service_Y2R, "called standard_coefficient=%u", index);
512}
513
514static void GetStandardCoefficient(Service::Interface* self) {
515 u32* cmd_buff = Kernel::GetCommandBuffer();
516
517 u32 index = cmd_buff[1];
518
519 if (index < ARRAY_SIZE(standard_coefficients)) {
520 cmd_buff[0] = IPC::MakeHeader(0x21, 5, 0);
521 cmd_buff[1] = RESULT_SUCCESS.raw;
522 std::memcpy(&cmd_buff[2], &standard_coefficients[index], sizeof(CoefficientSet));
249 523
250 cmd_buff[1] = conversion.SetStandardCoefficient((StandardCoefficient)cmd_buff[1]).raw; 524 LOG_DEBUG(Service_Y2R, "called standard_coefficient=%u ", index);
525 } else {
526 cmd_buff[0] = IPC::MakeHeader(0x21, 1, 0);
527 cmd_buff[1] = -1; // TODO(bunnei): Identify the correct error code for this
528
529 LOG_ERROR(Service_Y2R, "called standard_coefficient=%u The argument is invalid!", index);
530 }
251} 531}
252 532
253static void SetAlpha(Service::Interface* self) { 533static void SetAlpha(Service::Interface* self) {
254 u32* cmd_buff = Kernel::GetCommandBuffer(); 534 u32* cmd_buff = Kernel::GetCommandBuffer();
255 535
256 conversion.alpha = cmd_buff[1]; 536 conversion.alpha = cmd_buff[1];
537
538 cmd_buff[0] = IPC::MakeHeader(0x22, 1, 0);
539 cmd_buff[1] = RESULT_SUCCESS.raw;
540
257 LOG_DEBUG(Service_Y2R, "called alpha=%hu", conversion.alpha); 541 LOG_DEBUG(Service_Y2R, "called alpha=%hu", conversion.alpha);
542}
543
544static void GetAlpha(Service::Interface* self) {
545 u32* cmd_buff = Kernel::GetCommandBuffer();
258 546
547 cmd_buff[0] = IPC::MakeHeader(0x23, 2, 0);
259 cmd_buff[1] = RESULT_SUCCESS.raw; 548 cmd_buff[1] = RESULT_SUCCESS.raw;
549 cmd_buff[2] = conversion.alpha;
550
551 LOG_DEBUG(Service_Y2R, "called alpha=%hu", conversion.alpha);
260} 552}
261 553
262static void StartConversion(Service::Interface* self) { 554static void SetDitheringWeightParams(Service::Interface* self) {
263 u32* cmd_buff = Kernel::GetCommandBuffer(); 555 u32* cmd_buff = Kernel::GetCommandBuffer();
556 std::memcpy(&dithering_weight_params, &cmd_buff[1], sizeof(DitheringWeightParams));
264 557
265 HW::Y2R::PerformConversion(conversion); 558 cmd_buff[0] = IPC::MakeHeader(0x24, 1, 0);
559 cmd_buff[1] = RESULT_SUCCESS.raw;
266 560
267 // dst_image_size would seem to be perfect for this, but it doesn't include the gap :( 561 LOG_DEBUG(Service_Y2R, "called");
268 u32 total_output_size = conversion.input_lines * 562}
269 (conversion.dst.transfer_unit + conversion.dst.gap); 563
270 VideoCore::g_renderer->Rasterizer()->InvalidateRegion( 564static void GetDitheringWeightParams(Service::Interface* self) {
271 Memory::VirtualToPhysicalAddress(conversion.dst.address), total_output_size); 565 u32* cmd_buff = Kernel::GetCommandBuffer();
566
567 cmd_buff[0] = IPC::MakeHeader(0x25, 9, 0);
568 cmd_buff[1] = RESULT_SUCCESS.raw;
569 std::memcpy(&cmd_buff[2], &dithering_weight_params, sizeof(DitheringWeightParams));
272 570
273 LOG_DEBUG(Service_Y2R, "called"); 571 LOG_DEBUG(Service_Y2R, "called");
572}
573
574static void StartConversion(Service::Interface* self) {
575 u32* cmd_buff = Kernel::GetCommandBuffer();
576
577 // dst_image_size would seem to be perfect for this, but it doesn't include the gap :(
578 u32 total_output_size = conversion.input_lines * (conversion.dst.transfer_unit + conversion.dst.gap);
579 Memory::RasterizerFlushAndInvalidateRegion(Memory::VirtualToPhysicalAddress(conversion.dst.address), total_output_size);
580
581 HW::Y2R::PerformConversion(conversion);
582
274 completion_event->Signal(); 583 completion_event->Signal();
275 584
585 cmd_buff[0] = IPC::MakeHeader(0x26, 1, 0);
276 cmd_buff[1] = RESULT_SUCCESS.raw; 586 cmd_buff[1] = RESULT_SUCCESS.raw;
587
588 LOG_DEBUG(Service_Y2R, "called");
277} 589}
278 590
279static void StopConversion(Service::Interface* self) { 591static void StopConversion(Service::Interface* self) {
@@ -281,6 +593,7 @@ static void StopConversion(Service::Interface* self) {
281 593
282 cmd_buff[0] = IPC::MakeHeader(0x27, 1, 0); 594 cmd_buff[0] = IPC::MakeHeader(0x27, 1, 0);
283 cmd_buff[1] = RESULT_SUCCESS.raw; 595 cmd_buff[1] = RESULT_SUCCESS.raw;
596
284 LOG_DEBUG(Service_Y2R, "called"); 597 LOG_DEBUG(Service_Y2R, "called");
285} 598}
286 599
@@ -293,50 +606,61 @@ static void StopConversion(Service::Interface* self) {
293static void IsBusyConversion(Service::Interface* self) { 606static void IsBusyConversion(Service::Interface* self) {
294 u32* cmd_buff = Kernel::GetCommandBuffer(); 607 u32* cmd_buff = Kernel::GetCommandBuffer();
295 608
609 cmd_buff[0] = IPC::MakeHeader(0x28, 2, 0);
296 cmd_buff[1] = RESULT_SUCCESS.raw; 610 cmd_buff[1] = RESULT_SUCCESS.raw;
297 cmd_buff[2] = 0; // StartConversion always finishes immediately 611 cmd_buff[2] = 0; // StartConversion always finishes immediately
612
298 LOG_DEBUG(Service_Y2R, "called"); 613 LOG_DEBUG(Service_Y2R, "called");
299} 614}
300 615
301/** 616/**
302 * Y2R_U::SetConversionParams service function 617 * Y2R_U::SetPackageParameter service function
303 */ 618 */
304static void SetConversionParams(Service::Interface* self) { 619static void SetPackageParameter(Service::Interface* self) {
305 u32* cmd_buff = Kernel::GetCommandBuffer(); 620 u32* cmd_buff = Kernel::GetCommandBuffer();
306 621
307 auto params = reinterpret_cast<const ConversionParameters*>(&cmd_buff[1]); 622 auto params = reinterpret_cast<const ConversionParameters*>(&cmd_buff[1]);
308 LOG_DEBUG(Service_Y2R,
309 "called input_format=%hhu output_format=%hhu rotation=%hhu block_alignment=%hhu "
310 "input_line_width=%hu input_lines=%hu standard_coefficient=%hhu "
311 "reserved=%hhu alpha=%hX",
312 params->input_format, params->output_format, params->rotation, params->block_alignment,
313 params->input_line_width, params->input_lines, params->standard_coefficient,
314 params->reserved, params->alpha);
315
316 ResultCode result = RESULT_SUCCESS;
317 623
318 conversion.input_format = params->input_format; 624 conversion.input_format = params->input_format;
319 conversion.output_format = params->output_format; 625 conversion.output_format = params->output_format;
320 conversion.rotation = params->rotation; 626 conversion.rotation = params->rotation;
321 conversion.block_alignment = params->block_alignment; 627 conversion.block_alignment = params->block_alignment;
322 result = conversion.SetInputLineWidth(params->input_line_width); 628
323 if (result.IsError()) goto cleanup; 629 ResultCode result = conversion.SetInputLineWidth(params->input_line_width);
630
631 if (result.IsError())
632 goto cleanup;
633
324 result = conversion.SetInputLines(params->input_lines); 634 result = conversion.SetInputLines(params->input_lines);
325 if (result.IsError()) goto cleanup; 635
636 if (result.IsError())
637 goto cleanup;
638
326 result = conversion.SetStandardCoefficient(params->standard_coefficient); 639 result = conversion.SetStandardCoefficient(params->standard_coefficient);
327 if (result.IsError()) goto cleanup; 640
641 if (result.IsError())
642 goto cleanup;
643
644 conversion.padding = params->padding;
328 conversion.alpha = params->alpha; 645 conversion.alpha = params->alpha;
329 646
330cleanup: 647cleanup:
331 cmd_buff[0] = IPC::MakeHeader(0x29, 1, 0); 648 cmd_buff[0] = IPC::MakeHeader(0x29, 1, 0);
332 cmd_buff[1] = result.raw; 649 cmd_buff[1] = result.raw;
650
651 LOG_DEBUG(Service_Y2R, "called input_format=%hhu output_format=%hhu rotation=%hhu block_alignment=%hhu "
652 "input_line_width=%hu input_lines=%hu standard_coefficient=%hhu reserved=%hhu alpha=%hX",
653 params->input_format, params->output_format, params->rotation, params->block_alignment,
654 params->input_line_width, params->input_lines, params->standard_coefficient, params->padding, params->alpha);
333} 655}
334 656
335static void PingProcess(Service::Interface* self) { 657static void PingProcess(Service::Interface* self) {
336 u32* cmd_buff = Kernel::GetCommandBuffer(); 658 u32* cmd_buff = Kernel::GetCommandBuffer();
337 659
660 cmd_buff[0] = IPC::MakeHeader(0x2A, 2, 0);
338 cmd_buff[1] = RESULT_SUCCESS.raw; 661 cmd_buff[1] = RESULT_SUCCESS.raw;
339 cmd_buff[2] = 0; 662 cmd_buff[2] = 0;
663
340 LOG_WARNING(Service_Y2R, "(STUBBED) called"); 664 LOG_WARNING(Service_Y2R, "(STUBBED) called");
341} 665}
342 666
@@ -362,6 +686,7 @@ static void DriverInitialize(Service::Interface* self) {
362 686
363 cmd_buff[0] = IPC::MakeHeader(0x2B, 1, 0); 687 cmd_buff[0] = IPC::MakeHeader(0x2B, 1, 0);
364 cmd_buff[1] = RESULT_SUCCESS.raw; 688 cmd_buff[1] = RESULT_SUCCESS.raw;
689
365 LOG_DEBUG(Service_Y2R, "called"); 690 LOG_DEBUG(Service_Y2R, "called");
366} 691}
367 692
@@ -370,54 +695,67 @@ static void DriverFinalize(Service::Interface* self) {
370 695
371 cmd_buff[0] = IPC::MakeHeader(0x2C, 1, 0); 696 cmd_buff[0] = IPC::MakeHeader(0x2C, 1, 0);
372 cmd_buff[1] = RESULT_SUCCESS.raw; 697 cmd_buff[1] = RESULT_SUCCESS.raw;
698
699 LOG_DEBUG(Service_Y2R, "called");
700}
701
702
703static void GetPackageParameter(Service::Interface* self) {
704 u32* cmd_buff = Kernel::GetCommandBuffer();
705
706 cmd_buff[0] = IPC::MakeHeader(0x2D, 4, 0);
707 cmd_buff[1] = RESULT_SUCCESS.raw;
708 std::memcpy(&cmd_buff[2], &conversion, sizeof(ConversionParameters));
709
373 LOG_DEBUG(Service_Y2R, "called"); 710 LOG_DEBUG(Service_Y2R, "called");
374} 711}
375 712
376const Interface::FunctionInfo FunctionTable[] = { 713const Interface::FunctionInfo FunctionTable[] = {
377 {0x00010040, SetInputFormat, "SetInputFormat"}, 714 {0x00010040, SetInputFormat, "SetInputFormat"},
378 {0x00020000, nullptr, "GetInputFormat"}, 715 {0x00020000, GetInputFormat, "GetInputFormat"},
379 {0x00030040, SetOutputFormat, "SetOutputFormat"}, 716 {0x00030040, SetOutputFormat, "SetOutputFormat"},
380 {0x00040000, nullptr, "GetOutputFormat"}, 717 {0x00040000, GetOutputFormat, "GetOutputFormat"},
381 {0x00050040, SetRotation, "SetRotation"}, 718 {0x00050040, SetRotation, "SetRotation"},
382 {0x00060000, nullptr, "GetRotation"}, 719 {0x00060000, GetRotation, "GetRotation"},
383 {0x00070040, SetBlockAlignment, "SetBlockAlignment"}, 720 {0x00070040, SetBlockAlignment, "SetBlockAlignment"},
384 {0x00080000, nullptr, "GetBlockAlignment"}, 721 {0x00080000, GetBlockAlignment, "GetBlockAlignment"},
385 {0x00090040, nullptr, "SetSpacialDithering"}, 722 {0x00090040, SetSpacialDithering, "SetSpacialDithering"},
386 {0x000A0000, nullptr, "GetSpacialDithering"}, 723 {0x000A0000, GetSpacialDithering, "GetSpacialDithering"},
387 {0x000B0040, nullptr, "SetTemporalDithering"}, 724 {0x000B0040, SetTemporalDithering, "SetTemporalDithering"},
388 {0x000C0000, nullptr, "GetTemporalDithering"}, 725 {0x000C0000, GetTemporalDithering, "GetTemporalDithering"},
389 {0x000D0040, SetTransferEndInterrupt, "SetTransferEndInterrupt"}, 726 {0x000D0040, SetTransferEndInterrupt, "SetTransferEndInterrupt"},
727 {0x000E0000, GetTransferEndInterrupt, "GetTransferEndInterrupt"},
390 {0x000F0000, GetTransferEndEvent, "GetTransferEndEvent"}, 728 {0x000F0000, GetTransferEndEvent, "GetTransferEndEvent"},
391 {0x00100102, SetSendingY, "SetSendingY"}, 729 {0x00100102, SetSendingY, "SetSendingY"},
392 {0x00110102, SetSendingU, "SetSendingU"}, 730 {0x00110102, SetSendingU, "SetSendingU"},
393 {0x00120102, SetSendingV, "SetSendingV"}, 731 {0x00120102, SetSendingV, "SetSendingV"},
394 {0x00130102, SetSendingYUYV, "SetSendingYUYV"}, 732 {0x00130102, SetSendingYUYV, "SetSendingYUYV"},
395 {0x00140000, nullptr, "IsFinishedSendingYuv"}, 733 {0x00140000, IsFinishedSendingYuv, "IsFinishedSendingYuv"},
396 {0x00150000, nullptr, "IsFinishedSendingY"}, 734 {0x00150000, IsFinishedSendingY, "IsFinishedSendingY"},
397 {0x00160000, nullptr, "IsFinishedSendingU"}, 735 {0x00160000, IsFinishedSendingU, "IsFinishedSendingU"},
398 {0x00170000, nullptr, "IsFinishedSendingV"}, 736 {0x00170000, IsFinishedSendingV, "IsFinishedSendingV"},
399 {0x00180102, SetReceiving, "SetReceiving"}, 737 {0x00180102, SetReceiving, "SetReceiving"},
400 {0x00190000, nullptr, "IsFinishedReceiving"}, 738 {0x00190000, IsFinishedReceiving, "IsFinishedReceiving"},
401 {0x001A0040, SetInputLineWidth, "SetInputLineWidth"}, 739 {0x001A0040, SetInputLineWidth, "SetInputLineWidth"},
402 {0x001B0000, nullptr, "GetInputLineWidth"}, 740 {0x001B0000, GetInputLineWidth, "GetInputLineWidth"},
403 {0x001C0040, SetInputLines, "SetInputLines"}, 741 {0x001C0040, SetInputLines, "SetInputLines"},
404 {0x001D0000, nullptr, "GetInputLines"}, 742 {0x001D0000, GetInputLines, "GetInputLines"},
405 {0x001E0100, SetCoefficient, "SetCoefficient"}, 743 {0x001E0100, SetCoefficient, "SetCoefficient"},
406 {0x001F0000, nullptr, "GetCoefficient"}, 744 {0x001F0000, GetCoefficient, "GetCoefficient"},
407 {0x00200040, SetStandardCoefficient, "SetStandardCoefficient"}, 745 {0x00200040, SetStandardCoefficient, "SetStandardCoefficient"},
408 {0x00210040, nullptr, "GetStandardCoefficientParams"}, 746 {0x00210040, GetStandardCoefficient, "GetStandardCoefficient"},
409 {0x00220040, SetAlpha, "SetAlpha"}, 747 {0x00220040, SetAlpha, "SetAlpha"},
410 {0x00230000, nullptr, "GetAlpha"}, 748 {0x00230000, GetAlpha, "GetAlpha"},
411 {0x00240200, nullptr, "SetDitheringWeightParams"}, 749 {0x00240200, SetDitheringWeightParams,"SetDitheringWeightParams"},
412 {0x00250000, nullptr, "GetDitheringWeightParams"}, 750 {0x00250000, GetDitheringWeightParams,"GetDitheringWeightParams"},
413 {0x00260000, StartConversion, "StartConversion"}, 751 {0x00260000, StartConversion, "StartConversion"},
414 {0x00270000, StopConversion, "StopConversion"}, 752 {0x00270000, StopConversion, "StopConversion"},
415 {0x00280000, IsBusyConversion, "IsBusyConversion"}, 753 {0x00280000, IsBusyConversion, "IsBusyConversion"},
416 {0x002901C0, SetConversionParams, "SetConversionParams"}, 754 {0x002901C0, SetPackageParameter, "SetPackageParameter"},
417 {0x002A0000, PingProcess, "PingProcess"}, 755 {0x002A0000, PingProcess, "PingProcess"},
418 {0x002B0000, DriverInitialize, "DriverInitialize"}, 756 {0x002B0000, DriverInitialize, "DriverInitialize"},
419 {0x002C0000, DriverFinalize, "DriverFinalize"}, 757 {0x002C0000, DriverFinalize, "DriverFinalize"},
420 {0x002D0000, nullptr, "GetPackageParameter"}, 758 {0x002D0000, GetPackageParameter, "GetPackageParameter"},
421}; 759};
422 760
423//////////////////////////////////////////////////////////////////////////////////////////////////// 761////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/core/hle/service/y2r_u.h b/src/core/hle/service/y2r_u.h
index 3965a5545..95fa2fdb7 100644
--- a/src/core/hle/service/y2r_u.h
+++ b/src/core/hle/service/y2r_u.h
@@ -97,6 +97,7 @@ struct ConversionConfiguration {
97 u16 input_line_width; 97 u16 input_line_width;
98 u16 input_lines; 98 u16 input_lines;
99 CoefficientSet coefficients; 99 CoefficientSet coefficients;
100 u8 padding;
100 u16 alpha; 101 u16 alpha;
101 102
102 /// Input parameters for the Y (luma) plane 103 /// Input parameters for the Y (luma) plane
@@ -109,6 +110,25 @@ struct ConversionConfiguration {
109 ResultCode SetStandardCoefficient(StandardCoefficient standard_coefficient); 110 ResultCode SetStandardCoefficient(StandardCoefficient standard_coefficient);
110}; 111};
111 112
113struct DitheringWeightParams {
114 u16 w0_xEven_yEven;
115 u16 w0_xOdd_yEven;
116 u16 w0_xEven_yOdd;
117 u16 w0_xOdd_yOdd;
118 u16 w1_xEven_yEven;
119 u16 w1_xOdd_yEven;
120 u16 w1_xEven_yOdd;
121 u16 w1_xOdd_yOdd;
122 u16 w2_xEven_yEven;
123 u16 w2_xOdd_yEven;
124 u16 w2_xEven_yOdd;
125 u16 w2_xOdd_yOdd;
126 u16 w3_xEven_yEven;
127 u16 w3_xOdd_yEven;
128 u16 w3_xEven_yOdd;
129 u16 w3_xOdd_yOdd;
130};
131
112class Interface : public Service::Interface { 132class Interface : public Service::Interface {
113public: 133public:
114 Interface(); 134 Interface();
diff --git a/src/core/hle/svc.cpp b/src/core/hle/svc.cpp
index 1112a905e..fb2aecbf2 100644
--- a/src/core/hle/svc.cpp
+++ b/src/core/hle/svc.cpp
@@ -6,7 +6,6 @@
6 6
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8#include "common/microprofile.h" 8#include "common/microprofile.h"
9#include "common/profiler.h"
10#include "common/string_util.h" 9#include "common/string_util.h"
11#include "common/symbols.h" 10#include "common/symbols.h"
12 11
@@ -1035,8 +1034,6 @@ static const FunctionDef SVC_Table[] = {
1035 {0x7D, HLE::Wrap<QueryProcessMemory>, "QueryProcessMemory"}, 1034 {0x7D, HLE::Wrap<QueryProcessMemory>, "QueryProcessMemory"},
1036}; 1035};
1037 1036
1038Common::Profiling::TimingCategory profiler_svc("SVC Calls");
1039
1040static const FunctionDef* GetSVCInfo(u32 func_num) { 1037static const FunctionDef* GetSVCInfo(u32 func_num) {
1041 if (func_num >= ARRAY_SIZE(SVC_Table)) { 1038 if (func_num >= ARRAY_SIZE(SVC_Table)) {
1042 LOG_ERROR(Kernel_SVC, "unknown svc=0x%02X", func_num); 1039 LOG_ERROR(Kernel_SVC, "unknown svc=0x%02X", func_num);
@@ -1048,7 +1045,6 @@ static const FunctionDef* GetSVCInfo(u32 func_num) {
1048MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70)); 1045MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70));
1049 1046
1050void CallSVC(u32 immediate) { 1047void CallSVC(u32 immediate) {
1051 Common::Profiling::ScopeTimer timer_svc(profiler_svc);
1052 MICROPROFILE_SCOPE(Kernel_SVC); 1048 MICROPROFILE_SCOPE(Kernel_SVC);
1053 1049
1054 const FunctionDef* info = GetSVCInfo(immediate); 1050 const FunctionDef* info = GetSVCInfo(immediate);
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index 7e2f9cdfa..2fe856293 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -115,21 +115,39 @@ inline void Write(u32 addr, const T data) {
115 u8* start = Memory::GetPhysicalPointer(config.GetStartAddress()); 115 u8* start = Memory::GetPhysicalPointer(config.GetStartAddress());
116 u8* end = Memory::GetPhysicalPointer(config.GetEndAddress()); 116 u8* end = Memory::GetPhysicalPointer(config.GetEndAddress());
117 117
118 if (config.fill_24bit) { 118 // TODO: Consider always accelerating and returning vector of
119 // fill with 24-bit values 119 // regions that the accelerated fill did not cover to
120 for (u8* ptr = start; ptr < end; ptr += 3) { 120 // reduce/eliminate the fill that the cpu has to do.
121 ptr[0] = config.value_24bit_r; 121 // This would also mean that the flush below is not needed.
122 ptr[1] = config.value_24bit_g; 122 // Fill should first flush all surfaces that touch but are
123 ptr[2] = config.value_24bit_b; 123 // not completely within the fill range.
124 // Then fill all completely covered surfaces, and return the
125 // regions that were between surfaces or within the touching
126 // ones for cpu to manually fill here.
127 if (!VideoCore::g_renderer->Rasterizer()->AccelerateFill(config)) {
128 Memory::RasterizerFlushAndInvalidateRegion(config.GetStartAddress(), config.GetEndAddress() - config.GetStartAddress());
129
130 if (config.fill_24bit) {
131 // fill with 24-bit values
132 for (u8* ptr = start; ptr < end; ptr += 3) {
133 ptr[0] = config.value_24bit_r;
134 ptr[1] = config.value_24bit_g;
135 ptr[2] = config.value_24bit_b;
136 }
137 } else if (config.fill_32bit) {
138 // fill with 32-bit values
139 if (end > start) {
140 u32 value = config.value_32bit;
141 size_t len = (end - start) / sizeof(u32);
142 for (size_t i = 0; i < len; ++i)
143 memcpy(&start[i * sizeof(u32)], &value, sizeof(u32));
144 }
145 } else {
146 // fill with 16-bit values
147 u16 value_16bit = config.value_16bit.Value();
148 for (u8* ptr = start; ptr < end; ptr += sizeof(u16))
149 memcpy(ptr, &value_16bit, sizeof(u16));
124 } 150 }
125 } else if (config.fill_32bit) {
126 // fill with 32-bit values
127 for (u32* ptr = (u32*)start; ptr < (u32*)end; ++ptr)
128 *ptr = config.value_32bit;
129 } else {
130 // fill with 16-bit values
131 for (u16* ptr = (u16*)start; ptr < (u16*)end; ++ptr)
132 *ptr = config.value_16bit;
133 } 151 }
134 152
135 LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), config.GetEndAddress()); 153 LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), config.GetEndAddress());
@@ -139,8 +157,6 @@ inline void Write(u32 addr, const T data) {
139 } else { 157 } else {
140 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC1); 158 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC1);
141 } 159 }
142
143 VideoCore::g_renderer->Rasterizer()->InvalidateRegion(config.GetStartAddress(), config.GetEndAddress() - config.GetStartAddress());
144 } 160 }
145 161
146 // Reset "trigger" flag and set the "finish" flag 162 // Reset "trigger" flag and set the "finish" flag
@@ -161,184 +177,185 @@ inline void Write(u32 addr, const T data) {
161 if (Pica::g_debug_context) 177 if (Pica::g_debug_context)
162 Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer, nullptr); 178 Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer, nullptr);
163 179
164 u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress()); 180 if (!VideoCore::g_renderer->Rasterizer()->AccelerateDisplayTransfer(config)) {
165 u8* dst_pointer = Memory::GetPhysicalPointer(config.GetPhysicalOutputAddress()); 181 u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress());
166 182 u8* dst_pointer = Memory::GetPhysicalPointer(config.GetPhysicalOutputAddress());
167 if (config.is_texture_copy) {
168 u32 input_width = config.texture_copy.input_width * 16;
169 u32 input_gap = config.texture_copy.input_gap * 16;
170 u32 output_width = config.texture_copy.output_width * 16;
171 u32 output_gap = config.texture_copy.output_gap * 16;
172
173 size_t contiguous_input_size = config.texture_copy.size / input_width * (input_width + input_gap);
174 VideoCore::g_renderer->Rasterizer()->FlushRegion(config.GetPhysicalInputAddress(), contiguous_input_size);
175
176 u32 remaining_size = config.texture_copy.size;
177 u32 remaining_input = input_width;
178 u32 remaining_output = output_width;
179 while (remaining_size > 0) {
180 u32 copy_size = std::min({ remaining_input, remaining_output, remaining_size });
181 183
182 std::memcpy(dst_pointer, src_pointer, copy_size); 184 if (config.is_texture_copy) {
183 src_pointer += copy_size; 185 u32 input_width = config.texture_copy.input_width * 16;
184 dst_pointer += copy_size; 186 u32 input_gap = config.texture_copy.input_gap * 16;
187 u32 output_width = config.texture_copy.output_width * 16;
188 u32 output_gap = config.texture_copy.output_gap * 16;
185 189
186 remaining_input -= copy_size; 190 size_t contiguous_input_size = config.texture_copy.size / input_width * (input_width + input_gap);
187 remaining_output -= copy_size; 191 Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), contiguous_input_size);
188 remaining_size -= copy_size;
189 192
190 if (remaining_input == 0) { 193 size_t contiguous_output_size = config.texture_copy.size / output_width * (output_width + output_gap);
191 remaining_input = input_width; 194 Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), contiguous_output_size);
192 src_pointer += input_gap;
193 }
194 if (remaining_output == 0) {
195 remaining_output = output_width;
196 dst_pointer += output_gap;
197 }
198 }
199 195
200 LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X", 196 u32 remaining_size = config.texture_copy.size;
201 config.texture_copy.size, 197 u32 remaining_input = input_width;
202 config.GetPhysicalInputAddress(), input_width, input_gap, 198 u32 remaining_output = output_width;
203 config.GetPhysicalOutputAddress(), output_width, output_gap, 199 while (remaining_size > 0) {
204 config.flags); 200 u32 copy_size = std::min({ remaining_input, remaining_output, remaining_size });
205 201
206 size_t contiguous_output_size = config.texture_copy.size / output_width * (output_width + output_gap); 202 std::memcpy(dst_pointer, src_pointer, copy_size);
207 VideoCore::g_renderer->Rasterizer()->InvalidateRegion(config.GetPhysicalOutputAddress(), contiguous_output_size); 203 src_pointer += copy_size;
204 dst_pointer += copy_size;
208 205
209 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); 206 remaining_input -= copy_size;
210 break; 207 remaining_output -= copy_size;
211 } 208 remaining_size -= copy_size;
212 209
213 if (config.scaling > config.ScaleXY) { 210 if (remaining_input == 0) {
214 LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", config.scaling.Value()); 211 remaining_input = input_width;
215 UNIMPLEMENTED(); 212 src_pointer += input_gap;
216 break; 213 }
217 } 214 if (remaining_output == 0) {
215 remaining_output = output_width;
216 dst_pointer += output_gap;
217 }
218 }
218 219
219 if (config.input_linear && config.scaling != config.NoScale) { 220 LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X",
220 LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input"); 221 config.texture_copy.size,
221 UNIMPLEMENTED(); 222 config.GetPhysicalInputAddress(), input_width, input_gap,
222 break; 223 config.GetPhysicalOutputAddress(), output_width, output_gap,
223 } 224 config.flags);
224 225
225 bool horizontal_scale = config.scaling != config.NoScale; 226 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF);
226 bool vertical_scale = config.scaling == config.ScaleXY; 227 break;
228 }
227 229
228 u32 output_width = config.output_width >> horizontal_scale; 230 if (config.scaling > config.ScaleXY) {
229 u32 output_height = config.output_height >> vertical_scale; 231 LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", config.scaling.Value());
232 UNIMPLEMENTED();
233 break;
234 }
230 235
231 u32 input_size = config.input_width * config.input_height * GPU::Regs::BytesPerPixel(config.input_format); 236 if (config.input_linear && config.scaling != config.NoScale) {
232 u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format); 237 LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input");
238 UNIMPLEMENTED();
239 break;
240 }
233 241
234 VideoCore::g_renderer->Rasterizer()->FlushRegion(config.GetPhysicalInputAddress(), input_size); 242 int horizontal_scale = config.scaling != config.NoScale ? 1 : 0;
243 int vertical_scale = config.scaling == config.ScaleXY ? 1 : 0;
235 244
236 for (u32 y = 0; y < output_height; ++y) { 245 u32 output_width = config.output_width >> horizontal_scale;
237 for (u32 x = 0; x < output_width; ++x) { 246 u32 output_height = config.output_height >> vertical_scale;
238 Math::Vec4<u8> src_color;
239 247
240 // Calculate the [x,y] position of the input image 248 u32 input_size = config.input_width * config.input_height * GPU::Regs::BytesPerPixel(config.input_format);
241 // based on the current output position and the scale 249 u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format);
242 u32 input_x = x << horizontal_scale;
243 u32 input_y = y << vertical_scale;
244 250
245 if (config.flip_vertically) { 251 Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), input_size);
246 // Flip the y value of the output data, 252 Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), output_size);
247 // we do this after calculating the [x,y] position of the input image
248 // to account for the scaling options.
249 y = output_height - y - 1;
250 }
251 253
252 u32 dst_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.output_format); 254 for (u32 y = 0; y < output_height; ++y) {
253 u32 src_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.input_format); 255 for (u32 x = 0; x < output_width; ++x) {
254 u32 src_offset; 256 Math::Vec4<u8> src_color;
255 u32 dst_offset;
256 257
257 if (config.input_linear) { 258 // Calculate the [x,y] position of the input image
258 if (!config.dont_swizzle) { 259 // based on the current output position and the scale
259 // Interpret the input as linear and the output as tiled 260 u32 input_x = x << horizontal_scale;
260 u32 coarse_y = y & ~7; 261 u32 input_y = y << vertical_scale;
261 u32 stride = output_width * dst_bytes_per_pixel;
262 262
263 src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel; 263 if (config.flip_vertically) {
264 dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + coarse_y * stride; 264 // Flip the y value of the output data,
265 } else { 265 // we do this after calculating the [x,y] position of the input image
266 // Both input and output are linear 266 // to account for the scaling options.
267 src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel; 267 y = output_height - y - 1;
268 dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
269 } 268 }
270 } else {
271 if (!config.dont_swizzle) {
272 // Interpret the input as tiled and the output as linear
273 u32 coarse_y = input_y & ~7;
274 u32 stride = config.input_width * src_bytes_per_pixel;
275 269
276 src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + coarse_y * stride; 270 u32 dst_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.output_format);
277 dst_offset = (x + y * output_width) * dst_bytes_per_pixel; 271 u32 src_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.input_format);
272 u32 src_offset;
273 u32 dst_offset;
274
275 if (config.input_linear) {
276 if (!config.dont_swizzle) {
277 // Interpret the input as linear and the output as tiled
278 u32 coarse_y = y & ~7;
279 u32 stride = output_width * dst_bytes_per_pixel;
280
281 src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel;
282 dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + coarse_y * stride;
283 } else {
284 // Both input and output are linear
285 src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel;
286 dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
287 }
278 } else { 288 } else {
279 // Both input and output are tiled 289 if (!config.dont_swizzle) {
280 u32 out_coarse_y = y & ~7; 290 // Interpret the input as tiled and the output as linear
281 u32 out_stride = output_width * dst_bytes_per_pixel; 291 u32 coarse_y = input_y & ~7;
282 292 u32 stride = config.input_width * src_bytes_per_pixel;
283 u32 in_coarse_y = input_y & ~7; 293
284 u32 in_stride = config.input_width * src_bytes_per_pixel; 294 src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + coarse_y * stride;
285 295 dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
286 src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + in_coarse_y * in_stride; 296 } else {
287 dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + out_coarse_y * out_stride; 297 // Both input and output are tiled
298 u32 out_coarse_y = y & ~7;
299 u32 out_stride = output_width * dst_bytes_per_pixel;
300
301 u32 in_coarse_y = input_y & ~7;
302 u32 in_stride = config.input_width * src_bytes_per_pixel;
303
304 src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + in_coarse_y * in_stride;
305 dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + out_coarse_y * out_stride;
306 }
288 } 307 }
289 }
290 308
291 const u8* src_pixel = src_pointer + src_offset; 309 const u8* src_pixel = src_pointer + src_offset;
292 src_color = DecodePixel(config.input_format, src_pixel); 310 src_color = DecodePixel(config.input_format, src_pixel);
293 if (config.scaling == config.ScaleX) { 311 if (config.scaling == config.ScaleX) {
294 Math::Vec4<u8> pixel = DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel); 312 Math::Vec4<u8> pixel = DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel);
295 src_color = ((src_color + pixel) / 2).Cast<u8>(); 313 src_color = ((src_color + pixel) / 2).Cast<u8>();
296 } else if (config.scaling == config.ScaleXY) { 314 } else if (config.scaling == config.ScaleXY) {
297 Math::Vec4<u8> pixel1 = DecodePixel(config.input_format, src_pixel + 1 * src_bytes_per_pixel); 315 Math::Vec4<u8> pixel1 = DecodePixel(config.input_format, src_pixel + 1 * src_bytes_per_pixel);
298 Math::Vec4<u8> pixel2 = DecodePixel(config.input_format, src_pixel + 2 * src_bytes_per_pixel); 316 Math::Vec4<u8> pixel2 = DecodePixel(config.input_format, src_pixel + 2 * src_bytes_per_pixel);
299 Math::Vec4<u8> pixel3 = DecodePixel(config.input_format, src_pixel + 3 * src_bytes_per_pixel); 317 Math::Vec4<u8> pixel3 = DecodePixel(config.input_format, src_pixel + 3 * src_bytes_per_pixel);
300 src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>(); 318 src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>();
301 } 319 }
302 320
303 u8* dst_pixel = dst_pointer + dst_offset; 321 u8* dst_pixel = dst_pointer + dst_offset;
304 switch (config.output_format) { 322 switch (config.output_format) {
305 case Regs::PixelFormat::RGBA8: 323 case Regs::PixelFormat::RGBA8:
306 Color::EncodeRGBA8(src_color, dst_pixel); 324 Color::EncodeRGBA8(src_color, dst_pixel);
307 break; 325 break;
308 326
309 case Regs::PixelFormat::RGB8: 327 case Regs::PixelFormat::RGB8:
310 Color::EncodeRGB8(src_color, dst_pixel); 328 Color::EncodeRGB8(src_color, dst_pixel);
311 break; 329 break;
312 330
313 case Regs::PixelFormat::RGB565: 331 case Regs::PixelFormat::RGB565:
314 Color::EncodeRGB565(src_color, dst_pixel); 332 Color::EncodeRGB565(src_color, dst_pixel);
315 break; 333 break;
316 334
317 case Regs::PixelFormat::RGB5A1: 335 case Regs::PixelFormat::RGB5A1:
318 Color::EncodeRGB5A1(src_color, dst_pixel); 336 Color::EncodeRGB5A1(src_color, dst_pixel);
319 break; 337 break;
320 338
321 case Regs::PixelFormat::RGBA4: 339 case Regs::PixelFormat::RGBA4:
322 Color::EncodeRGBA4(src_color, dst_pixel); 340 Color::EncodeRGBA4(src_color, dst_pixel);
323 break; 341 break;
324 342
325 default: 343 default:
326 LOG_ERROR(HW_GPU, "Unknown destination framebuffer format %x", config.output_format.Value()); 344 LOG_ERROR(HW_GPU, "Unknown destination framebuffer format %x", config.output_format.Value());
327 break; 345 break;
346 }
328 } 347 }
329 } 348 }
330 }
331 349
332 LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), dst format %x, flags 0x%08X", 350 LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), dst format %x, flags 0x%08X",
333 config.output_height * output_width * GPU::Regs::BytesPerPixel(config.output_format), 351 config.output_height * output_width * GPU::Regs::BytesPerPixel(config.output_format),
334 config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(), 352 config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(),
335 config.GetPhysicalOutputAddress(), output_width, output_height, 353 config.GetPhysicalOutputAddress(), output_width, output_height,
336 config.output_format.Value(), config.flags); 354 config.output_format.Value(), config.flags);
355 }
337 356
338 g_regs.display_transfer_config.trigger = 0; 357 g_regs.display_transfer_config.trigger = 0;
339 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); 358 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF);
340
341 VideoCore::g_renderer->Rasterizer()->InvalidateRegion(config.GetPhysicalOutputAddress(), output_size);
342 } 359 }
343 break; 360 break;
344 } 361 }
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h
index a00adbf53..da4c345b4 100644
--- a/src/core/hw/gpu.h
+++ b/src/core/hw/gpu.h
@@ -78,7 +78,7 @@ struct Regs {
78 78
79 INSERT_PADDING_WORDS(0x4); 79 INSERT_PADDING_WORDS(0x4);
80 80
81 struct { 81 struct MemoryFillConfig {
82 u32 address_start; 82 u32 address_start;
83 u32 address_end; 83 u32 address_end;
84 84
@@ -165,7 +165,7 @@ struct Regs {
165 165
166 INSERT_PADDING_WORDS(0x169); 166 INSERT_PADDING_WORDS(0x169);
167 167
168 struct { 168 struct DisplayTransferConfig {
169 u32 input_address; 169 u32 input_address;
170 u32 output_address; 170 u32 output_address;
171 171
diff --git a/src/core/loader/ncch.cpp b/src/core/loader/ncch.cpp
index a4b47ef8c..066e91a9e 100644
--- a/src/core/loader/ncch.cpp
+++ b/src/core/loader/ncch.cpp
@@ -255,7 +255,7 @@ ResultStatus AppLoader_NCCH::Load() {
255 resource_limit_category = exheader_header.arm11_system_local_caps.resource_limit_category; 255 resource_limit_category = exheader_header.arm11_system_local_caps.resource_limit_category;
256 256
257 LOG_INFO(Loader, "Name: %s" , exheader_header.codeset_info.name); 257 LOG_INFO(Loader, "Name: %s" , exheader_header.codeset_info.name);
258 LOG_INFO(Loader, "Program ID: %016X" , ncch_header.program_id); 258 LOG_INFO(Loader, "Program ID: %016llX" , ncch_header.program_id);
259 LOG_DEBUG(Loader, "Code compressed: %s" , is_compressed ? "yes" : "no"); 259 LOG_DEBUG(Loader, "Code compressed: %s" , is_compressed ? "yes" : "no");
260 LOG_DEBUG(Loader, "Entry point: 0x%08X", entry_point); 260 LOG_DEBUG(Loader, "Entry point: 0x%08X", entry_point);
261 LOG_DEBUG(Loader, "Code size: 0x%08X", code_size); 261 LOG_DEBUG(Loader, "Code size: 0x%08X", code_size);
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 7de5bd15d..ee9b69f81 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -15,6 +15,9 @@
15#include "core/memory_setup.h" 15#include "core/memory_setup.h"
16#include "core/mmio.h" 16#include "core/mmio.h"
17 17
18#include "video_core/renderer_base.h"
19#include "video_core/video_core.h"
20
18namespace Memory { 21namespace Memory {
19 22
20enum class PageType { 23enum class PageType {
@@ -22,8 +25,12 @@ enum class PageType {
22 Unmapped, 25 Unmapped,
23 /// Page is mapped to regular memory. This is the only type you can get pointers to. 26 /// Page is mapped to regular memory. This is the only type you can get pointers to.
24 Memory, 27 Memory,
28 /// Page is mapped to regular memory, but also needs to check for rasterizer cache flushing and invalidation
29 RasterizerCachedMemory,
25 /// Page is mapped to a I/O region. Writing and reading to this page is handled by functions. 30 /// Page is mapped to a I/O region. Writing and reading to this page is handled by functions.
26 Special, 31 Special,
32 /// Page is mapped to a I/O region, but also needs to check for rasterizer cache flushing and invalidation
33 RasterizerCachedSpecial,
27}; 34};
28 35
29struct SpecialRegion { 36struct SpecialRegion {
@@ -57,6 +64,12 @@ struct PageTable {
57 * the corresponding entry in `pointers` MUST be set to null. 64 * the corresponding entry in `pointers` MUST be set to null.
58 */ 65 */
59 std::array<PageType, NUM_ENTRIES> attributes; 66 std::array<PageType, NUM_ENTRIES> attributes;
67
68 /**
69 * Indicates the number of externally cached resources touching a page that should be
70 * flushed before the memory is accessed
71 */
72 std::array<u8, NUM_ENTRIES> cached_res_count;
60}; 73};
61 74
62/// Singular page table used for the singleton process 75/// Singular page table used for the singleton process
@@ -72,8 +85,15 @@ static void MapPages(u32 base, u32 size, u8* memory, PageType type) {
72 while (base != end) { 85 while (base != end) {
73 ASSERT_MSG(base < PageTable::NUM_ENTRIES, "out of range mapping at %08X", base); 86 ASSERT_MSG(base < PageTable::NUM_ENTRIES, "out of range mapping at %08X", base);
74 87
88 // Since pages are unmapped on shutdown after video core is shutdown, the renderer may be null here
89 if (current_page_table->attributes[base] == PageType::RasterizerCachedMemory ||
90 current_page_table->attributes[base] == PageType::RasterizerCachedSpecial) {
91 RasterizerFlushAndInvalidateRegion(VirtualToPhysicalAddress(base << PAGE_BITS), PAGE_SIZE);
92 }
93
75 current_page_table->attributes[base] = type; 94 current_page_table->attributes[base] = type;
76 current_page_table->pointers[base] = memory; 95 current_page_table->pointers[base] = memory;
96 current_page_table->cached_res_count[base] = 0;
77 97
78 base += 1; 98 base += 1;
79 if (memory != nullptr) 99 if (memory != nullptr)
@@ -84,6 +104,7 @@ static void MapPages(u32 base, u32 size, u8* memory, PageType type) {
84void InitMemoryMap() { 104void InitMemoryMap() {
85 main_page_table.pointers.fill(nullptr); 105 main_page_table.pointers.fill(nullptr);
86 main_page_table.attributes.fill(PageType::Unmapped); 106 main_page_table.attributes.fill(PageType::Unmapped);
107 main_page_table.cached_res_count.fill(0);
87} 108}
88 109
89void MapMemoryRegion(VAddr base, u32 size, u8* target) { 110void MapMemoryRegion(VAddr base, u32 size, u8* target) {
@@ -107,6 +128,28 @@ void UnmapRegion(VAddr base, u32 size) {
107} 128}
108 129
109/** 130/**
131 * Gets a pointer to the exact memory at the virtual address (i.e. not page aligned)
132 * using a VMA from the current process
133 */
134static u8* GetPointerFromVMA(VAddr vaddr) {
135 u8* direct_pointer = nullptr;
136
137 auto& vma = Kernel::g_current_process->vm_manager.FindVMA(vaddr)->second;
138 switch (vma.type) {
139 case Kernel::VMAType::AllocatedMemoryBlock:
140 direct_pointer = vma.backing_block->data() + vma.offset;
141 break;
142 case Kernel::VMAType::BackingMemory:
143 direct_pointer = vma.backing_memory;
144 break;
145 default:
146 UNREACHABLE();
147 }
148
149 return direct_pointer + (vaddr - vma.base);
150}
151
152/**
110 * This function should only be called for virtual addreses with attribute `PageType::Special`. 153 * This function should only be called for virtual addreses with attribute `PageType::Special`.
111 */ 154 */
112static MMIORegionPointer GetMMIOHandler(VAddr vaddr) { 155static MMIORegionPointer GetMMIOHandler(VAddr vaddr) {
@@ -126,6 +169,7 @@ template <typename T>
126T Read(const VAddr vaddr) { 169T Read(const VAddr vaddr) {
127 const u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; 170 const u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS];
128 if (page_pointer) { 171 if (page_pointer) {
172 // NOTE: Avoid adding any extra logic to this fast-path block
129 T value; 173 T value;
130 std::memcpy(&value, &page_pointer[vaddr & PAGE_MASK], sizeof(T)); 174 std::memcpy(&value, &page_pointer[vaddr & PAGE_MASK], sizeof(T));
131 return value; 175 return value;
@@ -139,8 +183,22 @@ T Read(const VAddr vaddr) {
139 case PageType::Memory: 183 case PageType::Memory:
140 ASSERT_MSG(false, "Mapped memory page without a pointer @ %08X", vaddr); 184 ASSERT_MSG(false, "Mapped memory page without a pointer @ %08X", vaddr);
141 break; 185 break;
186 case PageType::RasterizerCachedMemory:
187 {
188 RasterizerFlushRegion(VirtualToPhysicalAddress(vaddr), sizeof(T));
189
190 T value;
191 std::memcpy(&value, GetPointerFromVMA(vaddr), sizeof(T));
192 return value;
193 }
142 case PageType::Special: 194 case PageType::Special:
143 return ReadMMIO<T>(GetMMIOHandler(vaddr), vaddr); 195 return ReadMMIO<T>(GetMMIOHandler(vaddr), vaddr);
196 case PageType::RasterizerCachedSpecial:
197 {
198 RasterizerFlushRegion(VirtualToPhysicalAddress(vaddr), sizeof(T));
199
200 return ReadMMIO<T>(GetMMIOHandler(vaddr), vaddr);
201 }
144 default: 202 default:
145 UNREACHABLE(); 203 UNREACHABLE();
146 } 204 }
@@ -153,6 +211,7 @@ template <typename T>
153void Write(const VAddr vaddr, const T data) { 211void Write(const VAddr vaddr, const T data) {
154 u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; 212 u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS];
155 if (page_pointer) { 213 if (page_pointer) {
214 // NOTE: Avoid adding any extra logic to this fast-path block
156 std::memcpy(&page_pointer[vaddr & PAGE_MASK], &data, sizeof(T)); 215 std::memcpy(&page_pointer[vaddr & PAGE_MASK], &data, sizeof(T));
157 return; 216 return;
158 } 217 }
@@ -165,9 +224,23 @@ void Write(const VAddr vaddr, const T data) {
165 case PageType::Memory: 224 case PageType::Memory:
166 ASSERT_MSG(false, "Mapped memory page without a pointer @ %08X", vaddr); 225 ASSERT_MSG(false, "Mapped memory page without a pointer @ %08X", vaddr);
167 break; 226 break;
227 case PageType::RasterizerCachedMemory:
228 {
229 RasterizerFlushAndInvalidateRegion(VirtualToPhysicalAddress(vaddr), sizeof(T));
230
231 std::memcpy(GetPointerFromVMA(vaddr), &data, sizeof(T));
232 break;
233 }
168 case PageType::Special: 234 case PageType::Special:
169 WriteMMIO<T>(GetMMIOHandler(vaddr), vaddr, data); 235 WriteMMIO<T>(GetMMIOHandler(vaddr), vaddr, data);
170 break; 236 break;
237 case PageType::RasterizerCachedSpecial:
238 {
239 RasterizerFlushAndInvalidateRegion(VirtualToPhysicalAddress(vaddr), sizeof(T));
240
241 WriteMMIO<T>(GetMMIOHandler(vaddr), vaddr, data);
242 break;
243 }
171 default: 244 default:
172 UNREACHABLE(); 245 UNREACHABLE();
173 } 246 }
@@ -179,6 +252,10 @@ u8* GetPointer(const VAddr vaddr) {
179 return page_pointer + (vaddr & PAGE_MASK); 252 return page_pointer + (vaddr & PAGE_MASK);
180 } 253 }
181 254
255 if (current_page_table->attributes[vaddr >> PAGE_BITS] == PageType::RasterizerCachedMemory) {
256 return GetPointerFromVMA(vaddr);
257 }
258
182 LOG_ERROR(HW_Memory, "unknown GetPointer @ 0x%08x", vaddr); 259 LOG_ERROR(HW_Memory, "unknown GetPointer @ 0x%08x", vaddr);
183 return nullptr; 260 return nullptr;
184} 261}
@@ -187,6 +264,69 @@ u8* GetPhysicalPointer(PAddr address) {
187 return GetPointer(PhysicalToVirtualAddress(address)); 264 return GetPointer(PhysicalToVirtualAddress(address));
188} 265}
189 266
267void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) {
268 if (start == 0) {
269 return;
270 }
271
272 u32 num_pages = ((start + size - 1) >> PAGE_BITS) - (start >> PAGE_BITS) + 1;
273 PAddr paddr = start;
274
275 for (unsigned i = 0; i < num_pages; ++i) {
276 VAddr vaddr = PhysicalToVirtualAddress(paddr);
277 u8& res_count = current_page_table->cached_res_count[vaddr >> PAGE_BITS];
278 ASSERT_MSG(count_delta <= UINT8_MAX - res_count, "Rasterizer resource cache counter overflow!");
279 ASSERT_MSG(count_delta >= -res_count, "Rasterizer resource cache counter underflow!");
280
281 // Switch page type to cached if now cached
282 if (res_count == 0) {
283 PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
284 switch (page_type) {
285 case PageType::Memory:
286 page_type = PageType::RasterizerCachedMemory;
287 current_page_table->pointers[vaddr >> PAGE_BITS] = nullptr;
288 break;
289 case PageType::Special:
290 page_type = PageType::RasterizerCachedSpecial;
291 break;
292 default:
293 UNREACHABLE();
294 }
295 }
296
297 res_count += count_delta;
298
299 // Switch page type to uncached if now uncached
300 if (res_count == 0) {
301 PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
302 switch (page_type) {
303 case PageType::RasterizerCachedMemory:
304 page_type = PageType::Memory;
305 current_page_table->pointers[vaddr >> PAGE_BITS] = GetPointerFromVMA(vaddr & ~PAGE_MASK);
306 break;
307 case PageType::RasterizerCachedSpecial:
308 page_type = PageType::Special;
309 break;
310 default:
311 UNREACHABLE();
312 }
313 }
314 paddr += PAGE_SIZE;
315 }
316}
317
318void RasterizerFlushRegion(PAddr start, u32 size) {
319 if (VideoCore::g_renderer != nullptr) {
320 VideoCore::g_renderer->Rasterizer()->FlushRegion(start, size);
321 }
322}
323
324void RasterizerFlushAndInvalidateRegion(PAddr start, u32 size) {
325 if (VideoCore::g_renderer != nullptr) {
326 VideoCore::g_renderer->Rasterizer()->FlushAndInvalidateRegion(start, size);
327 }
328}
329
190u8 Read8(const VAddr addr) { 330u8 Read8(const VAddr addr) {
191 return Read<u8>(addr); 331 return Read<u8>(addr);
192} 332}
diff --git a/src/core/memory.h b/src/core/memory.h
index 5af72b7a7..9caa3c3f5 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -148,4 +148,20 @@ VAddr PhysicalToVirtualAddress(PAddr addr);
148 */ 148 */
149u8* GetPhysicalPointer(PAddr address); 149u8* GetPhysicalPointer(PAddr address);
150 150
151/**
152 * Adds the supplied value to the rasterizer resource cache counter of each
153 * page touching the region.
154 */
155void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta);
156
157/**
158 * Flushes any externally cached rasterizer resources touching the given region.
159 */
160void RasterizerFlushRegion(PAddr start, u32 size);
161
162/**
163 * Flushes and invalidates any externally cached rasterizer resources touching the given region.
164 */
165void RasterizerFlushAndInvalidateRegion(PAddr start, u32 size);
166
151} 167}
diff --git a/src/core/settings.cpp b/src/core/settings.cpp
index 1aa26fbd2..eaf5c8461 100644
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -19,7 +19,7 @@ void Apply() {
19 19
20 VideoCore::g_hw_renderer_enabled = values.use_hw_renderer; 20 VideoCore::g_hw_renderer_enabled = values.use_hw_renderer;
21 VideoCore::g_shader_jit_enabled = values.use_shader_jit; 21 VideoCore::g_shader_jit_enabled = values.use_shader_jit;
22 22 VideoCore::g_scaled_resolution_enabled = values.use_scaled_resolution;
23} 23}
24 24
25} // namespace 25} // namespace
diff --git a/src/core/settings.h b/src/core/settings.h
index 4933a516d..d620d8461 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -55,6 +55,7 @@ struct Values {
55 // Renderer 55 // Renderer
56 bool use_hw_renderer; 56 bool use_hw_renderer;
57 bool use_shader_jit; 57 bool use_shader_jit;
58 bool use_scaled_resolution;
58 59
59 float bg_red; 60 float bg_red;
60 float bg_green; 61 float bg_green;
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 76cfd4f7d..de4082b1f 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -16,6 +16,7 @@ set(SRCS
16 shader/shader_interpreter.cpp 16 shader/shader_interpreter.cpp
17 swrasterizer.cpp 17 swrasterizer.cpp
18 utils.cpp 18 utils.cpp
19 vertex_loader.cpp
19 video_core.cpp 20 video_core.cpp
20 ) 21 )
21 22
@@ -43,6 +44,7 @@ set(HEADERS
43 shader/shader_interpreter.h 44 shader/shader_interpreter.h
44 swrasterizer.h 45 swrasterizer.h
45 utils.h 46 utils.h
47 vertex_loader.h
46 video_core.h 48 video_core.h
47 ) 49 )
48 50
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 3abe79c09..58883e374 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -7,7 +7,6 @@
7 7
8#include "common/alignment.h" 8#include "common/alignment.h"
9#include "common/microprofile.h" 9#include "common/microprofile.h"
10#include "common/profiler.h"
11 10
12#include "core/settings.h" 11#include "core/settings.h"
13#include "core/hle/service/gsp_gpu.h" 12#include "core/hle/service/gsp_gpu.h"
@@ -22,6 +21,7 @@
22#include "video_core/video_core.h" 21#include "video_core/video_core.h"
23#include "video_core/debug_utils/debug_utils.h" 22#include "video_core/debug_utils/debug_utils.h"
24#include "video_core/shader/shader_interpreter.h" 23#include "video_core/shader/shader_interpreter.h"
24#include "video_core/vertex_loader.h"
25 25
26namespace Pica { 26namespace Pica {
27 27
@@ -35,8 +35,6 @@ static int default_attr_counter = 0;
35 35
36static u32 default_attr_write_buffer[3]; 36static u32 default_attr_write_buffer[3];
37 37
38Common::Profiling::TimingCategory category_drawing("Drawing");
39
40// Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF 38// Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF
41static const u32 expand_bits_to_bytes[] = { 39static const u32 expand_bits_to_bytes[] = {
42 0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff, 40 0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff,
@@ -186,60 +184,19 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
186 case PICA_REG_INDEX(trigger_draw): 184 case PICA_REG_INDEX(trigger_draw):
187 case PICA_REG_INDEX(trigger_draw_indexed): 185 case PICA_REG_INDEX(trigger_draw_indexed):
188 { 186 {
189 Common::Profiling::ScopeTimer scope_timer(category_drawing);
190 MICROPROFILE_SCOPE(GPU_Drawing); 187 MICROPROFILE_SCOPE(GPU_Drawing);
191 188
192#if PICA_LOG_TEV 189#if PICA_LOG_TEV
193 DebugUtils::DumpTevStageConfig(regs.GetTevStages()); 190 DebugUtils::DumpTevStageConfig(regs.GetTevStages());
194#endif 191#endif
195
196 if (g_debug_context) 192 if (g_debug_context)
197 g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr); 193 g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr);
198 194
199 const auto& attribute_config = regs.vertex_attributes; 195 // Processes information about internal vertex attributes to figure out how a vertex is loaded.
200 const u32 base_address = attribute_config.GetPhysicalBaseAddress(); 196 // Later, these can be compiled and cached.
201 197 VertexLoader loader;
202 // Information about internal vertex attributes 198 const u32 base_address = regs.vertex_attributes.GetPhysicalBaseAddress();
203 u32 vertex_attribute_sources[16]; 199 loader.Setup(regs);
204 boost::fill(vertex_attribute_sources, 0xdeadbeef);
205 u32 vertex_attribute_strides[16] = {};
206 Regs::VertexAttributeFormat vertex_attribute_formats[16] = {};
207
208 u32 vertex_attribute_elements[16] = {};
209 u32 vertex_attribute_element_size[16] = {};
210
211 // Setup attribute data from loaders
212 for (int loader = 0; loader < 12; ++loader) {
213 const auto& loader_config = attribute_config.attribute_loaders[loader];
214
215 u32 offset = 0;
216
217 // TODO: What happens if a loader overwrites a previous one's data?
218 for (unsigned component = 0; component < loader_config.component_count; ++component) {
219 if (component >= 12) {
220 LOG_ERROR(HW_GPU, "Overflow in the vertex attribute loader %u trying to load component %u", loader, component);
221 continue;
222 }
223
224 u32 attribute_index = loader_config.GetComponent(component);
225 if (attribute_index < 12) {
226 int element_size = attribute_config.GetElementSizeInBytes(attribute_index);
227 offset = Common::AlignUp(offset, element_size);
228 vertex_attribute_sources[attribute_index] = base_address + loader_config.data_offset + offset;
229 vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count);
230 vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index);
231 vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index);
232 vertex_attribute_element_size[attribute_index] = element_size;
233 offset += attribute_config.GetStride(attribute_index);
234 } else if (attribute_index < 16) {
235 // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively
236 offset = Common::AlignUp(offset, 4);
237 offset += (attribute_index - 11) * 4;
238 } else {
239 UNREACHABLE(); // This is truly unreachable due to the number of bits for each component
240 }
241 }
242 }
243 200
244 // Load vertices 201 // Load vertices
245 bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed)); 202 bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed));
@@ -263,32 +220,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
263 } 220 }
264 } 221 }
265 222
266 class { 223 DebugUtils::MemoryAccessTracker memory_accesses;
267 /// Combine overlapping and close ranges
268 void SimplifyRanges() {
269 for (auto it = ranges.begin(); it != ranges.end(); ++it) {
270 // NOTE: We add 32 to the range end address to make sure "close" ranges are combined, too
271 auto it2 = std::next(it);
272 while (it2 != ranges.end() && it->first + it->second + 32 >= it2->first) {
273 it->second = std::max(it->second, it2->first + it2->second - it->first);
274 it2 = ranges.erase(it2);
275 }
276 }
277 }
278
279 public:
280 /// Record a particular memory access in the list
281 void AddAccess(u32 paddr, u32 size) {
282 // Create new range or extend existing one
283 ranges[paddr] = std::max(ranges[paddr], size);
284
285 // Simplify ranges...
286 SimplifyRanges();
287 }
288
289 /// Map of accessed ranges (mapping start address to range size)
290 std::map<u32, u32> ranges;
291 } memory_accesses;
292 224
293 // Simple circular-replacement vertex cache 225 // Simple circular-replacement vertex cache
294 // The size has been tuned for optimal balance between hit-rate and the cost of lookup 226 // The size has been tuned for optimal balance between hit-rate and the cost of lookup
@@ -332,60 +264,13 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
332 if (!vertex_cache_hit) { 264 if (!vertex_cache_hit) {
333 // Initialize data for the current vertex 265 // Initialize data for the current vertex
334 Shader::InputVertex input; 266 Shader::InputVertex input;
335 267 loader.LoadVertex(base_address, index, vertex, input, memory_accesses);
336 for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) {
337 if (vertex_attribute_elements[i] != 0) {
338 // Default attribute values set if array elements have < 4 components. This
339 // is *not* carried over from the default attribute settings even if they're
340 // enabled for this attribute.
341 static const float24 zero = float24::FromFloat32(0.0f);
342 static const float24 one = float24::FromFloat32(1.0f);
343 input.attr[i] = Math::Vec4<float24>(zero, zero, zero, one);
344
345 // Load per-vertex data from the loader arrays
346 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
347 u32 source_addr = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i];
348 const u8* srcdata = Memory::GetPhysicalPointer(source_addr);
349
350 if (g_debug_context && Pica::g_debug_context->recorder) {
351 memory_accesses.AddAccess(source_addr,
352 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4
353 : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1);
354 }
355
356 const float srcval =
357 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE) ? *reinterpret_cast<const s8*>(srcdata) :
358 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *reinterpret_cast<const u8*>(srcdata) :
359 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? *reinterpret_cast<const s16*>(srcdata) :
360 *reinterpret_cast<const float*>(srcdata);
361
362 input.attr[i][comp] = float24::FromFloat32(srcval);
363 LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f",
364 comp, i, vertex, index,
365 attribute_config.GetPhysicalBaseAddress(),
366 vertex_attribute_sources[i] - base_address,
367 vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i],
368 input.attr[i][comp].ToFloat32());
369 }
370 } else if (attribute_config.IsDefaultAttribute(i)) {
371 // Load the default attribute if we're configured to do so
372 input.attr[i] = g_state.vs.default_attributes[i];
373 LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)",
374 i, vertex, index,
375 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),
376 input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
377 } else {
378 // TODO(yuriks): In this case, no data gets loaded and the vertex
379 // remains with the last value it had. This isn't currently maintained
380 // as global state, however, and so won't work in Citra yet.
381 }
382 }
383 268
384 if (g_debug_context) 269 if (g_debug_context)
385 g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input); 270 g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input);
386 271
387 // Send to vertex shader 272 // Send to vertex shader
388 output = Shader::Run(shader_unit, input, attribute_config.GetNumTotalAttributes()); 273 output = Shader::Run(shader_unit, input, loader.GetNumTotalAttributes());
389 274
390 if (is_indexed) { 275 if (is_indexed) {
391 vertex_cache[vertex_cache_pos] = output; 276 vertex_cache[vertex_cache_pos] = output;
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index c3a9c9598..178a566f7 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -40,15 +40,12 @@ using nihstro::DVLPHeader;
40 40
41namespace Pica { 41namespace Pica {
42 42
43void DebugContext::OnEvent(Event event, void* data) { 43void DebugContext::DoOnEvent(Event event, void* data) {
44 if (!breakpoints[event].enabled)
45 return;
46
47 { 44 {
48 std::unique_lock<std::mutex> lock(breakpoint_mutex); 45 std::unique_lock<std::mutex> lock(breakpoint_mutex);
49 46
50 // Commit the hardware renderer's framebuffer so it will show on debug widgets 47 // Commit the rasterizer's caches so framebuffers, render targets, etc. will show on debug widgets
51 VideoCore::g_renderer->Rasterizer()->FlushFramebuffer(); 48 VideoCore::g_renderer->Rasterizer()->FlushAll();
52 49
53 // TODO: Should stop the CPU thread here once we multithread emulation. 50 // TODO: Should stop the CPU thread here once we multithread emulation.
54 51
diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h
index 7df941619..dd0828cee 100644
--- a/src/video_core/debug_utils/debug_utils.h
+++ b/src/video_core/debug_utils/debug_utils.h
@@ -114,7 +114,15 @@ public:
114 * @param event Event which has happened 114 * @param event Event which has happened
115 * @param data Optional data pointer (pass nullptr if unused). Needs to remain valid until Resume() is called. 115 * @param data Optional data pointer (pass nullptr if unused). Needs to remain valid until Resume() is called.
116 */ 116 */
117 void OnEvent(Event event, void* data); 117 void OnEvent(Event event, void* data) {
118 // This check is left in the header to allow the compiler to inline it.
119 if (!breakpoints[(int)event].enabled)
120 return;
121 // For the rest of event handling, call a separate function.
122 DoOnEvent(event, data);
123 }
124
125 void DoOnEvent(Event event, void *data);
118 126
119 /** 127 /**
120 * Resume from the current breakpoint. 128 * Resume from the current breakpoint.
@@ -126,12 +134,14 @@ public:
126 * Delete all set breakpoints and resume emulation. 134 * Delete all set breakpoints and resume emulation.
127 */ 135 */
128 void ClearBreakpoints() { 136 void ClearBreakpoints() {
129 breakpoints.clear(); 137 for (auto &bp : breakpoints) {
138 bp.enabled = false;
139 }
130 Resume(); 140 Resume();
131 } 141 }
132 142
133 // TODO: Evaluate if access to these members should be hidden behind a public interface. 143 // TODO: Evaluate if access to these members should be hidden behind a public interface.
134 std::map<Event, BreakPoint> breakpoints; 144 std::array<BreakPoint, (int)Event::NumEvents> breakpoints;
135 Event active_breakpoint; 145 Event active_breakpoint;
136 bool at_breakpoint = false; 146 bool at_breakpoint = false;
137 147
@@ -206,6 +216,36 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data);
206 216
207void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages); 217void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages);
208 218
219/**
220 * Used in the vertex loader to merge access records. TODO: Investigate if actually useful.
221 */
222class MemoryAccessTracker {
223 /// Combine overlapping and close ranges
224 void SimplifyRanges() {
225 for (auto it = ranges.begin(); it != ranges.end(); ++it) {
226 // NOTE: We add 32 to the range end address to make sure "close" ranges are combined, too
227 auto it2 = std::next(it);
228 while (it2 != ranges.end() && it->first + it->second + 32 >= it2->first) {
229 it->second = std::max(it->second, it2->first + it2->second - it->first);
230 it2 = ranges.erase(it2);
231 }
232 }
233 }
234
235public:
236 /// Record a particular memory access in the list
237 void AddAccess(u32 paddr, u32 size) {
238 // Create new range or extend existing one
239 ranges[paddr] = std::max(ranges[paddr], size);
240
241 // Simplify ranges...
242 SimplifyRanges();
243 }
244
245 /// Map of accessed ranges (mapping start address to range size)
246 std::map<u32, u32> ranges;
247};
248
209} // namespace 249} // namespace
210 250
211} // namespace 251} // namespace
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 4552ff81c..cf130d7f8 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -577,7 +577,7 @@ struct Regs {
577 } 577 }
578 } 578 }
579 579
580 struct { 580 struct FramebufferConfig {
581 INSERT_PADDING_WORDS(0x3); 581 INSERT_PADDING_WORDS(0x3);
582 582
583 union { 583 union {
@@ -747,8 +747,13 @@ struct Regs {
747 case LightingSampler::ReflectGreen: 747 case LightingSampler::ReflectGreen:
748 case LightingSampler::ReflectBlue: 748 case LightingSampler::ReflectBlue:
749 return (config == LightingConfig::Config4) || (config == LightingConfig::Config5) || (config == LightingConfig::Config7); 749 return (config == LightingConfig::Config4) || (config == LightingConfig::Config5) || (config == LightingConfig::Config7);
750 default:
751 UNREACHABLE_MSG("Regs::IsLightingSamplerSupported: Reached "
752 "unreachable section, sampler should be one "
753 "of Distribution0, Distribution1, Fresnel, "
754 "ReflectRed, ReflectGreen or ReflectBlue, instead "
755 "got %i", static_cast<int>(config));
750 } 756 }
751 return false;
752 } 757 }
753 758
754 struct { 759 struct {
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 0434ad05a..9cf77b1f2 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -9,7 +9,6 @@
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "common/math_util.h" 10#include "common/math_util.h"
11#include "common/microprofile.h" 11#include "common/microprofile.h"
12#include "common/profiler.h"
13 12
14#include "core/memory.h" 13#include "core/memory.h"
15#include "core/hw/gpu.h" 14#include "core/hw/gpu.h"
@@ -287,7 +286,6 @@ static int SignedArea (const Math::Vec2<Fix12P4>& vtx1,
287 return Math::Cross(vec1, vec2).z; 286 return Math::Cross(vec1, vec2).z;
288}; 287};
289 288
290static Common::Profiling::TimingCategory rasterization_category("Rasterization");
291MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 240)); 289MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 240));
292 290
293/** 291/**
@@ -300,7 +298,6 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
300 bool reversed = false) 298 bool reversed = false)
301{ 299{
302 const auto& regs = g_state.regs; 300 const auto& regs = g_state.regs;
303 Common::Profiling::ScopeTimer timer(rasterization_category);
304 MICROPROFILE_SCOPE(GPU_Rasterization); 301 MICROPROFILE_SCOPE(GPU_Rasterization);
305 302
306 // vertex positions in rasterizer coordinates 303 // vertex positions in rasterizer coordinates
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 008c5827b..bf7101665 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -6,6 +6,10 @@
6 6
7#include "common/common_types.h" 7#include "common/common_types.h"
8 8
9#include "core/hw/gpu.h"
10
11struct ScreenInfo;
12
9namespace Pica { 13namespace Pica {
10namespace Shader { 14namespace Shader {
11struct OutputVertex; 15struct OutputVertex;
@@ -18,12 +22,6 @@ class RasterizerInterface {
18public: 22public:
19 virtual ~RasterizerInterface() {} 23 virtual ~RasterizerInterface() {}
20 24
21 /// Initialize API-specific GPU objects
22 virtual void InitObjects() = 0;
23
24 /// Reset the rasterizer, such as flushing all caches and updating all state
25 virtual void Reset() = 0;
26
27 /// Queues the primitive formed by the given vertices for rendering 25 /// Queues the primitive formed by the given vertices for rendering
28 virtual void AddTriangle(const Pica::Shader::OutputVertex& v0, 26 virtual void AddTriangle(const Pica::Shader::OutputVertex& v0,
29 const Pica::Shader::OutputVertex& v1, 27 const Pica::Shader::OutputVertex& v1,
@@ -32,17 +30,26 @@ public:
32 /// Draw the current batch of triangles 30 /// Draw the current batch of triangles
33 virtual void DrawTriangles() = 0; 31 virtual void DrawTriangles() = 0;
34 32
35 /// Commit the rasterizer's framebuffer contents immediately to the current 3DS memory framebuffer
36 virtual void FlushFramebuffer() = 0;
37
38 /// Notify rasterizer that the specified PICA register has been changed 33 /// Notify rasterizer that the specified PICA register has been changed
39 virtual void NotifyPicaRegisterChanged(u32 id) = 0; 34 virtual void NotifyPicaRegisterChanged(u32 id) = 0;
40 35
41 /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory. 36 /// Notify rasterizer that all caches should be flushed to 3DS memory
37 virtual void FlushAll() = 0;
38
39 /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory
42 virtual void FlushRegion(PAddr addr, u32 size) = 0; 40 virtual void FlushRegion(PAddr addr, u32 size) = 0;
43 41
44 /// Notify rasterizer that any caches of the specified region should be discraded and reloaded from 3DS memory. 42 /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory and invalidated
45 virtual void InvalidateRegion(PAddr addr, u32 size) = 0; 43 virtual void FlushAndInvalidateRegion(PAddr addr, u32 size) = 0;
44
45 /// Attempt to use a faster method to perform a display transfer
46 virtual bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { return false; }
47
48 /// Attempt to use a faster method to fill a region
49 virtual bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { return false; }
50
51 /// Attempt to use a faster method to display the framebuffer to screen
52 virtual bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) { return false; }
46}; 53};
47 54
48} 55}
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index 101f84eb9..ccd497de0 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -21,7 +21,5 @@ void RendererBase::RefreshRasterizerSetting() {
21 } else { 21 } else {
22 rasterizer = std::make_unique<VideoCore::SWRasterizer>(); 22 rasterizer = std::make_unique<VideoCore::SWRasterizer>();
23 } 23 }
24 rasterizer->InitObjects();
25 rasterizer->Reset();
26 } 24 }
27} 25}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 6ca9f45e2..a8c775c80 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -11,7 +11,6 @@
11#include "common/file_util.h" 11#include "common/file_util.h"
12#include "common/math_util.h" 12#include "common/math_util.h"
13#include "common/microprofile.h" 13#include "common/microprofile.h"
14#include "common/profiler.h"
15 14
16#include "core/memory.h" 15#include "core/memory.h"
17#include "core/settings.h" 16#include "core/settings.h"
@@ -36,10 +35,7 @@ static bool IsPassThroughTevStage(const Pica::Regs::TevStageConfig& stage) {
36 stage.GetAlphaMultiplier() == 1); 35 stage.GetAlphaMultiplier() == 1);
37} 36}
38 37
39RasterizerOpenGL::RasterizerOpenGL() : cached_fb_color_addr(0), cached_fb_depth_addr(0) { } 38RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
40RasterizerOpenGL::~RasterizerOpenGL() { }
41
42void RasterizerOpenGL::InitObjects() {
43 // Create sampler objects 39 // Create sampler objects
44 for (size_t i = 0; i < texture_samplers.size(); ++i) { 40 for (size_t i = 0; i < texture_samplers.size(); ++i) {
45 texture_samplers[i].Create(); 41 texture_samplers[i].Create();
@@ -61,6 +57,10 @@ void RasterizerOpenGL::InitObjects() {
61 57
62 uniform_block_data.dirty = true; 58 uniform_block_data.dirty = true;
63 59
60 for (unsigned index = 0; index < lighting_luts.size(); index++) {
61 uniform_block_data.lut_dirty[index] = true;
62 }
63
64 // Set vertex attributes 64 // Set vertex attributes
65 glVertexAttribPointer(GLShader::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position)); 65 glVertexAttribPointer(GLShader::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position));
66 glEnableVertexAttribArray(GLShader::ATTRIBUTE_POSITION); 66 glEnableVertexAttribArray(GLShader::ATTRIBUTE_POSITION);
@@ -81,70 +81,24 @@ void RasterizerOpenGL::InitObjects() {
81 glVertexAttribPointer(GLShader::ATTRIBUTE_VIEW, 3, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, view)); 81 glVertexAttribPointer(GLShader::ATTRIBUTE_VIEW, 3, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, view));
82 glEnableVertexAttribArray(GLShader::ATTRIBUTE_VIEW); 82 glEnableVertexAttribArray(GLShader::ATTRIBUTE_VIEW);
83 83
84 SetShader(); 84 // Create render framebuffer
85
86 // Create textures for OGL framebuffer that will be rendered to, initially 1x1 to succeed in framebuffer creation
87 fb_color_texture.texture.Create();
88 ReconfigureColorTexture(fb_color_texture, Pica::Regs::ColorFormat::RGBA8, 1, 1);
89
90 state.texture_units[0].texture_2d = fb_color_texture.texture.handle;
91 state.Apply();
92
93 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
94 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
95 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
96 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
97 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
98
99 state.texture_units[0].texture_2d = 0;
100 state.Apply();
101
102 fb_depth_texture.texture.Create();
103 ReconfigureDepthTexture(fb_depth_texture, Pica::Regs::DepthFormat::D16, 1, 1);
104
105 state.texture_units[0].texture_2d = fb_depth_texture.texture.handle;
106 state.Apply();
107
108 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
109 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
110 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
111 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
112 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
113 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_FUNC, GL_LEQUAL);
114 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_MODE, GL_NONE);
115
116 state.texture_units[0].texture_2d = 0;
117 state.Apply();
118
119 // Configure OpenGL framebuffer
120 framebuffer.Create(); 85 framebuffer.Create();
121 86
122 state.draw.framebuffer = framebuffer.handle; 87 // Allocate and bind lighting lut textures
88 for (size_t i = 0; i < lighting_luts.size(); ++i) {
89 lighting_luts[i].Create();
90 state.lighting_luts[i].texture_1d = lighting_luts[i].handle;
91 }
123 state.Apply(); 92 state.Apply();
124 93
125 glActiveTexture(GL_TEXTURE0); 94 for (size_t i = 0; i < lighting_luts.size(); ++i) {
126 glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, fb_color_texture.texture.handle, 0);
127 glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, fb_depth_texture.texture.handle, 0);
128
129 for (size_t i = 0; i < lighting_lut.size(); ++i) {
130 lighting_lut[i].Create();
131 state.lighting_lut[i].texture_1d = lighting_lut[i].handle;
132
133 glActiveTexture(GL_TEXTURE3 + i); 95 glActiveTexture(GL_TEXTURE3 + i);
134 glBindTexture(GL_TEXTURE_1D, state.lighting_lut[i].texture_1d);
135
136 glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr); 96 glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr);
137 glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 97 glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
138 glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); 98 glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
139 } 99 }
140 state.Apply();
141
142 GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER);
143 ASSERT_MSG(status == GL_FRAMEBUFFER_COMPLETE,
144 "OpenGL rasterizer framebuffer setup failed, status %X", status);
145}
146 100
147void RasterizerOpenGL::Reset() { 101 // Sync fixed function OpenGL state
148 SyncCullMode(); 102 SyncCullMode();
149 SyncDepthModifiers(); 103 SyncDepthModifiers();
150 SyncBlendEnabled(); 104 SyncBlendEnabled();
@@ -156,10 +110,10 @@ void RasterizerOpenGL::Reset() {
156 SyncColorWriteMask(); 110 SyncColorWriteMask();
157 SyncStencilWriteMask(); 111 SyncStencilWriteMask();
158 SyncDepthWriteMask(); 112 SyncDepthWriteMask();
113}
159 114
160 SetShader(); 115RasterizerOpenGL::~RasterizerOpenGL() {
161 116
162 res_cache.InvalidateAll();
163} 117}
164 118
165/** 119/**
@@ -196,47 +150,98 @@ void RasterizerOpenGL::DrawTriangles() {
196 if (vertex_batch.empty()) 150 if (vertex_batch.empty())
197 return; 151 return;
198 152
199 SyncFramebuffer(); 153 const auto& regs = Pica::g_state.regs;
200 SyncDrawState(); 154
155 // Sync and bind the framebuffer surfaces
156 CachedSurface* color_surface;
157 CachedSurface* depth_surface;
158 MathUtil::Rectangle<int> rect;
159 std::tie(color_surface, depth_surface, rect) = res_cache.GetFramebufferSurfaces(regs.framebuffer);
160
161 state.draw.draw_framebuffer = framebuffer.handle;
162 state.Apply();
163
164 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, color_surface != nullptr ? color_surface->texture.handle : 0, 0);
165 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, depth_surface != nullptr ? depth_surface->texture.handle : 0, 0);
166 bool has_stencil = regs.framebuffer.depth_format == Pica::Regs::DepthFormat::D24S8;
167 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, (has_stencil && depth_surface != nullptr) ? depth_surface->texture.handle : 0, 0);
168
169 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
170 return;
171 }
172
173 // Sync the viewport
174 // These registers hold half-width and half-height, so must be multiplied by 2
175 GLsizei viewport_width = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_x).ToFloat32() * 2;
176 GLsizei viewport_height = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_y).ToFloat32() * 2;
177
178 glViewport((GLint)(rect.left + regs.viewport_corner.x * color_surface->res_scale_width),
179 (GLint)(rect.bottom + regs.viewport_corner.y * color_surface->res_scale_height),
180 (GLsizei)(viewport_width * color_surface->res_scale_width), (GLsizei)(viewport_height * color_surface->res_scale_height));
181
182 // Sync and bind the texture surfaces
183 const auto pica_textures = regs.GetTextures();
184 for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) {
185 const auto& texture = pica_textures[texture_index];
186
187 if (texture.enabled) {
188 texture_samplers[texture_index].SyncWithConfig(texture.config);
189 CachedSurface* surface = res_cache.GetTextureSurface(texture);
190 if (surface != nullptr) {
191 state.texture_units[texture_index].texture_2d = surface->texture.handle;
192 } else {
193 // Can occur when texture addr is null or its memory is unmapped/invalid
194 state.texture_units[texture_index].texture_2d = 0;
195 }
196 } else {
197 state.texture_units[texture_index].texture_2d = 0;
198 }
199 }
201 200
202 if (state.draw.shader_dirty) { 201 // Sync and bind the shader
202 if (shader_dirty) {
203 SetShader(); 203 SetShader();
204 state.draw.shader_dirty = false; 204 shader_dirty = false;
205 } 205 }
206 206
207 for (unsigned index = 0; index < lighting_lut.size(); index++) { 207 // Sync the lighting luts
208 for (unsigned index = 0; index < lighting_luts.size(); index++) {
208 if (uniform_block_data.lut_dirty[index]) { 209 if (uniform_block_data.lut_dirty[index]) {
209 SyncLightingLUT(index); 210 SyncLightingLUT(index);
210 uniform_block_data.lut_dirty[index] = false; 211 uniform_block_data.lut_dirty[index] = false;
211 } 212 }
212 } 213 }
213 214
215 // Sync the uniform data
214 if (uniform_block_data.dirty) { 216 if (uniform_block_data.dirty) {
215 glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, GL_STATIC_DRAW); 217 glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, GL_STATIC_DRAW);
216 uniform_block_data.dirty = false; 218 uniform_block_data.dirty = false;
217 } 219 }
218 220
221 state.Apply();
222
223 // Draw the vertex batch
219 glBufferData(GL_ARRAY_BUFFER, vertex_batch.size() * sizeof(HardwareVertex), vertex_batch.data(), GL_STREAM_DRAW); 224 glBufferData(GL_ARRAY_BUFFER, vertex_batch.size() * sizeof(HardwareVertex), vertex_batch.data(), GL_STREAM_DRAW);
220 glDrawArrays(GL_TRIANGLES, 0, (GLsizei)vertex_batch.size()); 225 glDrawArrays(GL_TRIANGLES, 0, (GLsizei)vertex_batch.size());
221 226
222 vertex_batch.clear(); 227 // Mark framebuffer surfaces as dirty
223 228 // TODO: Restrict invalidation area to the viewport
224 // Flush the resource cache at the current depth and color framebuffer addresses for render-to-texture 229 if (color_surface != nullptr) {
225 const auto& regs = Pica::g_state.regs; 230 color_surface->dirty = true;
226 231 res_cache.FlushRegion(color_surface->addr, color_surface->size, color_surface, true);
227 u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format) 232 }
228 * fb_color_texture.width * fb_color_texture.height; 233 if (depth_surface != nullptr) {
229 234 depth_surface->dirty = true;
230 u32 cached_fb_depth_size = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format) 235 res_cache.FlushRegion(depth_surface->addr, depth_surface->size, depth_surface, true);
231 * fb_depth_texture.width * fb_depth_texture.height; 236 }
232 237
233 res_cache.InvalidateInRange(cached_fb_color_addr, cached_fb_color_size, true); 238 vertex_batch.clear();
234 res_cache.InvalidateInRange(cached_fb_depth_addr, cached_fb_depth_size, true);
235}
236 239
237void RasterizerOpenGL::FlushFramebuffer() { 240 // Unbind textures for potential future use as framebuffer attachments
238 CommitColorBuffer(); 241 for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) {
239 CommitDepthBuffer(); 242 state.texture_units[texture_index].texture_2d = 0;
243 }
244 state.Apply();
240} 245}
241 246
242void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { 247void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
@@ -268,7 +273,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
268 // Alpha test 273 // Alpha test
269 case PICA_REG_INDEX(output_merger.alpha_test): 274 case PICA_REG_INDEX(output_merger.alpha_test):
270 SyncAlphaTest(); 275 SyncAlphaTest();
271 state.draw.shader_dirty = true; 276 shader_dirty = true;
272 break; 277 break;
273 278
274 // Sync GL stencil test + stencil write mask 279 // Sync GL stencil test + stencil write mask
@@ -334,7 +339,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
334 case PICA_REG_INDEX(tev_stage5.color_op): 339 case PICA_REG_INDEX(tev_stage5.color_op):
335 case PICA_REG_INDEX(tev_stage5.color_scale): 340 case PICA_REG_INDEX(tev_stage5.color_scale):
336 case PICA_REG_INDEX(tev_combiner_buffer_input): 341 case PICA_REG_INDEX(tev_combiner_buffer_input):
337 state.draw.shader_dirty = true; 342 shader_dirty = true;
338 break; 343 break;
339 case PICA_REG_INDEX(tev_stage0.const_r): 344 case PICA_REG_INDEX(tev_stage0.const_r):
340 SyncTevConstColor(0, regs.tev_stage0); 345 SyncTevConstColor(0, regs.tev_stage0);
@@ -521,41 +526,257 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
521 } 526 }
522} 527}
523 528
529void RasterizerOpenGL::FlushAll() {
530 res_cache.FlushAll();
531}
532
524void RasterizerOpenGL::FlushRegion(PAddr addr, u32 size) { 533void RasterizerOpenGL::FlushRegion(PAddr addr, u32 size) {
525 const auto& regs = Pica::g_state.regs; 534 res_cache.FlushRegion(addr, size, nullptr, false);
535}
526 536
527 u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format) 537void RasterizerOpenGL::FlushAndInvalidateRegion(PAddr addr, u32 size) {
528 * fb_color_texture.width * fb_color_texture.height; 538 res_cache.FlushRegion(addr, size, nullptr, true);
539}
529 540
530 u32 cached_fb_depth_size = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format) 541bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) {
531 * fb_depth_texture.width * fb_depth_texture.height; 542 using PixelFormat = CachedSurface::PixelFormat;
543 using SurfaceType = CachedSurface::SurfaceType;
532 544
533 // If source memory region overlaps 3DS framebuffers, commit them before the copy happens 545 if (config.is_texture_copy) {
534 if (MathUtil::IntervalsIntersect(addr, size, cached_fb_color_addr, cached_fb_color_size)) 546 // TODO(tfarley): Try to hardware accelerate this
535 CommitColorBuffer(); 547 return false;
548 }
536 549
537 if (MathUtil::IntervalsIntersect(addr, size, cached_fb_depth_addr, cached_fb_depth_size)) 550 CachedSurface src_params;
538 CommitDepthBuffer(); 551 src_params.addr = config.GetPhysicalInputAddress();
552 src_params.width = config.output_width;
553 src_params.height = config.output_height;
554 src_params.is_tiled = !config.input_linear;
555 src_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.input_format);
556
557 CachedSurface dst_params;
558 dst_params.addr = config.GetPhysicalOutputAddress();
559 dst_params.width = config.scaling != config.NoScale ? config.output_width / 2 : config.output_width.Value();
560 dst_params.height = config.scaling == config.ScaleXY ? config.output_height / 2 : config.output_height.Value();
561 dst_params.is_tiled = config.input_linear != config.dont_swizzle;
562 dst_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.output_format);
563
564 MathUtil::Rectangle<int> src_rect;
565 CachedSurface* src_surface = res_cache.GetSurfaceRect(src_params, false, true, src_rect);
566
567 if (src_surface == nullptr) {
568 return false;
569 }
570
571 // Require destination surface to have same resolution scale as source to preserve scaling
572 dst_params.res_scale_width = src_surface->res_scale_width;
573 dst_params.res_scale_height = src_surface->res_scale_height;
574
575 MathUtil::Rectangle<int> dst_rect;
576 CachedSurface* dst_surface = res_cache.GetSurfaceRect(dst_params, true, false, dst_rect);
577
578 if (dst_surface == nullptr) {
579 return false;
580 }
581
582 // Don't accelerate if the src and dst surfaces are the same
583 if (src_surface == dst_surface) {
584 return false;
585 }
586
587 if (config.flip_vertically) {
588 std::swap(dst_rect.top, dst_rect.bottom);
589 }
590
591 if (!res_cache.TryBlitSurfaces(src_surface, src_rect, dst_surface, dst_rect)) {
592 return false;
593 }
594
595 u32 dst_size = dst_params.width * dst_params.height * CachedSurface::GetFormatBpp(dst_params.pixel_format) / 8;
596 dst_surface->dirty = true;
597 res_cache.FlushRegion(config.GetPhysicalOutputAddress(), dst_size, dst_surface, true);
598 return true;
539} 599}
540 600
541void RasterizerOpenGL::InvalidateRegion(PAddr addr, u32 size) { 601bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) {
542 const auto& regs = Pica::g_state.regs; 602 using PixelFormat = CachedSurface::PixelFormat;
603 using SurfaceType = CachedSurface::SurfaceType;
604
605 CachedSurface* dst_surface = res_cache.TryGetFillSurface(config);
606
607 if (dst_surface == nullptr) {
608 return false;
609 }
610
611 OpenGLState cur_state = OpenGLState::GetCurState();
612
613 SurfaceType dst_type = CachedSurface::GetFormatType(dst_surface->pixel_format);
543 614
544 u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format) 615 GLuint old_fb = cur_state.draw.draw_framebuffer;
545 * fb_color_texture.width * fb_color_texture.height; 616 cur_state.draw.draw_framebuffer = framebuffer.handle;
617 // TODO: When scissor test is implemented, need to disable scissor test in cur_state here so Clear call isn't affected
618 cur_state.Apply();
546 619
547 u32 cached_fb_depth_size = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format) 620 if (dst_type == SurfaceType::Color || dst_type == SurfaceType::Texture) {
548 * fb_depth_texture.width * fb_depth_texture.height; 621 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_surface->texture.handle, 0);
622 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
549 623
550 // If modified memory region overlaps 3DS framebuffers, reload their contents into OpenGL 624 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
551 if (MathUtil::IntervalsIntersect(addr, size, cached_fb_color_addr, cached_fb_color_size)) 625 return false;
552 ReloadColorBuffer(); 626 }
627
628 GLfloat color_values[4] = {0.0f, 0.0f, 0.0f, 0.0f};
629
630 // TODO: Handle additional pixel format and fill value size combinations to accelerate more cases
631 // For instance, checking if fill value's bytes/bits repeat to allow filling I8/A8/I4/A4/...
632 // Currently only handles formats that are multiples of the fill value size
633
634 if (config.fill_24bit) {
635 switch (dst_surface->pixel_format) {
636 case PixelFormat::RGB8:
637 color_values[0] = config.value_24bit_r / 255.0f;
638 color_values[1] = config.value_24bit_g / 255.0f;
639 color_values[2] = config.value_24bit_b / 255.0f;
640 break;
641 default:
642 return false;
643 }
644 } else if (config.fill_32bit) {
645 u32 value = config.value_32bit;
646
647 switch (dst_surface->pixel_format) {
648 case PixelFormat::RGBA8:
649 color_values[0] = (value >> 24) / 255.0f;
650 color_values[1] = ((value >> 16) & 0xFF) / 255.0f;
651 color_values[2] = ((value >> 8) & 0xFF) / 255.0f;
652 color_values[3] = (value & 0xFF) / 255.0f;
653 break;
654 default:
655 return false;
656 }
657 } else {
658 u16 value_16bit = config.value_16bit.Value();
659 Math::Vec4<u8> color;
660
661 switch (dst_surface->pixel_format) {
662 case PixelFormat::RGBA8:
663 color_values[0] = (value_16bit >> 8) / 255.0f;
664 color_values[1] = (value_16bit & 0xFF) / 255.0f;
665 color_values[2] = color_values[0];
666 color_values[3] = color_values[1];
667 break;
668 case PixelFormat::RGB5A1:
669 color = Color::DecodeRGB5A1((const u8*)&value_16bit);
670 color_values[0] = color[0] / 31.0f;
671 color_values[1] = color[1] / 31.0f;
672 color_values[2] = color[2] / 31.0f;
673 color_values[3] = color[3];
674 break;
675 case PixelFormat::RGB565:
676 color = Color::DecodeRGB565((const u8*)&value_16bit);
677 color_values[0] = color[0] / 31.0f;
678 color_values[1] = color[1] / 63.0f;
679 color_values[2] = color[2] / 31.0f;
680 break;
681 case PixelFormat::RGBA4:
682 color = Color::DecodeRGBA4((const u8*)&value_16bit);
683 color_values[0] = color[0] / 15.0f;
684 color_values[1] = color[1] / 15.0f;
685 color_values[2] = color[2] / 15.0f;
686 color_values[3] = color[3] / 15.0f;
687 break;
688 case PixelFormat::IA8:
689 case PixelFormat::RG8:
690 color_values[0] = (value_16bit >> 8) / 255.0f;
691 color_values[1] = (value_16bit & 0xFF) / 255.0f;
692 break;
693 default:
694 return false;
695 }
696 }
697
698 cur_state.color_mask.red_enabled = true;
699 cur_state.color_mask.green_enabled = true;
700 cur_state.color_mask.blue_enabled = true;
701 cur_state.color_mask.alpha_enabled = true;
702 cur_state.Apply();
703 glClearBufferfv(GL_COLOR, 0, color_values);
704 } else if (dst_type == SurfaceType::Depth) {
705 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
706 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_surface->texture.handle, 0);
707 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
708
709 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
710 return false;
711 }
712
713 GLfloat value_float;
714 if (dst_surface->pixel_format == CachedSurface::PixelFormat::D16) {
715 value_float = config.value_32bit / 65535.0f; // 2^16 - 1
716 } else if (dst_surface->pixel_format == CachedSurface::PixelFormat::D24) {
717 value_float = config.value_32bit / 16777215.0f; // 2^24 - 1
718 }
719
720 cur_state.depth.write_mask = true;
721 cur_state.Apply();
722 glClearBufferfv(GL_DEPTH, 0, &value_float);
723 } else if (dst_type == SurfaceType::DepthStencil) {
724 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
725 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, dst_surface->texture.handle, 0);
726
727 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
728 return false;
729 }
730
731 GLfloat value_float = (config.value_32bit & 0xFFFFFF) / 16777215.0f; // 2^24 - 1
732 GLint value_int = (config.value_32bit >> 24);
733
734 cur_state.depth.write_mask = true;
735 cur_state.stencil.write_mask = true;
736 cur_state.Apply();
737 glClearBufferfi(GL_DEPTH_STENCIL, 0, value_float, value_int);
738 }
553 739
554 if (MathUtil::IntervalsIntersect(addr, size, cached_fb_depth_addr, cached_fb_depth_size)) 740 cur_state.draw.draw_framebuffer = old_fb;
555 ReloadDepthBuffer(); 741 // TODO: Return scissor test to previous value when scissor test is implemented
742 cur_state.Apply();
556 743
557 // Notify cache of flush in case the region touches a cached resource 744 dst_surface->dirty = true;
558 res_cache.InvalidateInRange(addr, size); 745 res_cache.FlushRegion(dst_surface->addr, dst_surface->size, dst_surface, true);
746 return true;
747}
748
749bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) {
750 if (framebuffer_addr == 0) {
751 return false;
752 }
753
754 CachedSurface src_params;
755 src_params.addr = framebuffer_addr;
756 src_params.width = config.width;
757 src_params.height = config.height;
758 src_params.stride = pixel_stride;
759 src_params.is_tiled = false;
760 src_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.color_format);
761
762 MathUtil::Rectangle<int> src_rect;
763 CachedSurface* src_surface = res_cache.GetSurfaceRect(src_params, false, true, src_rect);
764
765 if (src_surface == nullptr) {
766 return false;
767 }
768
769 u32 scaled_width = src_surface->GetScaledWidth();
770 u32 scaled_height = src_surface->GetScaledHeight();
771
772 screen_info.display_texcoords = MathUtil::Rectangle<float>((float)src_rect.top / (float)scaled_height,
773 (float)src_rect.left / (float)scaled_width,
774 (float)src_rect.bottom / (float)scaled_height,
775 (float)src_rect.right / (float)scaled_width);
776
777 screen_info.display_texture = src_surface->texture.handle;
778
779 return true;
559} 780}
560 781
561void RasterizerOpenGL::SamplerInfo::Create() { 782void RasterizerOpenGL::SamplerInfo::Create() {
@@ -597,108 +818,6 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Pica::Regs::TextureConf
597 } 818 }
598} 819}
599 820
600void RasterizerOpenGL::ReconfigureColorTexture(TextureInfo& texture, Pica::Regs::ColorFormat format, u32 width, u32 height) {
601 GLint internal_format;
602
603 texture.format = format;
604 texture.width = width;
605 texture.height = height;
606
607 switch (format) {
608 case Pica::Regs::ColorFormat::RGBA8:
609 internal_format = GL_RGBA;
610 texture.gl_format = GL_RGBA;
611 texture.gl_type = GL_UNSIGNED_INT_8_8_8_8;
612 break;
613
614 case Pica::Regs::ColorFormat::RGB8:
615 // This pixel format uses BGR since GL_UNSIGNED_BYTE specifies byte-order, unlike every
616 // specific OpenGL type used in this function using native-endian (that is, little-endian
617 // mostly everywhere) for words or half-words.
618 // TODO: check how those behave on big-endian processors.
619 internal_format = GL_RGB;
620 texture.gl_format = GL_BGR;
621 texture.gl_type = GL_UNSIGNED_BYTE;
622 break;
623
624 case Pica::Regs::ColorFormat::RGB5A1:
625 internal_format = GL_RGBA;
626 texture.gl_format = GL_RGBA;
627 texture.gl_type = GL_UNSIGNED_SHORT_5_5_5_1;
628 break;
629
630 case Pica::Regs::ColorFormat::RGB565:
631 internal_format = GL_RGB;
632 texture.gl_format = GL_RGB;
633 texture.gl_type = GL_UNSIGNED_SHORT_5_6_5;
634 break;
635
636 case Pica::Regs::ColorFormat::RGBA4:
637 internal_format = GL_RGBA;
638 texture.gl_format = GL_RGBA;
639 texture.gl_type = GL_UNSIGNED_SHORT_4_4_4_4;
640 break;
641
642 default:
643 LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer texture color format %x", format);
644 UNIMPLEMENTED();
645 break;
646 }
647
648 state.texture_units[0].texture_2d = texture.texture.handle;
649 state.Apply();
650
651 glActiveTexture(GL_TEXTURE0);
652 glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0,
653 texture.gl_format, texture.gl_type, nullptr);
654
655 state.texture_units[0].texture_2d = 0;
656 state.Apply();
657}
658
659void RasterizerOpenGL::ReconfigureDepthTexture(DepthTextureInfo& texture, Pica::Regs::DepthFormat format, u32 width, u32 height) {
660 GLint internal_format;
661
662 texture.format = format;
663 texture.width = width;
664 texture.height = height;
665
666 switch (format) {
667 case Pica::Regs::DepthFormat::D16:
668 internal_format = GL_DEPTH_COMPONENT16;
669 texture.gl_format = GL_DEPTH_COMPONENT;
670 texture.gl_type = GL_UNSIGNED_SHORT;
671 break;
672
673 case Pica::Regs::DepthFormat::D24:
674 internal_format = GL_DEPTH_COMPONENT24;
675 texture.gl_format = GL_DEPTH_COMPONENT;
676 texture.gl_type = GL_UNSIGNED_INT;
677 break;
678
679 case Pica::Regs::DepthFormat::D24S8:
680 internal_format = GL_DEPTH24_STENCIL8;
681 texture.gl_format = GL_DEPTH_STENCIL;
682 texture.gl_type = GL_UNSIGNED_INT_24_8;
683 break;
684
685 default:
686 LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer texture depth format %x", format);
687 UNIMPLEMENTED();
688 break;
689 }
690
691 state.texture_units[0].texture_2d = texture.texture.handle;
692 state.Apply();
693
694 glActiveTexture(GL_TEXTURE0);
695 glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0,
696 texture.gl_format, texture.gl_type, nullptr);
697
698 state.texture_units[0].texture_2d = 0;
699 state.Apply();
700}
701
702void RasterizerOpenGL::SetShader() { 821void RasterizerOpenGL::SetShader() {
703 PicaShaderConfig config = PicaShaderConfig::CurrentConfig(); 822 PicaShaderConfig config = PicaShaderConfig::CurrentConfig();
704 std::unique_ptr<PicaShader> shader = std::make_unique<PicaShader>(); 823 std::unique_ptr<PicaShader> shader = std::make_unique<PicaShader>();
@@ -754,6 +873,8 @@ void RasterizerOpenGL::SetShader() {
754 873
755 SyncGlobalAmbient(); 874 SyncGlobalAmbient();
756 for (int light_index = 0; light_index < 8; light_index++) { 875 for (int light_index = 0; light_index < 8; light_index++) {
876 SyncLightSpecular0(light_index);
877 SyncLightSpecular1(light_index);
757 SyncLightDiffuse(light_index); 878 SyncLightDiffuse(light_index);
758 SyncLightAmbient(light_index); 879 SyncLightAmbient(light_index);
759 SyncLightPosition(light_index); 880 SyncLightPosition(light_index);
@@ -761,83 +882,6 @@ void RasterizerOpenGL::SetShader() {
761 } 882 }
762} 883}
763 884
764void RasterizerOpenGL::SyncFramebuffer() {
765 const auto& regs = Pica::g_state.regs;
766
767 PAddr new_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress();
768 Pica::Regs::ColorFormat new_fb_color_format = regs.framebuffer.color_format;
769
770 PAddr new_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress();
771 Pica::Regs::DepthFormat new_fb_depth_format = regs.framebuffer.depth_format;
772
773 bool fb_size_changed = fb_color_texture.width != static_cast<GLsizei>(regs.framebuffer.GetWidth()) ||
774 fb_color_texture.height != static_cast<GLsizei>(regs.framebuffer.GetHeight());
775
776 bool color_fb_prop_changed = fb_color_texture.format != new_fb_color_format ||
777 fb_size_changed;
778
779 bool depth_fb_prop_changed = fb_depth_texture.format != new_fb_depth_format ||
780 fb_size_changed;
781
782 bool color_fb_modified = cached_fb_color_addr != new_fb_color_addr ||
783 color_fb_prop_changed;
784
785 bool depth_fb_modified = cached_fb_depth_addr != new_fb_depth_addr ||
786 depth_fb_prop_changed;
787
788 // Commit if framebuffer modified in any way
789 if (color_fb_modified)
790 CommitColorBuffer();
791
792 if (depth_fb_modified)
793 CommitDepthBuffer();
794
795 // Reconfigure framebuffer textures if any property has changed
796 if (color_fb_prop_changed) {
797 ReconfigureColorTexture(fb_color_texture, new_fb_color_format,
798 regs.framebuffer.GetWidth(), regs.framebuffer.GetHeight());
799 }
800
801 if (depth_fb_prop_changed) {
802 ReconfigureDepthTexture(fb_depth_texture, new_fb_depth_format,
803 regs.framebuffer.GetWidth(), regs.framebuffer.GetHeight());
804
805 // Only attach depth buffer as stencil if it supports stencil
806 switch (new_fb_depth_format) {
807 case Pica::Regs::DepthFormat::D16:
808 case Pica::Regs::DepthFormat::D24:
809 glFramebufferTexture2D(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
810 break;
811
812 case Pica::Regs::DepthFormat::D24S8:
813 glFramebufferTexture2D(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, fb_depth_texture.texture.handle, 0);
814 break;
815
816 default:
817 LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer depth format %x", new_fb_depth_format);
818 UNIMPLEMENTED();
819 break;
820 }
821 }
822
823 // Load buffer data again if fb modified in any way
824 if (color_fb_modified) {
825 cached_fb_color_addr = new_fb_color_addr;
826
827 ReloadColorBuffer();
828 }
829
830 if (depth_fb_modified) {
831 cached_fb_depth_addr = new_fb_depth_addr;
832
833 ReloadDepthBuffer();
834 }
835
836 GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER);
837 ASSERT_MSG(status == GL_FRAMEBUFFER_COMPLETE,
838 "OpenGL rasterizer framebuffer setup failed, status %X", status);
839}
840
841void RasterizerOpenGL::SyncCullMode() { 885void RasterizerOpenGL::SyncCullMode() {
842 const auto& regs = Pica::g_state.regs; 886 const auto& regs = Pica::g_state.regs;
843 887
@@ -1034,229 +1078,3 @@ void RasterizerOpenGL::SyncLightPosition(int light_index) {
1034 uniform_block_data.dirty = true; 1078 uniform_block_data.dirty = true;
1035 } 1079 }
1036} 1080}
1037
1038void RasterizerOpenGL::SyncDrawState() {
1039 const auto& regs = Pica::g_state.regs;
1040
1041 // Sync the viewport
1042 GLsizei viewport_width = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_x).ToFloat32() * 2;
1043 GLsizei viewport_height = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_y).ToFloat32() * 2;
1044
1045 // OpenGL uses different y coordinates, so negate corner offset and flip origin
1046 // TODO: Ensure viewport_corner.x should not be negated or origin flipped
1047 // TODO: Use floating-point viewports for accuracy if supported
1048 glViewport((GLsizei)regs.viewport_corner.x,
1049 (GLsizei)regs.viewport_corner.y,
1050 viewport_width, viewport_height);
1051
1052 // Sync bound texture(s), upload if not cached
1053 const auto pica_textures = regs.GetTextures();
1054 for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) {
1055 const auto& texture = pica_textures[texture_index];
1056
1057 if (texture.enabled) {
1058 texture_samplers[texture_index].SyncWithConfig(texture.config);
1059 res_cache.LoadAndBindTexture(state, texture_index, texture);
1060 } else {
1061 state.texture_units[texture_index].texture_2d = 0;
1062 }
1063 }
1064
1065 state.draw.uniform_buffer = uniform_buffer.handle;
1066 state.Apply();
1067}
1068
1069MICROPROFILE_DEFINE(OpenGL_FramebufferReload, "OpenGL", "FB Reload", MP_RGB(70, 70, 200));
1070
1071void RasterizerOpenGL::ReloadColorBuffer() {
1072 u8* color_buffer = Memory::GetPhysicalPointer(cached_fb_color_addr);
1073
1074 if (color_buffer == nullptr)
1075 return;
1076
1077 MICROPROFILE_SCOPE(OpenGL_FramebufferReload);
1078
1079 u32 bytes_per_pixel = Pica::Regs::BytesPerColorPixel(fb_color_texture.format);
1080
1081 std::unique_ptr<u8[]> temp_fb_color_buffer(new u8[fb_color_texture.width * fb_color_texture.height * bytes_per_pixel]);
1082
1083 // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion is necessary.
1084 for (int y = 0; y < fb_color_texture.height; ++y) {
1085 for (int x = 0; x < fb_color_texture.width; ++x) {
1086 const u32 coarse_y = y & ~7;
1087 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_color_texture.width * bytes_per_pixel;
1088 u32 gl_pixel_index = (x + (fb_color_texture.height - 1 - y) * fb_color_texture.width) * bytes_per_pixel;
1089
1090 u8* pixel = color_buffer + dst_offset;
1091 memcpy(&temp_fb_color_buffer[gl_pixel_index], pixel, bytes_per_pixel);
1092 }
1093 }
1094
1095 state.texture_units[0].texture_2d = fb_color_texture.texture.handle;
1096 state.Apply();
1097
1098 glActiveTexture(GL_TEXTURE0);
1099 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, fb_color_texture.width, fb_color_texture.height,
1100 fb_color_texture.gl_format, fb_color_texture.gl_type, temp_fb_color_buffer.get());
1101
1102 state.texture_units[0].texture_2d = 0;
1103 state.Apply();
1104}
1105
1106void RasterizerOpenGL::ReloadDepthBuffer() {
1107 if (cached_fb_depth_addr == 0)
1108 return;
1109
1110 // TODO: Appears to work, but double-check endianness of depth values and order of depth-stencil
1111 u8* depth_buffer = Memory::GetPhysicalPointer(cached_fb_depth_addr);
1112
1113 if (depth_buffer == nullptr)
1114 return;
1115
1116 MICROPROFILE_SCOPE(OpenGL_FramebufferReload);
1117
1118 u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format);
1119
1120 // OpenGL needs 4 bpp alignment for D24
1121 u32 gl_bpp = bytes_per_pixel == 3 ? 4 : bytes_per_pixel;
1122
1123 std::unique_ptr<u8[]> temp_fb_depth_buffer(new u8[fb_depth_texture.width * fb_depth_texture.height * gl_bpp]);
1124
1125 u8* temp_fb_depth_data = bytes_per_pixel == 3 ? (temp_fb_depth_buffer.get() + 1) : temp_fb_depth_buffer.get();
1126
1127 if (fb_depth_texture.format == Pica::Regs::DepthFormat::D24S8) {
1128 for (int y = 0; y < fb_depth_texture.height; ++y) {
1129 for (int x = 0; x < fb_depth_texture.width; ++x) {
1130 const u32 coarse_y = y & ~7;
1131 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel;
1132 u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width);
1133
1134 u8* pixel = depth_buffer + dst_offset;
1135 u32 depth_stencil = *(u32*)pixel;
1136 ((u32*)temp_fb_depth_data)[gl_pixel_index] = (depth_stencil << 8) | (depth_stencil >> 24);
1137 }
1138 }
1139 } else {
1140 for (int y = 0; y < fb_depth_texture.height; ++y) {
1141 for (int x = 0; x < fb_depth_texture.width; ++x) {
1142 const u32 coarse_y = y & ~7;
1143 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel;
1144 u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width) * gl_bpp;
1145
1146 u8* pixel = depth_buffer + dst_offset;
1147 memcpy(&temp_fb_depth_data[gl_pixel_index], pixel, bytes_per_pixel);
1148 }
1149 }
1150 }
1151
1152 state.texture_units[0].texture_2d = fb_depth_texture.texture.handle;
1153 state.Apply();
1154
1155 glActiveTexture(GL_TEXTURE0);
1156 if (fb_depth_texture.format == Pica::Regs::DepthFormat::D24S8) {
1157 // TODO(Subv): There is a bug with Intel Windows drivers that makes glTexSubImage2D not change the stencil buffer.
1158 // The bug has been reported to Intel (https://communities.intel.com/message/324464)
1159 glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH24_STENCIL8, fb_depth_texture.width, fb_depth_texture.height, 0,
1160 GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, temp_fb_depth_buffer.get());
1161 } else {
1162 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, fb_depth_texture.width, fb_depth_texture.height,
1163 fb_depth_texture.gl_format, fb_depth_texture.gl_type, temp_fb_depth_buffer.get());
1164 }
1165
1166 state.texture_units[0].texture_2d = 0;
1167 state.Apply();
1168}
1169
1170Common::Profiling::TimingCategory buffer_commit_category("Framebuffer Commit");
1171MICROPROFILE_DEFINE(OpenGL_FramebufferCommit, "OpenGL", "FB Commit", MP_RGB(70, 70, 200));
1172
1173void RasterizerOpenGL::CommitColorBuffer() {
1174 if (cached_fb_color_addr != 0) {
1175 u8* color_buffer = Memory::GetPhysicalPointer(cached_fb_color_addr);
1176
1177 if (color_buffer != nullptr) {
1178 Common::Profiling::ScopeTimer timer(buffer_commit_category);
1179 MICROPROFILE_SCOPE(OpenGL_FramebufferCommit);
1180
1181 u32 bytes_per_pixel = Pica::Regs::BytesPerColorPixel(fb_color_texture.format);
1182
1183 std::unique_ptr<u8[]> temp_gl_color_buffer(new u8[fb_color_texture.width * fb_color_texture.height * bytes_per_pixel]);
1184
1185 state.texture_units[0].texture_2d = fb_color_texture.texture.handle;
1186 state.Apply();
1187
1188 glActiveTexture(GL_TEXTURE0);
1189 glGetTexImage(GL_TEXTURE_2D, 0, fb_color_texture.gl_format, fb_color_texture.gl_type, temp_gl_color_buffer.get());
1190
1191 state.texture_units[0].texture_2d = 0;
1192 state.Apply();
1193
1194 // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion is necessary.
1195 for (int y = 0; y < fb_color_texture.height; ++y) {
1196 for (int x = 0; x < fb_color_texture.width; ++x) {
1197 const u32 coarse_y = y & ~7;
1198 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_color_texture.width * bytes_per_pixel;
1199 u32 gl_pixel_index = x * bytes_per_pixel + (fb_color_texture.height - 1 - y) * fb_color_texture.width * bytes_per_pixel;
1200
1201 u8* pixel = color_buffer + dst_offset;
1202 memcpy(pixel, &temp_gl_color_buffer[gl_pixel_index], bytes_per_pixel);
1203 }
1204 }
1205 }
1206 }
1207}
1208
1209void RasterizerOpenGL::CommitDepthBuffer() {
1210 if (cached_fb_depth_addr != 0) {
1211 // TODO: Output seems correct visually, but doesn't quite match sw renderer output. One of them is wrong.
1212 u8* depth_buffer = Memory::GetPhysicalPointer(cached_fb_depth_addr);
1213
1214 if (depth_buffer != nullptr) {
1215 Common::Profiling::ScopeTimer timer(buffer_commit_category);
1216 MICROPROFILE_SCOPE(OpenGL_FramebufferCommit);
1217
1218 u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format);
1219
1220 // OpenGL needs 4 bpp alignment for D24
1221 u32 gl_bpp = bytes_per_pixel == 3 ? 4 : bytes_per_pixel;
1222
1223 std::unique_ptr<u8[]> temp_gl_depth_buffer(new u8[fb_depth_texture.width * fb_depth_texture.height * gl_bpp]);
1224
1225 state.texture_units[0].texture_2d = fb_depth_texture.texture.handle;
1226 state.Apply();
1227
1228 glActiveTexture(GL_TEXTURE0);
1229 glGetTexImage(GL_TEXTURE_2D, 0, fb_depth_texture.gl_format, fb_depth_texture.gl_type, temp_gl_depth_buffer.get());
1230
1231 state.texture_units[0].texture_2d = 0;
1232 state.Apply();
1233
1234 u8* temp_gl_depth_data = bytes_per_pixel == 3 ? (temp_gl_depth_buffer.get() + 1) : temp_gl_depth_buffer.get();
1235
1236 if (fb_depth_texture.format == Pica::Regs::DepthFormat::D24S8) {
1237 for (int y = 0; y < fb_depth_texture.height; ++y) {
1238 for (int x = 0; x < fb_depth_texture.width; ++x) {
1239 const u32 coarse_y = y & ~7;
1240 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel;
1241 u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width);
1242
1243 u8* pixel = depth_buffer + dst_offset;
1244 u32 depth_stencil = ((u32*)temp_gl_depth_data)[gl_pixel_index];
1245 *(u32*)pixel = (depth_stencil >> 8) | (depth_stencil << 24);
1246 }
1247 }
1248 } else {
1249 for (int y = 0; y < fb_depth_texture.height; ++y) {
1250 for (int x = 0; x < fb_depth_texture.width; ++x) {
1251 const u32 coarse_y = y & ~7;
1252 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel;
1253 u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width) * gl_bpp;
1254
1255 u8* pixel = depth_buffer + dst_offset;
1256 memcpy(pixel, &temp_gl_depth_data[gl_pixel_index], bytes_per_pixel);
1257 }
1258 }
1259 }
1260 }
1261 }
1262}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 390349a0c..8d6177e88 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -19,6 +19,7 @@
19#include "video_core/renderer_opengl/gl_rasterizer_cache.h" 19#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
20#include "video_core/renderer_opengl/gl_state.h" 20#include "video_core/renderer_opengl/gl_state.h"
21#include "video_core/renderer_opengl/pica_to_gl.h" 21#include "video_core/renderer_opengl/pica_to_gl.h"
22#include "video_core/renderer_opengl/renderer_opengl.h"
22#include "video_core/shader/shader_interpreter.h" 23#include "video_core/shader/shader_interpreter.h"
23 24
24/** 25/**
@@ -191,16 +192,17 @@ public:
191 RasterizerOpenGL(); 192 RasterizerOpenGL();
192 ~RasterizerOpenGL() override; 193 ~RasterizerOpenGL() override;
193 194
194 void InitObjects() override;
195 void Reset() override;
196 void AddTriangle(const Pica::Shader::OutputVertex& v0, 195 void AddTriangle(const Pica::Shader::OutputVertex& v0,
197 const Pica::Shader::OutputVertex& v1, 196 const Pica::Shader::OutputVertex& v1,
198 const Pica::Shader::OutputVertex& v2) override; 197 const Pica::Shader::OutputVertex& v2) override;
199 void DrawTriangles() override; 198 void DrawTriangles() override;
200 void FlushFramebuffer() override;
201 void NotifyPicaRegisterChanged(u32 id) override; 199 void NotifyPicaRegisterChanged(u32 id) override;
200 void FlushAll() override;
202 void FlushRegion(PAddr addr, u32 size) override; 201 void FlushRegion(PAddr addr, u32 size) override;
203 void InvalidateRegion(PAddr addr, u32 size) override; 202 void FlushAndInvalidateRegion(PAddr addr, u32 size) override;
203 bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override;
204 bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override;
205 bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) override;
204 206
205 /// OpenGL shader generated for a given Pica register state 207 /// OpenGL shader generated for a given Pica register state
206 struct PicaShader { 208 struct PicaShader {
@@ -210,26 +212,6 @@ public:
210 212
211private: 213private:
212 214
213 /// Structure used for storing information about color textures
214 struct TextureInfo {
215 OGLTexture texture;
216 GLsizei width;
217 GLsizei height;
218 Pica::Regs::ColorFormat format;
219 GLenum gl_format;
220 GLenum gl_type;
221 };
222
223 /// Structure used for storing information about depth textures
224 struct DepthTextureInfo {
225 OGLTexture texture;
226 GLsizei width;
227 GLsizei height;
228 Pica::Regs::DepthFormat format;
229 GLenum gl_format;
230 GLenum gl_type;
231 };
232
233 struct SamplerInfo { 215 struct SamplerInfo {
234 using TextureConfig = Pica::Regs::TextureConfig; 216 using TextureConfig = Pica::Regs::TextureConfig;
235 217
@@ -311,18 +293,9 @@ private:
311 static_assert(sizeof(UniformData) == 0x310, "The size of the UniformData structure has changed, update the structure in the shader"); 293 static_assert(sizeof(UniformData) == 0x310, "The size of the UniformData structure has changed, update the structure in the shader");
312 static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec"); 294 static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec");
313 295
314 /// Reconfigure the OpenGL color texture to use the given format and dimensions
315 void ReconfigureColorTexture(TextureInfo& texture, Pica::Regs::ColorFormat format, u32 width, u32 height);
316
317 /// Reconfigure the OpenGL depth texture to use the given format and dimensions
318 void ReconfigureDepthTexture(DepthTextureInfo& texture, Pica::Regs::DepthFormat format, u32 width, u32 height);
319
320 /// Sets the OpenGL shader in accordance with the current PICA register state 296 /// Sets the OpenGL shader in accordance with the current PICA register state
321 void SetShader(); 297 void SetShader();
322 298
323 /// Syncs the state and contents of the OpenGL framebuffer to match the current PICA framebuffer
324 void SyncFramebuffer();
325
326 /// Syncs the cull mode to match the PICA register 299 /// Syncs the cull mode to match the PICA register
327 void SyncCullMode(); 300 void SyncCullMode();
328 301
@@ -359,72 +332,42 @@ private:
359 /// Syncs the depth test states to match the PICA register 332 /// Syncs the depth test states to match the PICA register
360 void SyncDepthTest(); 333 void SyncDepthTest();
361 334
362 /// Syncs the TEV constant color to match the PICA register
363 void SyncTevConstColor(int tev_index, const Pica::Regs::TevStageConfig& tev_stage);
364
365 /// Syncs the TEV combiner color buffer to match the PICA register 335 /// Syncs the TEV combiner color buffer to match the PICA register
366 void SyncCombinerColor(); 336 void SyncCombinerColor();
367 337
338 /// Syncs the TEV constant color to match the PICA register
339 void SyncTevConstColor(int tev_index, const Pica::Regs::TevStageConfig& tev_stage);
340
368 /// Syncs the lighting global ambient color to match the PICA register 341 /// Syncs the lighting global ambient color to match the PICA register
369 void SyncGlobalAmbient(); 342 void SyncGlobalAmbient();
370 343
371 /// Syncs the lighting lookup tables 344 /// Syncs the lighting lookup tables
372 void SyncLightingLUT(unsigned index); 345 void SyncLightingLUT(unsigned index);
373 346
374 /// Syncs the specified light's diffuse color to match the PICA register
375 void SyncLightDiffuse(int light_index);
376
377 /// Syncs the specified light's ambient color to match the PICA register
378 void SyncLightAmbient(int light_index);
379
380 /// Syncs the specified light's position to match the PICA register
381 void SyncLightPosition(int light_index);
382
383 /// Syncs the specified light's specular 0 color to match the PICA register 347 /// Syncs the specified light's specular 0 color to match the PICA register
384 void SyncLightSpecular0(int light_index); 348 void SyncLightSpecular0(int light_index);
385 349
386 /// Syncs the specified light's specular 1 color to match the PICA register 350 /// Syncs the specified light's specular 1 color to match the PICA register
387 void SyncLightSpecular1(int light_index); 351 void SyncLightSpecular1(int light_index);
388 352
389 /// Syncs the remaining OpenGL drawing state to match the current PICA state 353 /// Syncs the specified light's diffuse color to match the PICA register
390 void SyncDrawState(); 354 void SyncLightDiffuse(int light_index);
391
392 /// Copies the 3DS color framebuffer into the OpenGL color framebuffer texture
393 void ReloadColorBuffer();
394 355
395 /// Copies the 3DS depth framebuffer into the OpenGL depth framebuffer texture 356 /// Syncs the specified light's ambient color to match the PICA register
396 void ReloadDepthBuffer(); 357 void SyncLightAmbient(int light_index);
397 358
398 /** 359 /// Syncs the specified light's position to match the PICA register
399 * Save the current OpenGL color framebuffer to the current PICA framebuffer in 3DS memory 360 void SyncLightPosition(int light_index);
400 * Loads the OpenGL framebuffer textures into temporary buffers
401 * Then copies into the 3DS framebuffer using proper Morton order
402 */
403 void CommitColorBuffer();
404 361
405 /** 362 OpenGLState state;
406 * Save the current OpenGL depth framebuffer to the current PICA framebuffer in 3DS memory
407 * Loads the OpenGL framebuffer textures into temporary buffers
408 * Then copies into the 3DS framebuffer using proper Morton order
409 */
410 void CommitDepthBuffer();
411 363
412 RasterizerCacheOpenGL res_cache; 364 RasterizerCacheOpenGL res_cache;
413 365
414 std::vector<HardwareVertex> vertex_batch; 366 std::vector<HardwareVertex> vertex_batch;
415 367
416 OpenGLState state;
417
418 PAddr cached_fb_color_addr;
419 PAddr cached_fb_depth_addr;
420
421 // Hardware rasterizer
422 std::array<SamplerInfo, 3> texture_samplers;
423 TextureInfo fb_color_texture;
424 DepthTextureInfo fb_depth_texture;
425
426 std::unordered_map<PicaShaderConfig, std::unique_ptr<PicaShader>> shader_cache; 368 std::unordered_map<PicaShaderConfig, std::unique_ptr<PicaShader>> shader_cache;
427 const PicaShader* current_shader = nullptr; 369 const PicaShader* current_shader = nullptr;
370 bool shader_dirty;
428 371
429 struct { 372 struct {
430 UniformData data; 373 UniformData data;
@@ -432,11 +375,12 @@ private:
432 bool dirty; 375 bool dirty;
433 } uniform_block_data; 376 } uniform_block_data;
434 377
378 std::array<SamplerInfo, 3> texture_samplers;
435 OGLVertexArray vertex_array; 379 OGLVertexArray vertex_array;
436 OGLBuffer vertex_buffer; 380 OGLBuffer vertex_buffer;
437 OGLBuffer uniform_buffer; 381 OGLBuffer uniform_buffer;
438 OGLFramebuffer framebuffer; 382 OGLFramebuffer framebuffer;
439 383
440 std::array<OGLTexture, 6> lighting_lut; 384 std::array<OGLTexture, 6> lighting_luts;
441 std::array<std::array<GLvec4, 256>, 6> lighting_lut_data; 385 std::array<std::array<GLvec4, 256>, 6> lighting_lut_data;
442}; 386};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 1323c12e4..55c2fb283 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -2,8 +2,9 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <memory> 5#include <unordered_set>
6 6
7#include "common/emu_window.h"
7#include "common/hash.h" 8#include "common/hash.h"
8#include "common/math_util.h" 9#include "common/math_util.h"
9#include "common/microprofile.h" 10#include "common/microprofile.h"
@@ -12,71 +13,693 @@
12#include "core/memory.h" 13#include "core/memory.h"
13 14
14#include "video_core/debug_utils/debug_utils.h" 15#include "video_core/debug_utils/debug_utils.h"
16#include "video_core/pica_state.h"
15#include "video_core/renderer_opengl/gl_rasterizer_cache.h" 17#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
16#include "video_core/renderer_opengl/pica_to_gl.h" 18#include "video_core/renderer_opengl/pica_to_gl.h"
19#include "video_core/utils.h"
20#include "video_core/video_core.h"
21
// OpenGL format description for one pixel format: the internal format used when allocating
// texture storage plus the format/type pair used when transferring pixel data.
22struct FormatTuple {
23 GLint internal_format;
24 GLenum format;
25 GLenum type;
26};
27
// Format tuples for the five color formats. The table is indexed directly with the numeric
// value of CachedSurface::PixelFormat (RGBA8 = 0 ... RGBA4 = 4).
28static const std::array<FormatTuple, 5> fb_format_tuples = {{
29 { GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8 }, // RGBA8
30 { GL_RGB8, GL_BGR, GL_UNSIGNED_BYTE }, // RGB8
31 { GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1 }, // RGB5A1
32 { GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5 }, // RGB565
33 { GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4 }, // RGBA4
34}};
35
// Format tuples for the depth formats. Callers index this table with (pixel_format - 14),
// so slot 1 is an empty placeholder matching the gap between D16 (14) and D24 (16).
36static const std::array<FormatTuple, 4> depth_format_tuples = {{
37 { GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT }, // D16
38 {}, // unused slot (no pixel format with value 15)
39 { GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT }, // D24
40 { GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8 }, // D24S8
41}};
42
// Creates the two framebuffer objects that BlitTextures binds as its read ([0]) and
// draw ([1]) targets.
43RasterizerCacheOpenGL::RasterizerCacheOpenGL() {
44 transfer_framebuffers[0].Create();
45 transfer_framebuffers[1].Create();
46}
17 47
// On destruction, flush every cached surface so surfaces still marked dirty are written
// back through FlushSurface before the cache goes away.
18RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { 48RasterizerCacheOpenGL::~RasterizerCacheOpenGL() {
19 InvalidateAll(); 49 FlushAll();
20} 50}
21 51
// MortonCopyPixels: copies every pixel of a width x height image between a Morton-ordered
// (tiled) buffer and a linear OpenGL buffer, flipping the image vertically on the way.
//  - bytes_per_pixel:    pixel size used for addressing the Morton buffer and for the copy size
//  - gl_bytes_per_pixel: pixel pitch used for addressing the linear OpenGL buffer
//  - morton_to_gl:       true copies morton_data -> gl_data, false copies gl_data -> morton_data
// For D24S8 each 32-bit value is additionally rotated so depth and stencil swap positions.
22MICROPROFILE_DEFINE(OpenGL_TextureUpload, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192)); 52static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width, u32 height, u32 bytes_per_pixel, u32 gl_bytes_per_pixel, u8* morton_data, u8* gl_data, bool morton_to_gl) {
53 using PixelFormat = CachedSurface::PixelFormat;
54
// data_ptrs[0] is always the copy destination and data_ptrs[1] the copy source; which buffer
// lands in which slot is selected below by indexing with morton_to_gl.
55 u8* data_ptrs[2];
// Rotation amounts for D24S8: (value << shifts[0]) | (value >> shifts[1]).
56 u32 depth_stencil_shifts[2] = {24, 8};
23 57
// The rotation direction is reversed when reading from Morton memory.
24void RasterizerCacheOpenGL::LoadAndBindTexture(OpenGLState &state, unsigned texture_unit, const Pica::DebugUtils::TextureInfo& info) { 58 if (morton_to_gl) {
25 const auto cached_texture = texture_cache.find(info.physical_address); 59 std::swap(depth_stencil_shifts[0], depth_stencil_shifts[1]);
60 }
61
62 if (pixel_format == PixelFormat::D24S8) {
63 for (unsigned y = 0; y < height; ++y) {
64 for (unsigned x = 0; x < width; ++x) {
// Offset of the pixel inside the tiled buffer: offset within the 8-row band plus the band start.
65 const u32 coarse_y = y & ~7;
66 u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel;
// Linear offset with the row index mirrored (row 0 of one buffer maps to the last row of the other).
67 u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel;
68
69 data_ptrs[morton_to_gl] = morton_data + morton_offset;
70 data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index];
26 71
27 if (cached_texture != texture_cache.end()) { 72 // Swap depth and stencil value ordering since 3DS does not match OpenGL
28 state.texture_units[texture_unit].texture_2d = cached_texture->second->texture.handle; 73 u32 depth_stencil;
29 state.Apply(); 74 memcpy(&depth_stencil, data_ptrs[1], sizeof(u32));
75 depth_stencil = (depth_stencil << depth_stencil_shifts[0]) | (depth_stencil >> depth_stencil_shifts[1]);
76
77 memcpy(data_ptrs[0], &depth_stencil, sizeof(u32));
78 }
79 }
30 } else { 80 } else {
// All other formats are copied byte-for-byte, bytes_per_pixel bytes at a time.
31 MICROPROFILE_SCOPE(OpenGL_TextureUpload); 81 for (unsigned y = 0; y < height; ++y) {
82 for (unsigned x = 0; x < width; ++x) {
83 const u32 coarse_y = y & ~7;
84 u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel;
85 u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel;
86
87 data_ptrs[morton_to_gl] = morton_data + morton_offset;
88 data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index];
89
90 memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
91 }
92 }
93 }
94}
95
96bool RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, CachedSurface::SurfaceType type, const MathUtil::Rectangle<int>& src_rect, const MathUtil::Rectangle<int>& dst_rect) {
97 using SurfaceType = CachedSurface::SurfaceType;
98
99 OpenGLState cur_state = OpenGLState::GetCurState();
100
101 // Make sure textures aren't bound to texture units, since going to bind them to framebuffer components
102 OpenGLState::ResetTexture(src_tex);
103 OpenGLState::ResetTexture(dst_tex);
104
105 // Keep track of previous framebuffer bindings
106 GLuint old_fbs[2] = { cur_state.draw.read_framebuffer, cur_state.draw.draw_framebuffer };
107 cur_state.draw.read_framebuffer = transfer_framebuffers[0].handle;
108 cur_state.draw.draw_framebuffer = transfer_framebuffers[1].handle;
109 cur_state.Apply();
110
111 u32 buffers = 0;
112
113 if (type == SurfaceType::Color || type == SurfaceType::Texture) {
114 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex, 0);
115 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
116
117 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex, 0);
118 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
119
120 buffers = GL_COLOR_BUFFER_BIT;
121 } else if (type == SurfaceType::Depth) {
122 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
123 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0);
124 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
125
126 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
127 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0);
128 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
129
130 buffers = GL_DEPTH_BUFFER_BIT;
131 } else if (type == SurfaceType::DepthStencil) {
132 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
133 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0);
134
135 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
136 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0);
137
138 buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
139 }
140
141 if (OpenGLState::CheckFBStatus(GL_READ_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
142 return false;
143 }
144
145 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
146 return false;
147 }
148
149 glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
150 dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom,
151 buffers, buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST);
152
153 // Restore previous framebuffer bindings
154 cur_state.draw.read_framebuffer = old_fbs[0];
155 cur_state.draw.draw_framebuffer = old_fbs[1];
156 cur_state.Apply();
157
158 return true;
159}
160
161bool RasterizerCacheOpenGL::TryBlitSurfaces(CachedSurface* src_surface, const MathUtil::Rectangle<int>& src_rect, CachedSurface* dst_surface, const MathUtil::Rectangle<int>& dst_rect) {
162 using SurfaceType = CachedSurface::SurfaceType;
163
164 if (!CachedSurface::CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format)) {
165 return false;
166 }
167
168 return BlitTextures(src_surface->texture.handle, dst_surface->texture.handle, CachedSurface::GetFormatType(src_surface->pixel_format), src_rect, dst_rect);
169}
170
171static void AllocateSurfaceTexture(GLuint texture, CachedSurface::PixelFormat pixel_format, u32 width, u32 height) {
172 // Allocate an uninitialized texture of appropriate size and format for the surface
173 using SurfaceType = CachedSurface::SurfaceType;
174
175 OpenGLState cur_state = OpenGLState::GetCurState();
176
177 // Keep track of previous texture bindings
178 GLuint old_tex = cur_state.texture_units[0].texture_2d;
179 cur_state.texture_units[0].texture_2d = texture;
180 cur_state.Apply();
181 glActiveTexture(GL_TEXTURE0);
182
183 SurfaceType type = CachedSurface::GetFormatType(pixel_format);
184
185 FormatTuple tuple;
186 if (type == SurfaceType::Color) {
187 ASSERT((size_t)pixel_format < fb_format_tuples.size());
188 tuple = fb_format_tuples[(unsigned int)pixel_format];
189 } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) {
190 size_t tuple_idx = (size_t)pixel_format - 14;
191 ASSERT(tuple_idx < depth_format_tuples.size());
192 tuple = depth_format_tuples[tuple_idx];
193 } else {
194 tuple = { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE };
195 }
196
197 glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, width, height, 0,
198 tuple.format, tuple.type, nullptr);
199
200 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
201 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
202 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
203 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
204
205 // Restore previous texture bindings
206 cur_state.texture_units[0].texture_2d = old_tex;
207 cur_state.Apply();
208}
209
210MICROPROFILE_DEFINE(OpenGL_SurfaceUpload, "OpenGL", "Surface Upload", MP_RGB(128, 64, 192));
// GetSurface: returns a cached surface whose address, width, height and pixel format exactly
// match `params`, creating and registering a new one when no such surface exists.
//  - match_res_scale: when true, an existing surface is only accepted if its res_scale_width
//                     and res_scale_height equal those of `params`
//  - load_if_create:  when true, a newly created surface is filled from emulated memory;
//                     when false, its texture is only allocated
// Returns nullptr when params.addr is 0 or when no physical pointer exists for the address.
211CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bool match_res_scale, bool load_if_create) {
212 using PixelFormat = CachedSurface::PixelFormat;
213 using SurfaceType = CachedSurface::SurfaceType;
214
215 if (params.addr == 0) {
216 return nullptr;
217 }
218
// Size in bytes of the requested surface in emulated memory
219 u32 params_size = params.width * params.height * CachedSurface::GetFormatBpp(params.pixel_format) / 8;
220
221 // Check for an exact match in existing surfaces
222 CachedSurface* best_exact_surface = nullptr;
223 float exact_surface_goodness = -1.f;
224
225 auto surface_interval = boost::icl::interval<PAddr>::right_open(params.addr, params.addr + params_size);
226 auto range = surface_cache.equal_range(surface_interval);
227 for (auto it = range.first; it != range.second; ++it) {
228 for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) {
229 CachedSurface* surface = it2->get();
230
231 // Check if the request matches the surface exactly
232 if (params.addr == surface->addr &&
233 params.width == surface->width && params.height == surface->height &&
234 params.pixel_format == surface->pixel_format)
235 {
236 // Make sure optional param-matching criteria are fulfilled
237 bool tiling_match = (params.is_tiled == surface->is_tiled);
238 bool res_scale_match = (params.res_scale_width == surface->res_scale_width && params.res_scale_height == surface->res_scale_height);
239 if (!match_res_scale || res_scale_match) {
240 // Prioritize same-tiling and highest resolution surfaces
241 float match_goodness = (float)tiling_match + surface->res_scale_width * surface->res_scale_height;
// A dirty surface replaces the current candidate regardless of its score
242 if (match_goodness > exact_surface_goodness || surface->dirty) {
243 exact_surface_goodness = match_goodness;
244 best_exact_surface = surface;
245 }
246 }
247 }
248 }
249 }
250
251 // Return the best exact surface if found
252 if (best_exact_surface != nullptr) {
253 return best_exact_surface;
254 }
255
256 // No matching surfaces found, so create a new one
257 u8* texture_src_data = Memory::GetPhysicalPointer(params.addr);
258 if (texture_src_data == nullptr) {
259 return nullptr;
260 }
261
262 MICROPROFILE_SCOPE(OpenGL_SurfaceUpload);
263
264 std::shared_ptr<CachedSurface> new_surface = std::make_shared<CachedSurface>();
265
266 new_surface->addr = params.addr;
267 new_surface->size = params_size;
268
269 new_surface->texture.Create();
270 new_surface->width = params.width;
271 new_surface->height = params.height;
272 new_surface->stride = params.stride;
273 new_surface->res_scale_width = params.res_scale_width;
274 new_surface->res_scale_height = params.res_scale_height;
275
276 new_surface->is_tiled = params.is_tiled;
277 new_surface->pixel_format = params.pixel_format;
278 new_surface->dirty = false;
279
280 if (!load_if_create) {
281 // Don't load any data; just allocate the surface's texture
282 AllocateSurfaceTexture(new_surface->texture.handle, new_surface->pixel_format, new_surface->GetScaledWidth(), new_surface->GetScaledHeight());
283 } else {
284 // TODO: Consider attempting subrect match in existing surfaces and direct blit here instead of memory upload below if that's a common scenario in some game
285
// Flush the region first so the upload below reads up-to-date emulated memory
286 Memory::RasterizerFlushRegion(params.addr, params_size);
287
288 // Load data from memory to the new surface
289 OpenGLState cur_state = OpenGLState::GetCurState();
290
291 GLuint old_tex = cur_state.texture_units[0].texture_2d;
292 cur_state.texture_units[0].texture_2d = new_surface->texture.handle;
293 cur_state.Apply();
294 glActiveTexture(GL_TEXTURE0);
295
// NOTE(review): GL_UNPACK_ROW_LENGTH stays set to the surface stride for the tiled uploads
// below as well, whose staging buffers are packed at params.width pixels per row - confirm
// that stride is 0 or equal to the width on those paths.
296 glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)new_surface->stride);
297 if (!new_surface->is_tiled) {
298 // TODO: Ensure this will always be a color format, not a depth or other format
299 ASSERT((size_t)new_surface->pixel_format < fb_format_tuples.size());
300 const FormatTuple& tuple = fb_format_tuples[(unsigned int)params.pixel_format];
301
// Linear surfaces are uploaded straight from emulated memory
302 glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0,
303 tuple.format, tuple.type, texture_src_data);
304 } else {
305 SurfaceType type = CachedSurface::GetFormatType(new_surface->pixel_format);
306 if (type != SurfaceType::Depth && type != SurfaceType::DepthStencil) {
307 FormatTuple tuple;
308 if ((size_t)params.pixel_format < fb_format_tuples.size()) {
309 tuple = fb_format_tuples[(unsigned int)params.pixel_format];
310 } else {
311 // Texture
312 tuple = { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE };
313 }
314
// Tiled color/texture data is decoded texel by texel into an RGBA8 staging buffer
315 std::vector<Math::Vec4<u8>> tex_buffer(params.width * params.height);
316
317 Pica::DebugUtils::TextureInfo tex_info;
318 tex_info.width = params.width;
319 tex_info.height = params.height;
320 tex_info.stride = params.width * CachedSurface::GetFormatBpp(params.pixel_format) / 8;
321 tex_info.format = (Pica::Regs::TextureFormat)params.pixel_format;
322 tex_info.physical_address = params.addr;
323
// Rows are read in reverse order (height - 1 - y), flipping the image vertically
324 for (unsigned y = 0; y < params.height; ++y) {
325 for (unsigned x = 0; x < params.width; ++x) {
326 tex_buffer[x + params.width * y] = Pica::DebugUtils::LookupTexture(texture_src_data, x, params.height - 1 - y, tex_info);
327 }
328 }
329
// Only the internal format comes from the tuple; the staging data is always RGBA bytes
330 glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0, GL_RGBA, GL_UNSIGNED_BYTE, tex_buffer.data());
331 } else {
332 // Depth/Stencil formats need special treatment since they aren't sampleable using LookupTexture and can't use RGBA format
333 size_t tuple_idx = (size_t)params.pixel_format - 14;
334 ASSERT(tuple_idx < depth_format_tuples.size());
335 const FormatTuple& tuple = depth_format_tuples[tuple_idx];
336
337 u32 bytes_per_pixel = CachedSurface::GetFormatBpp(params.pixel_format) / 8;
338
339 // OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type
340 bool use_4bpp = (params.pixel_format == PixelFormat::D24);
341
342 u32 gl_bytes_per_pixel = use_4bpp ? 4 : bytes_per_pixel;
343
344 std::vector<u8> temp_fb_depth_buffer(params.width * params.height * gl_bytes_per_pixel);
345
// For D24 the 3 source bytes are written starting at byte 1 of each 4-byte slot
346 u8* temp_fb_depth_buffer_ptr = use_4bpp ? temp_fb_depth_buffer.data() + 1 : temp_fb_depth_buffer.data();
347
348 MortonCopyPixels(params.pixel_format, params.width, params.height, bytes_per_pixel, gl_bytes_per_pixel, texture_src_data, temp_fb_depth_buffer_ptr, true);
349
350 glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0,
351 tuple.format, tuple.type, temp_fb_depth_buffer.data());
352 }
353 }
354 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
32 355
33 std::unique_ptr<CachedTexture> new_texture = std::make_unique<CachedTexture>(); 356 // If not 1x scale, blit 1x texture to a new scaled texture and replace texture in surface
357 if (new_surface->res_scale_width != 1.f || new_surface->res_scale_height != 1.f) {
358 OGLTexture scaled_texture;
359 scaled_texture.Create();
34 360
35 new_texture->texture.Create(); 361 AllocateSurfaceTexture(scaled_texture.handle, new_surface->pixel_format, new_surface->GetScaledWidth(), new_surface->GetScaledHeight());
36 state.texture_units[texture_unit].texture_2d = new_texture->texture.handle; 362 BlitTextures(new_surface->texture.handle, scaled_texture.handle, CachedSurface::GetFormatType(new_surface->pixel_format),
37 state.Apply(); 363 MathUtil::Rectangle<int>(0, 0, new_surface->width, new_surface->height),
38 glActiveTexture(GL_TEXTURE0 + texture_unit); 364 MathUtil::Rectangle<int>(0, 0, new_surface->GetScaledWidth(), new_surface->GetScaledHeight()));
39 365
// Hand the scaled texture's handle over to the surface; zeroing scaled_texture.handle
// transfers ownership so the local wrapper no longer refers to it.
40 u8* texture_src_data = Memory::GetPhysicalPointer(info.physical_address); 366 new_surface->texture.Release();
367 new_surface->texture.handle = scaled_texture.handle;
368 scaled_texture.handle = 0;
369 cur_state.texture_units[0].texture_2d = new_surface->texture.handle;
370 cur_state.Apply();
371 }
372
373 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
374 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
375 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
376 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
377
378 cur_state.texture_units[0].texture_2d = old_tex;
379 cur_state.Apply();
380 }
381
// Register the new surface: mark its memory range as cached and add it to the interval map
382 Memory::RasterizerMarkRegionCached(new_surface->addr, new_surface->size, 1);
383 surface_cache.add(std::make_pair(boost::icl::interval<PAddr>::right_open(new_surface->addr, new_surface->addr + new_surface->size), std::set<std::shared_ptr<CachedSurface>>({ new_surface })));
384 return new_surface.get();
385}
41 386
// GetSurfaceRect: looks for a cached surface of the same pixel format whose memory range fully
// contains the range described by `params`. On success the surface is returned and `out_rect`
// receives the requested area's rectangle inside it, multiplied by the surface's resolution
// scale. If no containing surface exists, `out_rect` is set to the full (scaled) area of
// `params` and the call falls through to GetSurface. Returns nullptr when params.addr is 0.
42 new_texture->width = info.width; 387CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params, bool match_res_scale, bool load_if_create, MathUtil::Rectangle<int>& out_rect) {
43 new_texture->height = info.height; 388 if (params.addr == 0) {
44 new_texture->size = info.stride * info.height; 389 return nullptr;
45 new_texture->addr = info.physical_address; 390 }
46 new_texture->hash = Common::ComputeHash64(texture_src_data, new_texture->size); 391
392 u32 total_pixels = params.width * params.height;
393 u32 params_size = total_pixels * CachedSurface::GetFormatBpp(params.pixel_format) / 8;
394
395 // Attempt to find encompassing surfaces
396 CachedSurface* best_subrect_surface = nullptr;
397 float subrect_surface_goodness = -1.f;
47 398
48 std::unique_ptr<Math::Vec4<u8>[]> temp_texture_buffer_rgba(new Math::Vec4<u8>[info.width * info.height]); 399 auto surface_interval = boost::icl::interval<PAddr>::right_open(params.addr, params.addr + params_size);
400 auto cache_upper_bound = surface_cache.upper_bound(surface_interval);
401 for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) {
402 for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) {
403 CachedSurface* surface = it2->get();
49 404
50 for (int y = 0; y < info.height; ++y) { 405 // Check if the request is contained in the surface
51 for (int x = 0; x < info.width; ++x) { 406 if (params.addr >= surface->addr &&
52 temp_texture_buffer_rgba[x + info.width * y] = Pica::DebugUtils::LookupTexture(texture_src_data, x, info.height - 1 - y, info); 407 params.addr + params_size - 1 <= surface->addr + surface->size - 1 &&
408 params.pixel_format == surface->pixel_format)
409 {
410 // Make sure optional param-matching criteria are fulfilled
411 bool tiling_match = (params.is_tiled == surface->is_tiled);
412 bool res_scale_match = (params.res_scale_width == surface->res_scale_width && params.res_scale_height == surface->res_scale_height);
413 if (!match_res_scale || res_scale_match) {
414 // Prioritize same-tiling and highest resolution surfaces
415 float match_goodness = (float)tiling_match + surface->res_scale_width * surface->res_scale_height;
// A dirty surface replaces the current candidate regardless of its score
416 if (match_goodness > subrect_surface_goodness || surface->dirty) {
417 subrect_surface_goodness = match_goodness;
418 best_subrect_surface = surface;
419 }
420 }
53 } 421 }
54 } 422 }
423 }
424
425 // Return the best subrect surface if found
426 if (best_subrect_surface != nullptr) {
427 unsigned int bytes_per_pixel = (CachedSurface::GetFormatBpp(best_subrect_surface->pixel_format) / 8);
428
429 int x0, y0;
430
// Convert the byte offset of the request inside the surface into a pixel (or tile) origin
431 if (!params.is_tiled) {
432 u32 begin_pixel_index = (params.addr - best_subrect_surface->addr) / bytes_per_pixel;
433 x0 = begin_pixel_index % best_subrect_surface->width;
434 y0 = begin_pixel_index / best_subrect_surface->width;
435
436 out_rect = MathUtil::Rectangle<int>(x0, y0, x0 + params.width, y0 + params.height);
437 } else {
// Tiled data is addressed in 8x8 pixel tiles
438 u32 bytes_per_tile = 8 * 8 * bytes_per_pixel;
439 u32 tiles_per_row = best_subrect_surface->width / 8;
440
441 u32 begin_tile_index = (params.addr - best_subrect_surface->addr) / bytes_per_tile;
442 x0 = begin_tile_index % tiles_per_row * 8;
443 y0 = begin_tile_index / tiles_per_row * 8;
444
445 // Tiled surfaces are flipped vertically in the rasterizer vs. 3DS memory.
446 out_rect = MathUtil::Rectangle<int>(x0, best_subrect_surface->height - y0, x0 + params.width, best_subrect_surface->height - (y0 + params.height));
447 }
55 448
// Scale the rectangle from surface pixels to texture pixels
56 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, info.width, info.height, 0, GL_RGBA, GL_UNSIGNED_BYTE, temp_texture_buffer_rgba.get()); 449 out_rect.left = (int)(out_rect.left * best_subrect_surface->res_scale_width);
450 out_rect.right = (int)(out_rect.right * best_subrect_surface->res_scale_width);
451 out_rect.top = (int)(out_rect.top * best_subrect_surface->res_scale_height);
452 out_rect.bottom = (int)(out_rect.bottom * best_subrect_surface->res_scale_height);
57 453
58 texture_cache.emplace(info.physical_address, std::move(new_texture)); 454 return best_subrect_surface;
59 } 455 }
456
457 // No subrect found - create and return a new surface
// For tiled surfaces the rectangle's top and bottom are swapped (top = scaled height, bottom = 0)
458 if (!params.is_tiled) {
459 out_rect = MathUtil::Rectangle<int>(0, 0, (int)(params.width * params.res_scale_width), (int)(params.height * params.res_scale_height));
460 } else {
461 out_rect = MathUtil::Rectangle<int>(0, (int)(params.height * params.res_scale_height), (int)(params.width * params.res_scale_width), 0);
462 }
463
464 return GetSurface(params, match_res_scale, load_if_create);
465}
466
// GetTextureSurface: builds surface parameters from a PICA texture configuration (always tiled)
// and fetches the matching surface through GetSurface, without requiring a resolution-scale
// match and loading the texture data from memory if the surface has to be created.
467CachedSurface* RasterizerCacheOpenGL::GetTextureSurface(const Pica::Regs::FullTextureConfig& config) {
468 Pica::DebugUtils::TextureInfo info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config.config, config.format);
469
470 CachedSurface params;
471 params.addr = info.physical_address;
472 params.width = info.width;
473 params.height = info.height;
474 params.is_tiled = true;
475 params.pixel_format = CachedSurface::PixelFormatFromTextureFormat(info.format);
476 return GetSurface(params, false, true);
60} 477}
61 478
// GetFramebufferSurfaces: resolves the color and depth surfaces for a framebuffer configuration.
// Returns (color surface, depth surface, rectangle); either surface may be nullptr when the
// corresponding buffer is not used, and the rectangle is the area of the surfaces to render to.
// The depth buffer is dropped (with a critical log message) when it overlaps the color buffer
// in memory or resolves to the same surface as the color buffer.
62void RasterizerCacheOpenGL::InvalidateInRange(PAddr addr, u32 size, bool ignore_hash) { 479std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> RasterizerCacheOpenGL::GetFramebufferSurfaces(const Pica::Regs::FramebufferConfig& config) {
63 // TODO: Optimize by also inserting upper bound (addr + size) of each texture into the same map and also narrow using lower_bound 480 const auto& regs = Pica::g_state.regs;
64 auto cache_upper_bound = texture_cache.upper_bound(addr + size); 481
482 // Make sure that framebuffers don't overlap if both color and depth are being used
483 u32 fb_area = config.GetWidth() * config.GetHeight();
484 bool framebuffers_overlap = config.GetColorBufferPhysicalAddress() != 0 &&
485 config.GetDepthBufferPhysicalAddress() != 0 &&
486 MathUtil::IntervalsIntersect(config.GetColorBufferPhysicalAddress(), fb_area * GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(config.color_format.Value())),
487 config.GetDepthBufferPhysicalAddress(), fb_area * Pica::Regs::BytesPerDepthPixel(config.depth_format));
488 bool using_color_fb = config.GetColorBufferPhysicalAddress() != 0;
// The depth buffer counts as used when depth testing or depth writes are enabled, or when it
// does not overlap the color buffer
489 bool using_depth_fb = config.GetDepthBufferPhysicalAddress() != 0 && (regs.output_merger.depth_test_enable || regs.output_merger.depth_write_enable || !framebuffers_overlap);
490
491 if (framebuffers_overlap && using_color_fb && using_depth_fb) {
492 LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; overlapping framebuffers not supported!");
493 using_depth_fb = false;
494 }
495
496 // get color and depth surfaces
497 CachedSurface color_params;
498 CachedSurface depth_params;
499 color_params.width = depth_params.width = config.GetWidth();
500 color_params.height = depth_params.height = config.GetHeight();
501 color_params.is_tiled = depth_params.is_tiled = true;
502 if (VideoCore::g_scaled_resolution_enabled) {
503 auto layout = VideoCore::g_emu_window->GetFramebufferLayout();
504
505 // Assume same scaling factor for top and bottom screens
506 color_params.res_scale_width = depth_params.res_scale_width = (float)layout.top_screen.GetWidth() / VideoCore::kScreenTopWidth;
507 color_params.res_scale_height = depth_params.res_scale_height = (float)layout.top_screen.GetHeight() / VideoCore::kScreenTopHeight;
508 }
509
510 color_params.addr = config.GetColorBufferPhysicalAddress();
511 color_params.pixel_format = CachedSurface::PixelFormatFromColorFormat(config.color_format);
512
513 depth_params.addr = config.GetDepthBufferPhysicalAddress();
514 depth_params.pixel_format = CachedSurface::PixelFormatFromDepthFormat(config.depth_format);
515
516 MathUtil::Rectangle<int> color_rect;
517 CachedSurface* color_surface = using_color_fb ? GetSurfaceRect(color_params, true, true, color_rect) : nullptr;
518
519 MathUtil::Rectangle<int> depth_rect;
520 CachedSurface* depth_surface = using_depth_fb ? GetSurfaceRect(depth_params, true, true, depth_rect) : nullptr;
521
522 // Sanity check to make sure found surfaces aren't the same
523 if (using_depth_fb && using_color_fb && color_surface == depth_surface) {
524 LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer surfaces overlap; overlapping surfaces not supported!");
525 using_depth_fb = false;
526 depth_surface = nullptr;
527 }
528
529 MathUtil::Rectangle<int> rect;
65 530
66 for (auto it = texture_cache.begin(); it != cache_upper_bound;) { 531 if (color_surface != nullptr && depth_surface != nullptr && (depth_rect.left != color_rect.left || depth_rect.top != color_rect.top)) {
67 const auto& info = *it->second; 532 // Can't specify separate color and depth viewport offsets in OpenGL, so re-zero both if they don't match
// NOTE(review): GetSurface can return nullptr, and color_surface is dereferenced further down
// without being re-checked - confirm this cannot happen for surfaces found above.
533 if (color_rect.left != 0 || color_rect.top != 0) {
534 color_surface = GetSurface(color_params, true, true);
535 }
68 536
69 // Flush the texture only if the memory region intersects and a change is detected 537 if (depth_rect.left != 0 || depth_rect.top != 0) {
70 if (MathUtil::IntervalsIntersect(addr, size, info.addr, info.size) && 538 depth_surface = GetSurface(depth_params, true, true);
71 (ignore_hash || info.hash != Common::ComputeHash64(Memory::GetPhysicalPointer(info.addr), info.size))) { 539 }
72 540
73 it = texture_cache.erase(it); 541 if (!color_surface->is_tiled) {
542 rect = MathUtil::Rectangle<int>(0, 0, (int)(color_params.width * color_params.res_scale_width), (int)(color_params.height * color_params.res_scale_height));
74 } else { 543 } else {
75 ++it; 544 rect = MathUtil::Rectangle<int>(0, (int)(color_params.height * color_params.res_scale_height), (int)(color_params.width * color_params.res_scale_width), 0);
76 } 545 }
546 } else if (color_surface != nullptr) {
547 rect = color_rect;
548 } else if (depth_surface != nullptr) {
549 rect = depth_rect;
550 } else {
551 rect = MathUtil::Rectangle<int>(0, 0, 0, 0);
77 } 552 }
553
554 return std::make_tuple(color_surface, depth_surface, rect);
78} 555}
79 556
// TryGetFillSurface: searches the cache for a surface that a memory fill could target directly.
// A surface qualifies when it starts at the fill's start address, its bits-per-pixel equals the
// fill value width (24, 32 or 16 bits) and its byte size equals the length of the fill range.
// Returns the first such surface, or nullptr if there is none.
80void RasterizerCacheOpenGL::InvalidateAll() { 557CachedSurface* RasterizerCacheOpenGL::TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config) {
81 texture_cache.clear(); 558 auto surface_interval = boost::icl::interval<PAddr>::right_open(config.GetStartAddress(), config.GetEndAddress());
559 auto range = surface_cache.equal_range(surface_interval);
560 for (auto it = range.first; it != range.second; ++it) {
561 for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) {
// Width of a single fill value; depends only on config, not on the surface being examined
562 int bits_per_value = 0;
563 if (config.fill_24bit) {
564 bits_per_value = 24;
565 } else if (config.fill_32bit) {
566 bits_per_value = 32;
567 } else {
568 bits_per_value = 16;
569 }
570
571 CachedSurface* surface = it2->get();
572
573 if (surface->addr == config.GetStartAddress() &&
574 CachedSurface::GetFormatBpp(surface->pixel_format) == bits_per_value &&
575 (surface->width * surface->height * CachedSurface::GetFormatBpp(surface->pixel_format) / 8) == (config.GetEndAddress() - config.GetStartAddress()))
576 {
577 return surface;
578 }
579 }
580 }
581
582 return nullptr;
583}
584
585MICROPROFILE_DEFINE(OpenGL_SurfaceDownload, "OpenGL", "Surface Download", MP_RGB(128, 192, 64));
586void RasterizerCacheOpenGL::FlushSurface(CachedSurface* surface) {
587 using PixelFormat = CachedSurface::PixelFormat;
588 using SurfaceType = CachedSurface::SurfaceType;
589
590 if (!surface->dirty) {
591 return;
592 }
593
594 MICROPROFILE_SCOPE(OpenGL_SurfaceDownload);
595
596 u8* dst_buffer = Memory::GetPhysicalPointer(surface->addr);
597 if (dst_buffer == nullptr) {
598 return;
599 }
600
601 OpenGLState cur_state = OpenGLState::GetCurState();
602 GLuint old_tex = cur_state.texture_units[0].texture_2d;
603
604 OGLTexture unscaled_tex;
605 GLuint texture_to_flush = surface->texture.handle;
606
607 // If not 1x scale, blit scaled texture to a new 1x texture and use that to flush
608 if (surface->res_scale_width != 1.f || surface->res_scale_height != 1.f) {
609 unscaled_tex.Create();
610
611 AllocateSurfaceTexture(unscaled_tex.handle, surface->pixel_format, surface->width, surface->height);
612 BlitTextures(surface->texture.handle, unscaled_tex.handle, CachedSurface::GetFormatType(surface->pixel_format),
613 MathUtil::Rectangle<int>(0, 0, surface->GetScaledWidth(), surface->GetScaledHeight()),
614 MathUtil::Rectangle<int>(0, 0, surface->width, surface->height));
615
616 texture_to_flush = unscaled_tex.handle;
617 }
618
619 cur_state.texture_units[0].texture_2d = texture_to_flush;
620 cur_state.Apply();
621 glActiveTexture(GL_TEXTURE0);
622
623 glPixelStorei(GL_PACK_ROW_LENGTH, (GLint)surface->stride);
624 if (!surface->is_tiled) {
625 // TODO: Ensure this will always be a color format, not a depth or other format
626 ASSERT((size_t)surface->pixel_format < fb_format_tuples.size());
627 const FormatTuple& tuple = fb_format_tuples[(unsigned int)surface->pixel_format];
628
629 glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, dst_buffer);
630 } else {
631 SurfaceType type = CachedSurface::GetFormatType(surface->pixel_format);
632 if (type != SurfaceType::Depth && type != SurfaceType::DepthStencil) {
633 ASSERT((size_t)surface->pixel_format < fb_format_tuples.size());
634 const FormatTuple& tuple = fb_format_tuples[(unsigned int)surface->pixel_format];
635
636 u32 bytes_per_pixel = CachedSurface::GetFormatBpp(surface->pixel_format) / 8;
637
638 std::vector<u8> temp_gl_buffer(surface->width * surface->height * bytes_per_pixel);
639
640 glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data());
641
642 // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion is necessary.
643 MortonCopyPixels(surface->pixel_format, surface->width, surface->height, bytes_per_pixel, bytes_per_pixel, dst_buffer, temp_gl_buffer.data(), false);
644 } else {
645 // Depth/Stencil formats need special treatment since they aren't sampleable using LookupTexture and can't use RGBA format
646 size_t tuple_idx = (size_t)surface->pixel_format - 14;
647 ASSERT(tuple_idx < depth_format_tuples.size());
648 const FormatTuple& tuple = depth_format_tuples[tuple_idx];
649
650 u32 bytes_per_pixel = CachedSurface::GetFormatBpp(surface->pixel_format) / 8;
651
652 // OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type
653 bool use_4bpp = (surface->pixel_format == PixelFormat::D24);
654
655 u32 gl_bytes_per_pixel = use_4bpp ? 4 : bytes_per_pixel;
656
657 std::vector<u8> temp_gl_buffer(surface->width * surface->height * gl_bytes_per_pixel);
658
659 glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data());
660
661 u8* temp_gl_buffer_ptr = use_4bpp ? temp_gl_buffer.data() + 1 : temp_gl_buffer.data();
662
663 MortonCopyPixels(surface->pixel_format, surface->width, surface->height, bytes_per_pixel, gl_bytes_per_pixel, dst_buffer, temp_gl_buffer_ptr, false);
664 }
665 }
666 glPixelStorei(GL_PACK_ROW_LENGTH, 0);
667
668 surface->dirty = false;
669
670 cur_state.texture_units[0].texture_2d = old_tex;
671 cur_state.Apply();
672}
673
674void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, bool invalidate) {
675 if (size == 0) {
676 return;
677 }
678
679 // Gather up unique surfaces that touch the region
680 std::unordered_set<std::shared_ptr<CachedSurface>> touching_surfaces;
681
682 auto surface_interval = boost::icl::interval<PAddr>::right_open(addr, addr + size);
683 auto cache_upper_bound = surface_cache.upper_bound(surface_interval);
684 for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) {
685 std::copy_if(it->second.begin(), it->second.end(), std::inserter(touching_surfaces, touching_surfaces.end()),
686 [skip_surface](std::shared_ptr<CachedSurface> surface) { return (surface.get() != skip_surface); });
687 }
688
689 // Flush and invalidate surfaces
690 for (auto surface : touching_surfaces) {
691 FlushSurface(surface.get());
692 if (invalidate) {
693 Memory::RasterizerMarkRegionCached(surface->addr, surface->size, -1);
694 surface_cache.subtract(std::make_pair(boost::icl::interval<PAddr>::right_open(surface->addr, surface->addr + surface->size), std::set<std::shared_ptr<CachedSurface>>({ surface })));
695 }
696 }
697}
698
// FlushAll: calls FlushSurface on every surface stored in surface_cache. A surface registered
// under several intervals is visited once per interval; FlushSurface returns early for
// surfaces that are no longer dirty.
699void RasterizerCacheOpenGL::FlushAll() {
700 for (auto& surfaces : surface_cache) {
701 for (auto& surface : surfaces.second) {
702 FlushSurface(surface.get());
703 }
704 }
82} 705}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index b69651427..893d51138 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -6,38 +6,211 @@
6 6
7#include <map> 7#include <map>
8#include <memory> 8#include <memory>
9#include <set>
10
11#include <boost/icl/interval_map.hpp>
12
13#include "common/math_util.h"
14
15#include "core/hw/gpu.h"
9 16
10#include "video_core/pica.h" 17#include "video_core/pica.h"
11#include "video_core/debug_utils/debug_utils.h" 18#include "video_core/debug_utils/debug_utils.h"
12#include "video_core/renderer_opengl/gl_resource_manager.h" 19#include "video_core/renderer_opengl/gl_resource_manager.h"
13#include "video_core/renderer_opengl/gl_state.h" 20#include "video_core/renderer_opengl/gl_state.h"
14 21
22struct CachedSurface;
23
24using SurfaceCache = boost::icl::interval_map<PAddr, std::set<std::shared_ptr<CachedSurface>>>;
25
26struct CachedSurface {
27 enum class PixelFormat {
28 // First 5 formats are shared between textures and color buffers
29 RGBA8 = 0,
30 RGB8 = 1,
31 RGB5A1 = 2,
32 RGB565 = 3,
33 RGBA4 = 4,
34
35 // Texture-only formats
36 IA8 = 5,
37 RG8 = 6,
38 I8 = 7,
39 A8 = 8,
40 IA4 = 9,
41 I4 = 10,
42 A4 = 11,
43 ETC1 = 12,
44 ETC1A4 = 13,
45
46 // Depth buffer-only formats
47 D16 = 14,
48 // gap
49 D24 = 16,
50 D24S8 = 17,
51
52 Invalid = 255,
53 };
54
55 enum class SurfaceType {
56 Color = 0,
57 Texture = 1,
58 Depth = 2,
59 DepthStencil = 3,
60 Invalid = 4,
61 };
62
63 static unsigned int GetFormatBpp(CachedSurface::PixelFormat format) {
64 static const std::array<unsigned int, 18> bpp_table = {
65 32, // RGBA8
66 24, // RGB8
67 16, // RGB5A1
68 16, // RGB565
69 16, // RGBA4
70 16, // IA8
71 16, // RG8
72 8, // I8
73 8, // A8
74 8, // IA4
75 4, // I4
76 4, // A4
77 4, // ETC1
78 8, // ETC1A4
79 16, // D16
80 0,
81 24, // D24
82 32, // D24S8
83 };
84
85 ASSERT((unsigned int)format < ARRAY_SIZE(bpp_table));
86 return bpp_table[(unsigned int)format];
87 }
88
89 static PixelFormat PixelFormatFromTextureFormat(Pica::Regs::TextureFormat format) {
90 return ((unsigned int)format < 14) ? (PixelFormat)format : PixelFormat::Invalid;
91 }
92
93 static PixelFormat PixelFormatFromColorFormat(Pica::Regs::ColorFormat format) {
94 return ((unsigned int)format < 5) ? (PixelFormat)format : PixelFormat::Invalid;
95 }
96
97 static PixelFormat PixelFormatFromDepthFormat(Pica::Regs::DepthFormat format) {
98 return ((unsigned int)format < 4) ? (PixelFormat)((unsigned int)format + 14) : PixelFormat::Invalid;
99 }
100
101 static PixelFormat PixelFormatFromGPUPixelFormat(GPU::Regs::PixelFormat format) {
102 switch (format) {
103 // RGB565 and RGB5A1 are switched in PixelFormat compared to ColorFormat
104 case GPU::Regs::PixelFormat::RGB565:
105 return PixelFormat::RGB565;
106 case GPU::Regs::PixelFormat::RGB5A1:
107 return PixelFormat::RGB5A1;
108 default:
109 return ((unsigned int)format < 5) ? (PixelFormat)format : PixelFormat::Invalid;
110 }
111 }
112
113 static bool CheckFormatsBlittable(PixelFormat pixel_format_a, PixelFormat pixel_format_b) {
114 SurfaceType a_type = GetFormatType(pixel_format_a);
115 SurfaceType b_type = GetFormatType(pixel_format_b);
116
117 if ((a_type == SurfaceType::Color || a_type == SurfaceType::Texture) && (b_type == SurfaceType::Color || b_type == SurfaceType::Texture)) {
118 return true;
119 }
120
121 if (a_type == SurfaceType::Depth && b_type == SurfaceType::Depth) {
122 return true;
123 }
124
125 if (a_type == SurfaceType::DepthStencil && b_type == SurfaceType::DepthStencil) {
126 return true;
127 }
128
129 return false;
130 }
131
132 static SurfaceType GetFormatType(PixelFormat pixel_format) {
133 if ((unsigned int)pixel_format < 5) {
134 return SurfaceType::Color;
135 }
136
137 if ((unsigned int)pixel_format < 14) {
138 return SurfaceType::Texture;
139 }
140
141 if (pixel_format == PixelFormat::D16 || pixel_format == PixelFormat::D24) {
142 return SurfaceType::Depth;
143 }
144
145 if (pixel_format == PixelFormat::D24S8) {
146 return SurfaceType::DepthStencil;
147 }
148
149 return SurfaceType::Invalid;
150 }
151
152 u32 GetScaledWidth() const {
153 return (u32)(width * res_scale_width);
154 }
155
156 u32 GetScaledHeight() const {
157 return (u32)(height * res_scale_height);
158 }
159
160 PAddr addr;
161 u32 size;
162
163 PAddr min_valid;
164 PAddr max_valid;
165
166 OGLTexture texture;
167 u32 width;
168 u32 height;
169 u32 stride = 0;
170 float res_scale_width = 1.f;
171 float res_scale_height = 1.f;
172
173 bool is_tiled;
174 PixelFormat pixel_format;
175 bool dirty;
176};
177
15class RasterizerCacheOpenGL : NonCopyable { 178class RasterizerCacheOpenGL : NonCopyable {
16public: 179public:
180 RasterizerCacheOpenGL();
17 ~RasterizerCacheOpenGL(); 181 ~RasterizerCacheOpenGL();
18 182
183 /// Blits one texture to another
184 bool BlitTextures(GLuint src_tex, GLuint dst_tex, CachedSurface::SurfaceType type, const MathUtil::Rectangle<int>& src_rect, const MathUtil::Rectangle<int>& dst_rect);
185
186 /// Attempt to blit one surface's texture to another
187 bool TryBlitSurfaces(CachedSurface* src_surface, const MathUtil::Rectangle<int>& src_rect, CachedSurface* dst_surface, const MathUtil::Rectangle<int>& dst_rect);
188
19 /// Loads a texture from 3DS memory to OpenGL and caches it (if not already cached) 189 /// Loads a texture from 3DS memory to OpenGL and caches it (if not already cached)
20 void LoadAndBindTexture(OpenGLState &state, unsigned texture_unit, const Pica::DebugUtils::TextureInfo& info); 190 CachedSurface* GetSurface(const CachedSurface& params, bool match_res_scale, bool load_if_create);
21 191
22 void LoadAndBindTexture(OpenGLState &state, unsigned texture_unit, const Pica::Regs::FullTextureConfig& config) { 192 /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from 3DS memory to OpenGL and caches it (if not already cached)
23 LoadAndBindTexture(state, texture_unit, Pica::DebugUtils::TextureInfo::FromPicaRegister(config.config, config.format)); 193 CachedSurface* GetSurfaceRect(const CachedSurface& params, bool match_res_scale, bool load_if_create, MathUtil::Rectangle<int>& out_rect);
24 }
25 194
26 /// Invalidate any cached resource intersecting the specified region. 195 /// Gets a surface based on the texture configuration
27 void InvalidateInRange(PAddr addr, u32 size, bool ignore_hash = false); 196 CachedSurface* GetTextureSurface(const Pica::Regs::FullTextureConfig& config);
28 197
29 /// Invalidate all cached OpenGL resources tracked by this cache manager 198 /// Gets the color and depth surfaces and rect (resolution scaled) based on the framebuffer configuration
30 void InvalidateAll(); 199 std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> GetFramebufferSurfaces(const Pica::Regs::FramebufferConfig& config);
31 200
32private: 201 /// Attempt to get a surface that exactly matches the fill region and format
33 struct CachedTexture { 202 CachedSurface* TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config);
34 OGLTexture texture; 203
35 GLuint width; 204 /// Write the surface back to memory
36 GLuint height; 205 void FlushSurface(CachedSurface* surface);
37 u32 size;
38 u64 hash;
39 PAddr addr;
40 };
41 206
42 std::map<PAddr, std::unique_ptr<CachedTexture>> texture_cache; 207 /// Write any cached resources overlapping the region back to memory (if dirty) and optionally invalidate them in the cache
208 void FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, bool invalidate);
209
210 /// Flush all cached resources tracked by this cache manager
211 void FlushAll();
212
213private:
214 SurfaceCache surface_cache;
215 OGLFramebuffer transfer_framebuffers[2];
43}; 216};
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index ee4b54ab9..646b4eaaf 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -198,6 +198,9 @@ static void AppendColorCombiner(std::string& out, TevStageConfig::Operation oper
198 case Operation::AddThenMultiply: 198 case Operation::AddThenMultiply:
199 out += "min(" + variable_name + "[0] + " + variable_name + "[1], vec3(1.0)) * " + variable_name + "[2]"; 199 out += "min(" + variable_name + "[0] + " + variable_name + "[1], vec3(1.0)) * " + variable_name + "[2]";
200 break; 200 break;
201 case Operation::Dot3_RGB:
202 out += "vec3(dot(" + variable_name + "[0] - vec3(0.5), " + variable_name + "[1] - vec3(0.5)) * 4.0)";
203 break;
201 default: 204 default:
202 out += "vec3(0.0)"; 205 out += "vec3(0.0)";
203 LOG_CRITICAL(Render_OpenGL, "Unknown color combiner operation: %u", operation); 206 LOG_CRITICAL(Render_OpenGL, "Unknown color combiner operation: %u", operation);
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 08e4d0b54..f04bdd8c5 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -3,6 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "video_core/pica.h" 5#include "video_core/pica.h"
6#include "video_core/renderer_opengl/gl_resource_manager.h"
6#include "video_core/renderer_opengl/gl_state.h" 7#include "video_core/renderer_opengl/gl_state.h"
7 8
8OpenGLState OpenGLState::cur_state; 9OpenGLState OpenGLState::cur_state;
@@ -48,17 +49,19 @@ OpenGLState::OpenGLState() {
48 texture_unit.sampler = 0; 49 texture_unit.sampler = 0;
49 } 50 }
50 51
51 for (auto& lut : lighting_lut) { 52 for (auto& lut : lighting_luts) {
52 lut.texture_1d = 0; 53 lut.texture_1d = 0;
53 } 54 }
54 55
55 draw.framebuffer = 0; 56 draw.read_framebuffer = 0;
57 draw.draw_framebuffer = 0;
56 draw.vertex_array = 0; 58 draw.vertex_array = 0;
57 draw.vertex_buffer = 0; 59 draw.vertex_buffer = 0;
60 draw.uniform_buffer = 0;
58 draw.shader_program = 0; 61 draw.shader_program = 0;
59} 62}
60 63
61void OpenGLState::Apply() { 64void OpenGLState::Apply() const {
62 // Culling 65 // Culling
63 if (cull.enabled != cur_state.cull.enabled) { 66 if (cull.enabled != cur_state.cull.enabled) {
64 if (cull.enabled) { 67 if (cull.enabled) {
@@ -175,16 +178,19 @@ void OpenGLState::Apply() {
175 } 178 }
176 179
177 // Lighting LUTs 180 // Lighting LUTs
178 for (unsigned i = 0; i < ARRAY_SIZE(lighting_lut); ++i) { 181 for (unsigned i = 0; i < ARRAY_SIZE(lighting_luts); ++i) {
179 if (lighting_lut[i].texture_1d != cur_state.lighting_lut[i].texture_1d) { 182 if (lighting_luts[i].texture_1d != cur_state.lighting_luts[i].texture_1d) {
180 glActiveTexture(GL_TEXTURE3 + i); 183 glActiveTexture(GL_TEXTURE3 + i);
181 glBindTexture(GL_TEXTURE_1D, lighting_lut[i].texture_1d); 184 glBindTexture(GL_TEXTURE_1D, lighting_luts[i].texture_1d);
182 } 185 }
183 } 186 }
184 187
185 // Framebuffer 188 // Framebuffer
186 if (draw.framebuffer != cur_state.draw.framebuffer) { 189 if (draw.read_framebuffer != cur_state.draw.read_framebuffer) {
187 glBindFramebuffer(GL_FRAMEBUFFER, draw.framebuffer); 190 glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
191 }
192 if (draw.draw_framebuffer != cur_state.draw.draw_framebuffer) {
193 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer);
188 } 194 }
189 195
190 // Vertex array 196 // Vertex array
@@ -210,45 +216,58 @@ void OpenGLState::Apply() {
210 cur_state = *this; 216 cur_state = *this;
211} 217}
212 218
213void OpenGLState::ResetTexture(GLuint id) { 219GLenum OpenGLState::CheckFBStatus(GLenum target) {
220 GLenum fb_status = glCheckFramebufferStatus(target);
221 if (fb_status != GL_FRAMEBUFFER_COMPLETE) {
222 const char* fb_description = (target == GL_READ_FRAMEBUFFER ? "READ" : (target == GL_DRAW_FRAMEBUFFER ? "DRAW" : "UNK"));
223 LOG_CRITICAL(Render_OpenGL, "OpenGL %s framebuffer check failed, status %X", fb_description, fb_status);
224 }
225
226 return fb_status;
227}
228
229void OpenGLState::ResetTexture(GLuint handle) {
214 for (auto& unit : cur_state.texture_units) { 230 for (auto& unit : cur_state.texture_units) {
215 if (unit.texture_2d == id) { 231 if (unit.texture_2d == handle) {
216 unit.texture_2d = 0; 232 unit.texture_2d = 0;
217 } 233 }
218 } 234 }
219} 235}
220 236
221void OpenGLState::ResetSampler(GLuint id) { 237void OpenGLState::ResetSampler(GLuint handle) {
222 for (auto& unit : cur_state.texture_units) { 238 for (auto& unit : cur_state.texture_units) {
223 if (unit.sampler == id) { 239 if (unit.sampler == handle) {
224 unit.sampler = 0; 240 unit.sampler = 0;
225 } 241 }
226 } 242 }
227} 243}
228 244
229void OpenGLState::ResetProgram(GLuint id) { 245void OpenGLState::ResetProgram(GLuint handle) {
230 if (cur_state.draw.shader_program == id) { 246 if (cur_state.draw.shader_program == handle) {
231 cur_state.draw.shader_program = 0; 247 cur_state.draw.shader_program = 0;
232 } 248 }
233} 249}
234 250
235void OpenGLState::ResetBuffer(GLuint id) { 251void OpenGLState::ResetBuffer(GLuint handle) {
236 if (cur_state.draw.vertex_buffer == id) { 252 if (cur_state.draw.vertex_buffer == handle) {
237 cur_state.draw.vertex_buffer = 0; 253 cur_state.draw.vertex_buffer = 0;
238 } 254 }
239 if (cur_state.draw.uniform_buffer == id) { 255 if (cur_state.draw.uniform_buffer == handle) {
240 cur_state.draw.uniform_buffer = 0; 256 cur_state.draw.uniform_buffer = 0;
241 } 257 }
242} 258}
243 259
244void OpenGLState::ResetVertexArray(GLuint id) { 260void OpenGLState::ResetVertexArray(GLuint handle) {
245 if (cur_state.draw.vertex_array == id) { 261 if (cur_state.draw.vertex_array == handle) {
246 cur_state.draw.vertex_array = 0; 262 cur_state.draw.vertex_array = 0;
247 } 263 }
248} 264}
249 265
250void OpenGLState::ResetFramebuffer(GLuint id) { 266void OpenGLState::ResetFramebuffer(GLuint handle) {
251 if (cur_state.draw.framebuffer == id) { 267 if (cur_state.draw.read_framebuffer == handle) {
252 cur_state.draw.framebuffer = 0; 268 cur_state.draw.read_framebuffer = 0;
269 }
270 if (cur_state.draw.draw_framebuffer == handle) {
271 cur_state.draw.draw_framebuffer = 0;
253 } 272 }
254} 273}
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index e848058d7..0f72e9004 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -5,6 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include <glad/glad.h> 7#include <glad/glad.h>
8#include <memory>
8 9
9class OpenGLState { 10class OpenGLState {
10public: 11public:
@@ -63,15 +64,15 @@ public:
63 64
64 struct { 65 struct {
65 GLuint texture_1d; // GL_TEXTURE_BINDING_1D 66 GLuint texture_1d; // GL_TEXTURE_BINDING_1D
66 } lighting_lut[6]; 67 } lighting_luts[6];
67 68
68 struct { 69 struct {
69 GLuint framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING 70 GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING
71 GLuint draw_framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING
70 GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING 72 GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING
71 GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING 73 GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING
72 GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING 74 GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING
73 GLuint shader_program; // GL_CURRENT_PROGRAM 75 GLuint shader_program; // GL_CURRENT_PROGRAM
74 bool shader_dirty;
75 } draw; 76 } draw;
76 77
77 OpenGLState(); 78 OpenGLState();
@@ -82,14 +83,18 @@ public:
82 } 83 }
83 84
84 /// Apply this state as the current OpenGL state 85 /// Apply this state as the current OpenGL state
85 void Apply(); 86 void Apply() const;
86 87
87 static void ResetTexture(GLuint id); 88 /// Check the status of the current OpenGL read or draw framebuffer configuration
88 static void ResetSampler(GLuint id); 89 static GLenum CheckFBStatus(GLenum target);
89 static void ResetProgram(GLuint id); 90
90 static void ResetBuffer(GLuint id); 91 /// Resets and unbinds any references to the given resource in the current OpenGL state
91 static void ResetVertexArray(GLuint id); 92 static void ResetTexture(GLuint handle);
92 static void ResetFramebuffer(GLuint id); 93 static void ResetSampler(GLuint handle);
94 static void ResetProgram(GLuint handle);
95 static void ResetBuffer(GLuint handle);
96 static void ResetVertexArray(GLuint handle);
97 static void ResetFramebuffer(GLuint handle);
93 98
94private: 99private:
95 static OpenGLState cur_state; 100 static OpenGLState cur_state;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 11c4d0daf..8f907593f 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -107,7 +107,7 @@ void RendererOpenGL::SwapBuffers() {
107 OpenGLState prev_state = OpenGLState::GetCurState(); 107 OpenGLState prev_state = OpenGLState::GetCurState();
108 state.Apply(); 108 state.Apply();
109 109
110 for(int i : {0, 1}) { 110 for (int i : {0, 1}) {
111 const auto& framebuffer = GPU::g_regs.framebuffer_config[i]; 111 const auto& framebuffer = GPU::g_regs.framebuffer_config[i];
112 112
113 // Main LCD (0): 0x1ED02204, Sub LCD (1): 0x1ED02A04 113 // Main LCD (0): 0x1ED02204, Sub LCD (1): 0x1ED02A04
@@ -117,25 +117,25 @@ void RendererOpenGL::SwapBuffers() {
117 LCD::Read(color_fill.raw, lcd_color_addr); 117 LCD::Read(color_fill.raw, lcd_color_addr);
118 118
119 if (color_fill.is_enabled) { 119 if (color_fill.is_enabled) {
120 LoadColorToActiveGLTexture(color_fill.color_r, color_fill.color_g, color_fill.color_b, textures[i]); 120 LoadColorToActiveGLTexture(color_fill.color_r, color_fill.color_g, color_fill.color_b, screen_infos[i].texture);
121 121
122 // Resize the texture in case the framebuffer size has changed 122 // Resize the texture in case the framebuffer size has changed
123 textures[i].width = 1; 123 screen_infos[i].texture.width = 1;
124 textures[i].height = 1; 124 screen_infos[i].texture.height = 1;
125 } else { 125 } else {
126 if (textures[i].width != (GLsizei)framebuffer.width || 126 if (screen_infos[i].texture.width != (GLsizei)framebuffer.width ||
127 textures[i].height != (GLsizei)framebuffer.height || 127 screen_infos[i].texture.height != (GLsizei)framebuffer.height ||
128 textures[i].format != framebuffer.color_format) { 128 screen_infos[i].texture.format != framebuffer.color_format) {
129 // Reallocate texture if the framebuffer size has changed. 129 // Reallocate texture if the framebuffer size has changed.
130 // This is expected to not happen very often and hence should not be a 130 // This is expected to not happen very often and hence should not be a
131 // performance problem. 131 // performance problem.
132 ConfigureFramebufferTexture(textures[i], framebuffer); 132 ConfigureFramebufferTexture(screen_infos[i].texture, framebuffer);
133 } 133 }
134 LoadFBToActiveGLTexture(framebuffer, textures[i]); 134 LoadFBToScreenInfo(framebuffer, screen_infos[i]);
135 135
136 // Resize the texture in case the framebuffer size has changed 136 // Resize the texture in case the framebuffer size has changed
137 textures[i].width = framebuffer.width; 137 screen_infos[i].texture.width = framebuffer.width;
138 textures[i].height = framebuffer.height; 138 screen_infos[i].texture.height = framebuffer.height;
139 } 139 }
140 } 140 }
141 141
@@ -166,8 +166,8 @@ void RendererOpenGL::SwapBuffers() {
166/** 166/**
167 * Loads framebuffer from emulated memory into the active OpenGL texture. 167 * Loads framebuffer from emulated memory into the active OpenGL texture.
168 */ 168 */
169void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig& framebuffer, 169void RendererOpenGL::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer,
170 const TextureInfo& texture) { 170 ScreenInfo& screen_info) {
171 171
172 const PAddr framebuffer_addr = framebuffer.active_fb == 0 ? 172 const PAddr framebuffer_addr = framebuffer.active_fb == 0 ?
173 framebuffer.address_left1 : framebuffer.address_left2; 173 framebuffer.address_left1 : framebuffer.address_left2;
@@ -177,8 +177,6 @@ void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig&
177 framebuffer_addr, (int)framebuffer.width, 177 framebuffer_addr, (int)framebuffer.width,
178 (int)framebuffer.height, (int)framebuffer.format); 178 (int)framebuffer.height, (int)framebuffer.format);
179 179
180 const u8* framebuffer_data = Memory::GetPhysicalPointer(framebuffer_addr);
181
182 int bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format); 180 int bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format);
183 size_t pixel_stride = framebuffer.stride / bpp; 181 size_t pixel_stride = framebuffer.stride / bpp;
184 182
@@ -189,24 +187,34 @@ void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig&
189 // only allows rows to have a memory alignement of 4. 187 // only allows rows to have a memory alignement of 4.
190 ASSERT(pixel_stride % 4 == 0); 188 ASSERT(pixel_stride % 4 == 0);
191 189
192 state.texture_units[0].texture_2d = texture.handle; 190 if (!Rasterizer()->AccelerateDisplay(framebuffer, framebuffer_addr, pixel_stride, screen_info)) {
193 state.Apply(); 191 // Reset the screen info's display texture to its own permanent texture
192 screen_info.display_texture = screen_info.texture.resource.handle;
193 screen_info.display_texcoords = MathUtil::Rectangle<float>(0.f, 0.f, 1.f, 1.f);
194 194
195 glActiveTexture(GL_TEXTURE0); 195 Memory::RasterizerFlushRegion(framebuffer_addr, framebuffer.stride * framebuffer.height);
196 glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)pixel_stride);
197 196
198 // Update existing texture 197 const u8* framebuffer_data = Memory::GetPhysicalPointer(framebuffer_addr);
199 // TODO: Test what happens on hardware when you change the framebuffer dimensions so that they
200 // differ from the LCD resolution.
201 // TODO: Applications could theoretically crash Citra here by specifying too large
202 // framebuffer sizes. We should make sure that this cannot happen.
203 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height,
204 texture.gl_format, texture.gl_type, framebuffer_data);
205 198
206 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); 199 state.texture_units[0].texture_2d = screen_info.texture.resource.handle;
200 state.Apply();
207 201
208 state.texture_units[0].texture_2d = 0; 202 glActiveTexture(GL_TEXTURE0);
209 state.Apply(); 203 glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)pixel_stride);
204
205 // Update existing texture
206 // TODO: Test what happens on hardware when you change the framebuffer dimensions so that they
207 // differ from the LCD resolution.
208 // TODO: Applications could theoretically crash Citra here by specifying too large
209 // framebuffer sizes. We should make sure that this cannot happen.
210 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height,
211 screen_info.texture.gl_format, screen_info.texture.gl_type, framebuffer_data);
212
213 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
214
215 state.texture_units[0].texture_2d = 0;
216 state.Apply();
217 }
210} 218}
211 219
212/** 220/**
@@ -216,7 +224,7 @@ void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig&
216 */ 224 */
217void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, 225void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b,
218 const TextureInfo& texture) { 226 const TextureInfo& texture) {
219 state.texture_units[0].texture_2d = texture.handle; 227 state.texture_units[0].texture_2d = texture.resource.handle;
220 state.Apply(); 228 state.Apply();
221 229
222 glActiveTexture(GL_TEXTURE0); 230 glActiveTexture(GL_TEXTURE0);
@@ -224,6 +232,9 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color
224 232
225 // Update existing texture 233 // Update existing texture
226 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, framebuffer_data); 234 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, framebuffer_data);
235
236 state.texture_units[0].texture_2d = 0;
237 state.Apply();
227} 238}
228 239
229/** 240/**
@@ -233,20 +244,22 @@ void RendererOpenGL::InitOpenGLObjects() {
233 glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, 0.0f); 244 glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, 0.0f);
234 245
235 // Link shaders and get variable locations 246 // Link shaders and get variable locations
236 program_id = GLShader::LoadProgram(vertex_shader, fragment_shader); 247 shader.Create(vertex_shader, fragment_shader);
237 uniform_modelview_matrix = glGetUniformLocation(program_id, "modelview_matrix"); 248 state.draw.shader_program = shader.handle;
238 uniform_color_texture = glGetUniformLocation(program_id, "color_texture"); 249 state.Apply();
239 attrib_position = glGetAttribLocation(program_id, "vert_position"); 250 uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix");
240 attrib_tex_coord = glGetAttribLocation(program_id, "vert_tex_coord"); 251 uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture");
252 attrib_position = glGetAttribLocation(shader.handle, "vert_position");
253 attrib_tex_coord = glGetAttribLocation(shader.handle, "vert_tex_coord");
241 254
242 // Generate VBO handle for drawing 255 // Generate VBO handle for drawing
243 glGenBuffers(1, &vertex_buffer_handle); 256 vertex_buffer.Create();
244 257
245 // Generate VAO 258 // Generate VAO
246 glGenVertexArrays(1, &vertex_array_handle); 259 vertex_array.Create();
247 260
248 state.draw.vertex_array = vertex_array_handle; 261 state.draw.vertex_array = vertex_array.handle;
249 state.draw.vertex_buffer = vertex_buffer_handle; 262 state.draw.vertex_buffer = vertex_buffer.handle;
250 state.draw.uniform_buffer = 0; 263 state.draw.uniform_buffer = 0;
251 state.Apply(); 264 state.Apply();
252 265
@@ -258,13 +271,13 @@ void RendererOpenGL::InitOpenGLObjects() {
258 glEnableVertexAttribArray(attrib_tex_coord); 271 glEnableVertexAttribArray(attrib_tex_coord);
259 272
260 // Allocate textures for each screen 273 // Allocate textures for each screen
261 for (auto& texture : textures) { 274 for (auto& screen_info : screen_infos) {
262 glGenTextures(1, &texture.handle); 275 screen_info.texture.resource.Create();
263 276
264 // Allocation of storage is deferred until the first frame, when we 277 // Allocation of storage is deferred until the first frame, when we
265 // know the framebuffer size. 278 // know the framebuffer size.
266 279
267 state.texture_units[0].texture_2d = texture.handle; 280 state.texture_units[0].texture_2d = screen_info.texture.resource.handle;
268 state.Apply(); 281 state.Apply();
269 282
270 glActiveTexture(GL_TEXTURE0); 283 glActiveTexture(GL_TEXTURE0);
@@ -273,6 +286,8 @@ void RendererOpenGL::InitOpenGLObjects() {
273 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 286 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
274 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); 287 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
275 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); 288 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
289
290 screen_info.display_texture = screen_info.texture.resource.handle;
276 } 291 }
277 292
278 state.texture_units[0].texture_2d = 0; 293 state.texture_units[0].texture_2d = 0;
@@ -327,30 +342,38 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
327 UNIMPLEMENTED(); 342 UNIMPLEMENTED();
328 } 343 }
329 344
330 state.texture_units[0].texture_2d = texture.handle; 345 state.texture_units[0].texture_2d = texture.resource.handle;
331 state.Apply(); 346 state.Apply();
332 347
333 glActiveTexture(GL_TEXTURE0); 348 glActiveTexture(GL_TEXTURE0);
334 glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0, 349 glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0,
335 texture.gl_format, texture.gl_type, nullptr); 350 texture.gl_format, texture.gl_type, nullptr);
351
352 state.texture_units[0].texture_2d = 0;
353 state.Apply();
336} 354}
337 355
338/** 356/**
339 * Draws a single texture to the emulator window, rotating the texture to correct for the 3DS's LCD rotation. 357 * Draws a single texture to the emulator window, rotating the texture to correct for the 3DS's LCD rotation.
340 */ 358 */
341void RendererOpenGL::DrawSingleScreenRotated(const TextureInfo& texture, float x, float y, float w, float h) { 359void RendererOpenGL::DrawSingleScreenRotated(const ScreenInfo& screen_info, float x, float y, float w, float h) {
360 auto& texcoords = screen_info.display_texcoords;
361
342 std::array<ScreenRectVertex, 4> vertices = {{ 362 std::array<ScreenRectVertex, 4> vertices = {{
343 ScreenRectVertex(x, y, 1.f, 0.f), 363 ScreenRectVertex(x, y, texcoords.bottom, texcoords.left),
344 ScreenRectVertex(x+w, y, 1.f, 1.f), 364 ScreenRectVertex(x+w, y, texcoords.bottom, texcoords.right),
345 ScreenRectVertex(x, y+h, 0.f, 0.f), 365 ScreenRectVertex(x, y+h, texcoords.top, texcoords.left),
346 ScreenRectVertex(x+w, y+h, 0.f, 1.f), 366 ScreenRectVertex(x+w, y+h, texcoords.top, texcoords.right),
347 }}; 367 }};
348 368
349 state.texture_units[0].texture_2d = texture.handle; 369 state.texture_units[0].texture_2d = screen_info.display_texture;
350 state.Apply(); 370 state.Apply();
351 371
352 glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices.data()); 372 glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices.data());
353 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); 373 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
374
375 state.texture_units[0].texture_2d = 0;
376 state.Apply();
354} 377}
355 378
356/** 379/**
@@ -362,9 +385,6 @@ void RendererOpenGL::DrawScreens() {
362 glViewport(0, 0, layout.width, layout.height); 385 glViewport(0, 0, layout.width, layout.height);
363 glClear(GL_COLOR_BUFFER_BIT); 386 glClear(GL_COLOR_BUFFER_BIT);
364 387
365 state.draw.shader_program = program_id;
366 state.Apply();
367
368 // Set projection matrix 388 // Set projection matrix
369 std::array<GLfloat, 3 * 2> ortho_matrix = MakeOrthographicMatrix((float)layout.width, 389 std::array<GLfloat, 3 * 2> ortho_matrix = MakeOrthographicMatrix((float)layout.width,
370 (float)layout.height); 390 (float)layout.height);
@@ -374,9 +394,9 @@ void RendererOpenGL::DrawScreens() {
374 glActiveTexture(GL_TEXTURE0); 394 glActiveTexture(GL_TEXTURE0);
375 glUniform1i(uniform_color_texture, 0); 395 glUniform1i(uniform_color_texture, 0);
376 396
377 DrawSingleScreenRotated(textures[0], (float)layout.top_screen.left, (float)layout.top_screen.top, 397 DrawSingleScreenRotated(screen_infos[0], (float)layout.top_screen.left, (float)layout.top_screen.top,
378 (float)layout.top_screen.GetWidth(), (float)layout.top_screen.GetHeight()); 398 (float)layout.top_screen.GetWidth(), (float)layout.top_screen.GetHeight());
379 DrawSingleScreenRotated(textures[1], (float)layout.bottom_screen.left,(float)layout.bottom_screen.top, 399 DrawSingleScreenRotated(screen_infos[1], (float)layout.bottom_screen.left,(float)layout.bottom_screen.top,
380 (float)layout.bottom_screen.GetWidth(), (float)layout.bottom_screen.GetHeight()); 400 (float)layout.bottom_screen.GetWidth(), (float)layout.bottom_screen.GetHeight());
381 401
382 m_current_frame++; 402 m_current_frame++;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index fe4d142a5..5ca5255ac 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -11,10 +11,28 @@
11#include "core/hw/gpu.h" 11#include "core/hw/gpu.h"
12 12
13#include "video_core/renderer_base.h" 13#include "video_core/renderer_base.h"
14#include "video_core/renderer_opengl/gl_resource_manager.h"
14#include "video_core/renderer_opengl/gl_state.h" 15#include "video_core/renderer_opengl/gl_state.h"
15 16
16class EmuWindow; 17class EmuWindow;
17 18
19/// Structure used for storing information about the textures for each 3DS screen
20struct TextureInfo {
21 OGLTexture resource;
22 GLsizei width;
23 GLsizei height;
24 GPU::Regs::PixelFormat format;
25 GLenum gl_format;
26 GLenum gl_type;
27};
28
29/// Structure used for storing information about the display target for each 3DS screen
30struct ScreenInfo {
31 GLuint display_texture;
32 MathUtil::Rectangle<float> display_texcoords;
33 TextureInfo texture;
34};
35
18class RendererOpenGL : public RendererBase { 36class RendererOpenGL : public RendererBase {
19public: 37public:
20 38
@@ -37,26 +55,16 @@ public:
37 void ShutDown() override; 55 void ShutDown() override;
38 56
39private: 57private:
40 /// Structure used for storing information about the textures for each 3DS screen
41 struct TextureInfo {
42 GLuint handle;
43 GLsizei width;
44 GLsizei height;
45 GPU::Regs::PixelFormat format;
46 GLenum gl_format;
47 GLenum gl_type;
48 };
49
50 void InitOpenGLObjects(); 58 void InitOpenGLObjects();
51 void ConfigureFramebufferTexture(TextureInfo& texture, 59 void ConfigureFramebufferTexture(TextureInfo& texture,
52 const GPU::Regs::FramebufferConfig& framebuffer); 60 const GPU::Regs::FramebufferConfig& framebuffer);
53 void DrawScreens(); 61 void DrawScreens();
54 void DrawSingleScreenRotated(const TextureInfo& texture, float x, float y, float w, float h); 62 void DrawSingleScreenRotated(const ScreenInfo& screen_info, float x, float y, float w, float h);
55 void UpdateFramerate(); 63 void UpdateFramerate();
56 64
57 // Loads framebuffer from emulated memory into the active OpenGL texture. 65 // Loads framebuffer from emulated memory into the display information structure
58 void LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig& framebuffer, 66 void LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer,
59 const TextureInfo& texture); 67 ScreenInfo& screen_info);
60 // Fills active OpenGL texture with the given RGB color. 68 // Fills active OpenGL texture with the given RGB color.
61 void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, 69 void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b,
62 const TextureInfo& texture); 70 const TextureInfo& texture);
@@ -69,10 +77,10 @@ private:
69 OpenGLState state; 77 OpenGLState state;
70 78
71 // OpenGL object IDs 79 // OpenGL object IDs
72 GLuint vertex_array_handle; 80 OGLVertexArray vertex_array;
73 GLuint vertex_buffer_handle; 81 OGLBuffer vertex_buffer;
74 GLuint program_id; 82 OGLShader shader;
75 std::array<TextureInfo, 2> textures; ///< Textures for top and bottom screens respectively 83 std::array<ScreenInfo, 2> screen_infos; ///< Display information for top and bottom screens respectively
76 // Shader uniform location indices 84 // Shader uniform location indices
77 GLuint uniform_modelview_matrix; 85 GLuint uniform_modelview_matrix;
78 GLuint uniform_color_texture; 86 GLuint uniform_color_texture;
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index 75301accd..043e99190 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -9,7 +9,6 @@
9 9
10#include "common/hash.h" 10#include "common/hash.h"
11#include "common/microprofile.h" 11#include "common/microprofile.h"
12#include "common/profiler.h"
13 12
14#include "video_core/debug_utils/debug_utils.h" 13#include "video_core/debug_utils/debug_utils.h"
15#include "video_core/pica.h" 14#include "video_core/pica.h"
@@ -57,13 +56,11 @@ void Shutdown() {
57#endif // ARCHITECTURE_x86_64 56#endif // ARCHITECTURE_x86_64
58} 57}
59 58
60static Common::Profiling::TimingCategory shader_category("Vertex Shader");
61MICROPROFILE_DEFINE(GPU_VertexShader, "GPU", "Vertex Shader", MP_RGB(50, 50, 240)); 59MICROPROFILE_DEFINE(GPU_VertexShader, "GPU", "Vertex Shader", MP_RGB(50, 50, 240));
62 60
63OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes) { 61OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes) {
64 auto& config = g_state.regs.vs; 62 auto& config = g_state.regs.vs;
65 63
66 Common::Profiling::ScopeTimer timer(shader_category);
67 MICROPROFILE_SCOPE(GPU_VertexShader); 64 MICROPROFILE_SCOPE(GPU_VertexShader);
68 65
69 state.program_counter = config.main_offset; 66 state.program_counter = config.main_offset;
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index 9c5bd97bd..9ce9344d2 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -25,7 +25,7 @@ namespace Pica {
25namespace Shader { 25namespace Shader {
26 26
27struct InputVertex { 27struct InputVertex {
28 Math::Vec4<float24> attr[16]; 28 alignas(16) Math::Vec4<float24> attr[16];
29}; 29};
30 30
31struct OutputVertex { 31struct OutputVertex {
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index b47d3beda..b7747fa42 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -148,7 +148,7 @@ static Instruction GetVertexShaderInstruction(size_t offset) {
148} 148}
149 149
150static void LogCritical(const char* msg) { 150static void LogCritical(const char* msg) {
151 LOG_CRITICAL(HW_GPU, msg); 151 LOG_CRITICAL(HW_GPU, "%s", msg);
152} 152}
153 153
154void JitShader::Compile_Assert(bool condition, const char* msg) { 154void JitShader::Compile_Assert(bool condition, const char* msg) {
@@ -795,6 +795,8 @@ void JitShader::FindReturnOffsets() {
795 case OpCode::Id::CALLU: 795 case OpCode::Id::CALLU:
796 return_offsets.push_back(instr.flow_control.dest_offset + instr.flow_control.num_instructions); 796 return_offsets.push_back(instr.flow_control.dest_offset + instr.flow_control.num_instructions);
797 break; 797 break;
798 default:
799 break;
798 } 800 }
799 } 801 }
800 802
@@ -854,7 +856,7 @@ void JitShader::Compile() {
854 uintptr_t size = reinterpret_cast<uintptr_t>(GetCodePtr()) - reinterpret_cast<uintptr_t>(program); 856 uintptr_t size = reinterpret_cast<uintptr_t>(GetCodePtr()) - reinterpret_cast<uintptr_t>(program);
855 ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!"); 857 ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!");
856 858
857 LOG_DEBUG(HW_GPU, "Compiled shader size=%d", size); 859 LOG_DEBUG(HW_GPU, "Compiled shader size=%lu", size);
858} 860}
859 861
860JitShader::JitShader() { 862JitShader::JitShader() {
diff --git a/src/video_core/swrasterizer.h b/src/video_core/swrasterizer.h
index 9a9a76d7a..090f899bc 100644
--- a/src/video_core/swrasterizer.h
+++ b/src/video_core/swrasterizer.h
@@ -11,16 +11,14 @@
11namespace VideoCore { 11namespace VideoCore {
12 12
13class SWRasterizer : public RasterizerInterface { 13class SWRasterizer : public RasterizerInterface {
14 void InitObjects() override {}
15 void Reset() override {}
16 void AddTriangle(const Pica::Shader::OutputVertex& v0, 14 void AddTriangle(const Pica::Shader::OutputVertex& v0,
17 const Pica::Shader::OutputVertex& v1, 15 const Pica::Shader::OutputVertex& v1,
18 const Pica::Shader::OutputVertex& v2) override; 16 const Pica::Shader::OutputVertex& v2) override;
19 void DrawTriangles() override {} 17 void DrawTriangles() override {}
20 void FlushFramebuffer() override {}
21 void NotifyPicaRegisterChanged(u32 id) override {} 18 void NotifyPicaRegisterChanged(u32 id) override {}
19 void FlushAll() override {}
22 void FlushRegion(PAddr addr, u32 size) override {} 20 void FlushRegion(PAddr addr, u32 size) override {}
23 void InvalidateRegion(PAddr addr, u32 size) override {} 21 void FlushAndInvalidateRegion(PAddr addr, u32 size) override {}
24}; 22};
25 23
26} 24}
diff --git a/src/video_core/vertex_loader.cpp b/src/video_core/vertex_loader.cpp
new file mode 100644
index 000000000..8a3d91896
--- /dev/null
+++ b/src/video_core/vertex_loader.cpp
@@ -0,0 +1,140 @@
1#include <cmath>
2#include <string>
3
4#include "boost/range/algorithm/fill.hpp"
5
6#include "common/assert.h"
7#include "common/alignment.h"
8#include "common/bit_field.h"
9#include "common/common_funcs.h"
10#include "common/common_types.h"
11#include "common/logging/log.h"
12
13#include "core/memory.h"
14
15#include "video_core/debug_utils/debug_utils.h"
16#include "video_core/pica.h"
17#include "video_core/pica_state.h"
18#include "video_core/pica_types.h"
19#include "video_core/vertex_loader.h"
20
21namespace Pica {
22
23void VertexLoader::Setup(const Pica::Regs& regs) {
24 const auto& attribute_config = regs.vertex_attributes;
25 num_total_attributes = attribute_config.GetNumTotalAttributes();
26
27 boost::fill(vertex_attribute_sources, 0xdeadbeef);
28
29 for (int i = 0; i < 16; i++) {
30 vertex_attribute_is_default[i] = attribute_config.IsDefaultAttribute(i);
31 }
32
33 // Setup attribute data from loaders
34 for (int loader = 0; loader < 12; ++loader) {
35 const auto& loader_config = attribute_config.attribute_loaders[loader];
36
37 u32 offset = 0;
38
39 // TODO: What happens if a loader overwrites a previous one's data?
40 for (unsigned component = 0; component < loader_config.component_count; ++component) {
41 if (component >= 12) {
42 LOG_ERROR(HW_GPU, "Overflow in the vertex attribute loader %u trying to load component %u", loader, component);
43 continue;
44 }
45
46 u32 attribute_index = loader_config.GetComponent(component);
47 if (attribute_index < 12) {
48 offset = Common::AlignUp(offset, attribute_config.GetElementSizeInBytes(attribute_index));
49 vertex_attribute_sources[attribute_index] = loader_config.data_offset + offset;
50 vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count);
51 vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index);
52 vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index);
53 offset += attribute_config.GetStride(attribute_index);
54 } else if (attribute_index < 16) {
55 // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively
56 offset = Common::AlignUp(offset, 4);
57 offset += (attribute_index - 11) * 4;
58 } else {
59 UNREACHABLE(); // This is truly unreachable due to the number of bits for each component
60 }
61 }
62 }
63}
64
65void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, DebugUtils::MemoryAccessTracker& memory_accesses) {
66 for (int i = 0; i < num_total_attributes; ++i) {
67 if (vertex_attribute_elements[i] != 0) {
68 // Load per-vertex data from the loader arrays
69 u32 source_addr = base_address + vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex;
70
71 if (g_debug_context && Pica::g_debug_context->recorder) {
72 memory_accesses.AddAccess(source_addr, vertex_attribute_elements[i] * (
73 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4
74 : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1));
75 }
76
77 switch (vertex_attribute_formats[i]) {
78 case Regs::VertexAttributeFormat::BYTE:
79 {
80 const s8* srcdata = reinterpret_cast<const s8*>(Memory::GetPhysicalPointer(source_addr));
81 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
82 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
83 }
84 break;
85 }
86 case Regs::VertexAttributeFormat::UBYTE:
87 {
88 const u8* srcdata = reinterpret_cast<const u8*>(Memory::GetPhysicalPointer(source_addr));
89 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
90 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
91 }
92 break;
93 }
94 case Regs::VertexAttributeFormat::SHORT:
95 {
96 const s16* srcdata = reinterpret_cast<const s16*>(Memory::GetPhysicalPointer(source_addr));
97 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
98 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
99 }
100 break;
101 }
102 case Regs::VertexAttributeFormat::FLOAT:
103 {
104 const float* srcdata = reinterpret_cast<const float*>(Memory::GetPhysicalPointer(source_addr));
105 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
106 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
107 }
108 break;
109 }
110 }
111
112 // Default attribute values set if array elements have < 4 components. This
113 // is *not* carried over from the default attribute settings even if they're
114 // enabled for this attribute.
115 for (unsigned int comp = vertex_attribute_elements[i]; comp < 4; ++comp) {
116 input.attr[i][comp] = comp == 3 ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f);
117 }
118
119 LOG_TRACE(HW_GPU, "Loaded %d components of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f %f %f %f",
120 vertex_attribute_elements[i], i, vertex, index,
121 base_address,
122 vertex_attribute_sources[i],
123 vertex_attribute_strides[i] * vertex,
124 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
125 } else if (vertex_attribute_is_default[i]) {
126 // Load the default attribute if we're configured to do so
127 input.attr[i] = g_state.vs.default_attributes[i];
128 LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)",
129 i, vertex, index,
130 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),
131 input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
132 } else {
133 // TODO(yuriks): In this case, no data gets loaded and the vertex
134 // remains with the last value it had. This isn't currently maintained
135 // as global state, however, and so won't work in Citra yet.
136 }
137 }
138}
139
140} // namespace Pica \ No newline at end of file
diff --git a/src/video_core/vertex_loader.h b/src/video_core/vertex_loader.h
new file mode 100644
index 000000000..ff42d1596
--- /dev/null
+++ b/src/video_core/vertex_loader.h
@@ -0,0 +1,28 @@
1#pragma once
2
3#include <iterator>
4#include <algorithm>
5
6#include "video_core/pica.h"
7#include "video_core/shader/shader.h"
8#include "video_core/debug_utils/debug_utils.h"
9
10namespace Pica {
11
12class VertexLoader {
13public:
14 void Setup(const Pica::Regs& regs);
15 void LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, DebugUtils::MemoryAccessTracker& memory_accesses);
16
17 int GetNumTotalAttributes() const { return num_total_attributes; }
18
19private:
20 u32 vertex_attribute_sources[16];
21 u32 vertex_attribute_strides[16] = {};
22 Regs::VertexAttributeFormat vertex_attribute_formats[16] = {};
23 u32 vertex_attribute_elements[16] = {};
24 bool vertex_attribute_is_default[16];
25 int num_total_attributes;
26};
27
28} // namespace Pica
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 256899c89..855286173 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -25,6 +25,7 @@ std::unique_ptr<RendererBase> g_renderer; ///< Renderer plugin
25 25
26std::atomic<bool> g_hw_renderer_enabled; 26std::atomic<bool> g_hw_renderer_enabled;
27std::atomic<bool> g_shader_jit_enabled; 27std::atomic<bool> g_shader_jit_enabled;
28std::atomic<bool> g_scaled_resolution_enabled;
28 29
29/// Initialize the video core 30/// Initialize the video core
30bool Init(EmuWindow* emu_window) { 31bool Init(EmuWindow* emu_window) {
diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h
index bca67fb8c..30267489e 100644
--- a/src/video_core/video_core.h
+++ b/src/video_core/video_core.h
@@ -36,6 +36,7 @@ extern EmuWindow* g_emu_window; ///< Emu window
36// TODO: Wrap these in a user settings struct along with any other graphics settings (often set from qt ui) 36// TODO: Wrap these in a user settings struct along with any other graphics settings (often set from qt ui)
37extern std::atomic<bool> g_hw_renderer_enabled; 37extern std::atomic<bool> g_hw_renderer_enabled;
38extern std::atomic<bool> g_shader_jit_enabled; 38extern std::atomic<bool> g_shader_jit_enabled;
39extern std::atomic<bool> g_scaled_resolution_enabled;
39 40
40/// Start the video core 41/// Start the video core
41void Start(); 42void Start();