summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/audio_core/CMakeLists.txt23
-rw-r--r--src/audio_core/audio_core.cpp46
-rw-r--r--src/audio_core/audio_core.h7
-rw-r--r--src/audio_core/hle/common.h11
-rw-r--r--src/audio_core/hle/dsp.cpp69
-rw-r--r--src/audio_core/hle/dsp.h40
-rw-r--r--src/audio_core/hle/filter.h1
-rw-r--r--src/audio_core/hle/pipe.cpp41
-rw-r--r--src/audio_core/hle/pipe.h16
-rw-r--r--src/audio_core/hle/source.cpp320
-rw-r--r--src/audio_core/hle/source.h144
-rw-r--r--src/audio_core/interpolate.cpp85
-rw-r--r--src/audio_core/interpolate.h41
-rw-r--r--src/audio_core/null_sink.h29
-rw-r--r--src/audio_core/sdl2_sink.cpp126
-rw-r--r--src/audio_core/sdl2_sink.h30
-rw-r--r--src/audio_core/sink.h2
-rw-r--r--src/audio_core/sink_details.cpp25
-rw-r--r--src/audio_core/sink_details.h27
-rw-r--r--src/citra/CMakeLists.txt2
-rw-r--r--src/citra/citra.cpp29
-rw-r--r--src/citra/config.cpp6
-rw-r--r--src/citra/default_ini.h9
-rw-r--r--src/citra/emu_window/emu_window_sdl2.cpp7
-rw-r--r--src/citra_qt/CMakeLists.txt3
-rw-r--r--src/citra_qt/bootmanager.cpp2
-rw-r--r--src/citra_qt/config.cpp10
-rw-r--r--src/citra_qt/configure_general.cpp2
-rw-r--r--src/citra_qt/configure_general.ui7
-rw-r--r--src/citra_qt/debugger/graphics_breakpoints.cpp6
-rw-r--r--src/citra_qt/debugger/graphics_framebuffer.cpp6
-rw-r--r--src/citra_qt/debugger/graphics_tracing.cpp6
-rw-r--r--src/citra_qt/debugger/graphics_vertex_shader.cpp8
-rw-r--r--src/citra_qt/debugger/profiler.cpp39
-rw-r--r--src/citra_qt/debugger/profiler.h3
-rw-r--r--src/citra_qt/game_list.cpp16
-rw-r--r--src/citra_qt/game_list.h2
-rw-r--r--src/citra_qt/game_list_p.h106
-rw-r--r--src/citra_qt/main.cpp20
-rw-r--r--src/citra_qt/util/util.cpp2
-rw-r--r--src/common/CMakeLists.txt1
-rw-r--r--src/common/assert.h2
-rw-r--r--src/common/bit_field.h2
-rw-r--r--src/common/bit_set.h3
-rw-r--r--src/common/code_block.h6
-rw-r--r--src/common/common_funcs.h4
-rw-r--r--src/common/file_util.cpp15
-rw-r--r--src/common/file_util.h6
-rw-r--r--src/common/logging/backend.cpp1
-rw-r--r--src/common/logging/log.h3
-rw-r--r--src/common/microprofile.h4
-rw-r--r--src/common/microprofileui.h3
-rw-r--r--src/common/profiler.cpp82
-rw-r--r--src/common/profiler.h152
-rw-r--r--src/common/profiler_reporting.h27
-rw-r--r--src/common/swap.h68
-rw-r--r--src/common/x64/emitter.h2
-rw-r--r--src/core/arm/dyncom/arm_dyncom.cpp2
-rw-r--r--src/core/arm/dyncom/arm_dyncom_interpreter.cpp7
-rw-r--r--src/core/core.cpp2
-rw-r--r--src/core/gdbstub/gdbstub.cpp30
-rw-r--r--src/core/hle/applets/mii_selector.cpp24
-rw-r--r--src/core/hle/applets/mii_selector.h50
-rw-r--r--src/core/hle/applets/swkbd.cpp20
-rw-r--r--src/core/hle/applets/swkbd.h7
-rw-r--r--src/core/hle/hle.cpp20
-rw-r--r--src/core/hle/hle.h4
-rw-r--r--src/core/hle/kernel/process.h2
-rw-r--r--src/core/hle/kernel/thread.cpp3
-rw-r--r--src/core/hle/result.h2
-rw-r--r--src/core/hle/service/ac_u.cpp26
-rw-r--r--src/core/hle/service/am/am.cpp2
-rw-r--r--src/core/hle/service/apt/apt.h15
-rw-r--r--src/core/hle/service/cfg/cfg.cpp4
-rw-r--r--src/core/hle/service/cfg/cfg.h13
-rw-r--r--src/core/hle/service/dsp_dsp.cpp195
-rw-r--r--src/core/hle/service/dsp_dsp.h19
-rw-r--r--src/core/hle/service/fs/archive.cpp1
-rw-r--r--src/core/hle/service/fs/fs_user.cpp2
-rw-r--r--src/core/hle/service/gsp_gpu.cpp70
-rw-r--r--src/core/hle/service/gsp_gpu.h1
-rw-r--r--src/core/hle/service/ndm/ndm.cpp197
-rw-r--r--src/core/hle/service/ndm/ndm.h216
-rw-r--r--src/core/hle/service/ndm/ndm_u.cpp34
-rw-r--r--src/core/hle/service/soc_u.cpp100
-rw-r--r--src/core/hle/service/y2r_u.cpp490
-rw-r--r--src/core/hle/service/y2r_u.h20
-rw-r--r--src/core/hle/svc.cpp13
-rw-r--r--src/core/hw/gpu.cpp327
-rw-r--r--src/core/hw/gpu.h4
-rw-r--r--src/core/hw/lcd.h2
-rw-r--r--src/core/loader/3dsx.cpp33
-rw-r--r--src/core/loader/3dsx.h9
-rw-r--r--src/core/loader/loader.cpp53
-rw-r--r--src/core/loader/loader.h57
-rw-r--r--src/core/loader/ncch.cpp27
-rw-r--r--src/core/loader/ncch.h7
-rw-r--r--src/core/memory.cpp140
-rw-r--r--src/core/memory.h16
-rw-r--r--src/core/settings.cpp5
-rw-r--r--src/core/settings.h7
-rw-r--r--src/core/tracer/recorder.cpp24
-rw-r--r--src/core/tracer/recorder.h1
-rw-r--r--src/video_core/CMakeLists.txt3
-rw-r--r--src/video_core/clipper.cpp17
-rw-r--r--src/video_core/command_processor.cpp171
-rw-r--r--src/video_core/debug_utils/debug_utils.cpp32
-rw-r--r--src/video_core/debug_utils/debug_utils.h64
-rw-r--r--src/video_core/pica.cpp7
-rw-r--r--src/video_core/pica.h46
-rw-r--r--src/video_core/pica_state.h9
-rw-r--r--src/video_core/pica_types.h1
-rw-r--r--src/video_core/primitive_assembly.cpp3
-rw-r--r--src/video_core/rasterizer.cpp167
-rw-r--r--src/video_core/rasterizer_interface.h31
-rw-r--r--src/video_core/renderer_base.cpp5
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp879
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h358
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp712
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h221
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp188
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.h2
-rw-r--r--src/video_core/renderer_opengl/gl_shader_util.cpp3
-rw-r--r--src/video_core/renderer_opengl/gl_shader_util.h1
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp68
-rw-r--r--src/video_core/renderer_opengl/gl_state.h26
-rw-r--r--src/video_core/renderer_opengl/pica_to_gl.h7
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp149
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h47
-rw-r--r--src/video_core/shader/shader.cpp83
-rw-r--r--src/video_core/shader/shader.h111
-rw-r--r--src/video_core/shader/shader_interpreter.cpp80
-rw-r--r--src/video_core/shader/shader_interpreter.h4
-rw-r--r--src/video_core/shader/shader_jit_x64.cpp16
-rw-r--r--src/video_core/shader/shader_jit_x64.h5
-rw-r--r--src/video_core/swrasterizer.h12
-rw-r--r--src/video_core/utils.cpp36
-rw-r--r--src/video_core/utils.h27
-rw-r--r--src/video_core/vertex_loader.cpp140
-rw-r--r--src/video_core/vertex_loader.h33
-rw-r--r--src/video_core/video_core.cpp5
-rw-r--r--src/video_core/video_core.h1
142 files changed, 5475 insertions, 2353 deletions
diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt
index 869da5e83..13b5e400e 100644
--- a/src/audio_core/CMakeLists.txt
+++ b/src/audio_core/CMakeLists.txt
@@ -4,6 +4,9 @@ set(SRCS
4 hle/dsp.cpp 4 hle/dsp.cpp
5 hle/filter.cpp 5 hle/filter.cpp
6 hle/pipe.cpp 6 hle/pipe.cpp
7 hle/source.cpp
8 interpolate.cpp
9 sink_details.cpp
7 ) 10 )
8 11
9set(HEADERS 12set(HEADERS
@@ -13,9 +16,27 @@ set(HEADERS
13 hle/dsp.h 16 hle/dsp.h
14 hle/filter.h 17 hle/filter.h
15 hle/pipe.h 18 hle/pipe.h
19 hle/source.h
20 interpolate.h
21 null_sink.h
16 sink.h 22 sink.h
23 sink_details.h
17 ) 24 )
18 25
26include_directories(../../externals/soundtouch/include)
27
28if(SDL2_FOUND)
29 set(SRCS ${SRCS} sdl2_sink.cpp)
30 set(HEADERS ${HEADERS} sdl2_sink.h)
31 include_directories(${SDL2_INCLUDE_DIR})
32endif()
33
19create_directory_groups(${SRCS} ${HEADERS}) 34create_directory_groups(${SRCS} ${HEADERS})
20 35
21add_library(audio_core STATIC ${SRCS} ${HEADERS}) \ No newline at end of file 36add_library(audio_core STATIC ${SRCS} ${HEADERS})
37target_link_libraries(audio_core SoundTouch)
38
39if(SDL2_FOUND)
40 target_link_libraries(audio_core ${SDL2_LIBRARY})
41 set_property(TARGET audio_core APPEND PROPERTY COMPILE_DEFINITIONS HAVE_SDL2)
42endif()
diff --git a/src/audio_core/audio_core.cpp b/src/audio_core/audio_core.cpp
index 894f46990..d42249ebd 100644
--- a/src/audio_core/audio_core.cpp
+++ b/src/audio_core/audio_core.cpp
@@ -2,8 +2,15 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <memory>
6#include <string>
7
5#include "audio_core/audio_core.h" 8#include "audio_core/audio_core.h"
6#include "audio_core/hle/dsp.h" 9#include "audio_core/hle/dsp.h"
10#include "audio_core/hle/pipe.h"
11#include "audio_core/null_sink.h"
12#include "audio_core/sink.h"
13#include "audio_core/sink_details.h"
7 14
8#include "core/core_timing.h" 15#include "core/core_timing.h"
9#include "core/hle/kernel/vm_manager.h" 16#include "core/hle/kernel/vm_manager.h"
@@ -17,17 +24,16 @@ static constexpr u64 audio_frame_ticks = 1310252ull; ///< Units: ARM11 cycles
17 24
18static void AudioTickCallback(u64 /*userdata*/, int cycles_late) { 25static void AudioTickCallback(u64 /*userdata*/, int cycles_late) {
19 if (DSP::HLE::Tick()) { 26 if (DSP::HLE::Tick()) {
20 // HACK: We're not signaling the interrupts when they should be, but just firing them all off together. 27 // TODO(merry): Signal all the other interrupts as appropriate.
21 // It should be only (interrupt_id = 2, channel_id = 2) that's signalled here. 28 DSP_DSP::SignalPipeInterrupt(DSP::HLE::DspPipe::Audio);
22 // TODO(merry): Understand when the other interrupts are fired. 29 // HACK(merry): Added to prevent regressions. Will remove soon.
23 DSP_DSP::SignalAllInterrupts(); 30 DSP_DSP::SignalPipeInterrupt(DSP::HLE::DspPipe::Binary);
24 } 31 }
25 32
26 // Reschedule recurrent event 33 // Reschedule recurrent event
27 CoreTiming::ScheduleEvent(audio_frame_ticks - cycles_late, tick_event); 34 CoreTiming::ScheduleEvent(audio_frame_ticks - cycles_late, tick_event);
28} 35}
29 36
30/// Initialise Audio
31void Init() { 37void Init() {
32 DSP::HLE::Init(); 38 DSP::HLE::Init();
33 39
@@ -35,19 +41,39 @@ void Init() {
35 CoreTiming::ScheduleEvent(audio_frame_ticks, tick_event); 41 CoreTiming::ScheduleEvent(audio_frame_ticks, tick_event);
36} 42}
37 43
38/// Add DSP address spaces to Process's address space.
39void AddAddressSpace(Kernel::VMManager& address_space) { 44void AddAddressSpace(Kernel::VMManager& address_space) {
40 auto r0_vma = address_space.MapBackingMemory(DSP::HLE::region0_base, reinterpret_cast<u8*>(&DSP::HLE::g_region0), sizeof(DSP::HLE::SharedMemory), Kernel::MemoryState::IO).MoveFrom(); 45 auto r0_vma = address_space.MapBackingMemory(DSP::HLE::region0_base, reinterpret_cast<u8*>(&DSP::HLE::g_regions[0]), sizeof(DSP::HLE::SharedMemory), Kernel::MemoryState::IO).MoveFrom();
41 address_space.Reprotect(r0_vma, Kernel::VMAPermission::ReadWrite); 46 address_space.Reprotect(r0_vma, Kernel::VMAPermission::ReadWrite);
42 47
43 auto r1_vma = address_space.MapBackingMemory(DSP::HLE::region1_base, reinterpret_cast<u8*>(&DSP::HLE::g_region1), sizeof(DSP::HLE::SharedMemory), Kernel::MemoryState::IO).MoveFrom(); 48 auto r1_vma = address_space.MapBackingMemory(DSP::HLE::region1_base, reinterpret_cast<u8*>(&DSP::HLE::g_regions[1]), sizeof(DSP::HLE::SharedMemory), Kernel::MemoryState::IO).MoveFrom();
44 address_space.Reprotect(r1_vma, Kernel::VMAPermission::ReadWrite); 49 address_space.Reprotect(r1_vma, Kernel::VMAPermission::ReadWrite);
45} 50}
46 51
47/// Shutdown Audio 52void SelectSink(std::string sink_id) {
53 if (sink_id == "auto") {
54 // Auto-select.
55 // g_sink_details is ordered in terms of desirability, with the best choice at the front.
56 const auto& sink_detail = g_sink_details.front();
57 DSP::HLE::SetSink(sink_detail.factory());
58 return;
59 }
60
61 auto iter = std::find_if(g_sink_details.begin(), g_sink_details.end(), [sink_id](const auto& sink_detail) {
62 return sink_detail.id == sink_id;
63 });
64
65 if (iter == g_sink_details.end()) {
66 LOG_ERROR(Audio, "AudioCore::SelectSink given invalid sink_id");
67 DSP::HLE::SetSink(std::make_unique<NullSink>());
68 return;
69 }
70
71 DSP::HLE::SetSink(iter->factory());
72}
73
48void Shutdown() { 74void Shutdown() {
49 CoreTiming::UnscheduleEvent(tick_event, 0); 75 CoreTiming::UnscheduleEvent(tick_event, 0);
50 DSP::HLE::Shutdown(); 76 DSP::HLE::Shutdown();
51} 77}
52 78
53} //namespace 79} // namespace AudioCore
diff --git a/src/audio_core/audio_core.h b/src/audio_core/audio_core.h
index 64c330914..f618361f3 100644
--- a/src/audio_core/audio_core.h
+++ b/src/audio_core/audio_core.h
@@ -4,14 +4,14 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <string>
8
7namespace Kernel { 9namespace Kernel {
8class VMManager; 10class VMManager;
9} 11}
10 12
11namespace AudioCore { 13namespace AudioCore {
12 14
13constexpr int num_sources = 24;
14constexpr int samples_per_frame = 160; ///< Samples per audio frame at native sample rate
15constexpr int native_sample_rate = 32728; ///< 32kHz 15constexpr int native_sample_rate = 32728; ///< 32kHz
16 16
17/// Initialise Audio Core 17/// Initialise Audio Core
@@ -20,6 +20,9 @@ void Init();
20/// Add DSP address spaces to a Process. 20/// Add DSP address spaces to a Process.
21void AddAddressSpace(Kernel::VMManager& vm_manager); 21void AddAddressSpace(Kernel::VMManager& vm_manager);
22 22
23/// Select the sink to use based on sink id.
24void SelectSink(std::string sink_id);
25
23/// Shutdown Audio Core 26/// Shutdown Audio Core
24void Shutdown(); 27void Shutdown();
25 28
diff --git a/src/audio_core/hle/common.h b/src/audio_core/hle/common.h
index 37d441eb2..596b67eaf 100644
--- a/src/audio_core/hle/common.h
+++ b/src/audio_core/hle/common.h
@@ -7,18 +7,19 @@
7#include <algorithm> 7#include <algorithm>
8#include <array> 8#include <array>
9 9
10#include "audio_core/audio_core.h"
11
12#include "common/common_types.h" 10#include "common/common_types.h"
13 11
14namespace DSP { 12namespace DSP {
15namespace HLE { 13namespace HLE {
16 14
15constexpr int num_sources = 24;
16constexpr int samples_per_frame = 160; ///< Samples per audio frame at native sample rate
17
17/// The final output to the speakers is stereo. Preprocessing output in Source is also stereo. 18/// The final output to the speakers is stereo. Preprocessing output in Source is also stereo.
18using StereoFrame16 = std::array<std::array<s16, 2>, AudioCore::samples_per_frame>; 19using StereoFrame16 = std::array<std::array<s16, 2>, samples_per_frame>;
19 20
20/// The DSP is quadraphonic internally. 21/// The DSP is quadraphonic internally.
21using QuadFrame32 = std::array<std::array<s32, 4>, AudioCore::samples_per_frame>; 22using QuadFrame32 = std::array<std::array<s32, 4>, samples_per_frame>;
22 23
23/** 24/**
24 * This performs the filter operation defined by FilterT::ProcessSample on the frame in-place. 25 * This performs the filter operation defined by FilterT::ProcessSample on the frame in-place.
@@ -26,7 +27,7 @@ using QuadFrame32 = std::array<std::array<s32, 4>, AudioCore::samples_per_fram
26 */ 27 */
27template<typename FrameT, typename FilterT> 28template<typename FrameT, typename FilterT>
28void FilterFrame(FrameT& frame, FilterT& filter) { 29void FilterFrame(FrameT& frame, FilterT& filter) {
29 std::transform(frame.begin(), frame.end(), frame.begin(), [&filter](const typename FrameT::value_type& sample) { 30 std::transform(frame.begin(), frame.end(), frame.begin(), [&filter](const auto& sample) {
30 return filter.ProcessSample(sample); 31 return filter.ProcessSample(sample);
31 }); 32 });
32} 33}
diff --git a/src/audio_core/hle/dsp.cpp b/src/audio_core/hle/dsp.cpp
index c89356edc..0cdbdb06a 100644
--- a/src/audio_core/hle/dsp.cpp
+++ b/src/audio_core/hle/dsp.cpp
@@ -2,40 +2,81 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <array>
6#include <memory>
7
5#include "audio_core/hle/dsp.h" 8#include "audio_core/hle/dsp.h"
6#include "audio_core/hle/pipe.h" 9#include "audio_core/hle/pipe.h"
10#include "audio_core/hle/source.h"
11#include "audio_core/sink.h"
7 12
8namespace DSP { 13namespace DSP {
9namespace HLE { 14namespace HLE {
10 15
11SharedMemory g_region0; 16std::array<SharedMemory, 2> g_regions;
12SharedMemory g_region1; 17
18static size_t CurrentRegionIndex() {
19 // The region with the higher frame counter is chosen unless there is wraparound.
20 // This function only returns a 0 or 1.
21
22 if (g_regions[0].frame_counter == 0xFFFFu && g_regions[1].frame_counter != 0xFFFEu) {
23 // Wraparound has occurred.
24 return 1;
25 }
26
27 if (g_regions[1].frame_counter == 0xFFFFu && g_regions[0].frame_counter != 0xFFFEu) {
28 // Wraparound has occurred.
29 return 0;
30 }
31
32 return (g_regions[0].frame_counter > g_regions[1].frame_counter) ? 0 : 1;
33}
34
35static SharedMemory& ReadRegion() {
36 return g_regions[CurrentRegionIndex()];
37}
38
39static SharedMemory& WriteRegion() {
40 return g_regions[1 - CurrentRegionIndex()];
41}
42
43static std::array<Source, num_sources> sources = {
44 Source(0), Source(1), Source(2), Source(3), Source(4), Source(5),
45 Source(6), Source(7), Source(8), Source(9), Source(10), Source(11),
46 Source(12), Source(13), Source(14), Source(15), Source(16), Source(17),
47 Source(18), Source(19), Source(20), Source(21), Source(22), Source(23)
48};
49
50static std::unique_ptr<AudioCore::Sink> sink;
13 51
14void Init() { 52void Init() {
15 DSP::HLE::ResetPipes(); 53 DSP::HLE::ResetPipes();
54 for (auto& source : sources) {
55 source.Reset();
56 }
16} 57}
17 58
18void Shutdown() { 59void Shutdown() {
19} 60}
20 61
21bool Tick() { 62bool Tick() {
22 return true; 63 SharedMemory& read = ReadRegion();
23} 64 SharedMemory& write = WriteRegion();
24 65
25SharedMemory& CurrentRegion() { 66 std::array<QuadFrame32, 3> intermediate_mixes = {};
26 // The region with the higher frame counter is chosen unless there is wraparound.
27 67
28 if (g_region0.frame_counter == 0xFFFFu && g_region1.frame_counter != 0xFFFEu) { 68 for (size_t i = 0; i < num_sources; i++) {
29 // Wraparound has occured. 69 write.source_statuses.status[i] = sources[i].Tick(read.source_configurations.config[i], read.adpcm_coefficients.coeff[i]);
30 return g_region1; 70 for (size_t mix = 0; mix < 3; mix++) {
71 sources[i].MixInto(intermediate_mixes[mix], mix);
72 }
31 } 73 }
32 74
33 if (g_region1.frame_counter == 0xFFFFu && g_region0.frame_counter != 0xFFFEu) { 75 return true;
34 // Wraparound has occured. 76}
35 return g_region0;
36 }
37 77
38 return (g_region0.frame_counter > g_region1.frame_counter) ? g_region0 : g_region1; 78void SetSink(std::unique_ptr<AudioCore::Sink> sink_) {
79 sink = std::move(sink_);
39} 80}
40 81
41} // namespace HLE 82} // namespace HLE
diff --git a/src/audio_core/hle/dsp.h b/src/audio_core/hle/dsp.h
index c15ef0b7a..f6e53f68f 100644
--- a/src/audio_core/hle/dsp.h
+++ b/src/audio_core/hle/dsp.h
@@ -4,16 +4,22 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
7#include <cstddef> 8#include <cstddef>
9#include <memory>
8#include <type_traits> 10#include <type_traits>
9 11
10#include "audio_core/audio_core.h" 12#include "audio_core/hle/common.h"
11 13
12#include "common/bit_field.h" 14#include "common/bit_field.h"
13#include "common/common_funcs.h" 15#include "common/common_funcs.h"
14#include "common/common_types.h" 16#include "common/common_types.h"
15#include "common/swap.h" 17#include "common/swap.h"
16 18
19namespace AudioCore {
20class Sink;
21}
22
17namespace DSP { 23namespace DSP {
18namespace HLE { 24namespace HLE {
19 25
@@ -27,13 +33,8 @@ namespace HLE {
27// double-buffer. The frame counter is located as the very last u16 of each region and is incremented 33// double-buffer. The frame counter is located as the very last u16 of each region and is incremented
28// each audio tick. 34// each audio tick.
29 35
30struct SharedMemory;
31
32constexpr VAddr region0_base = 0x1FF50000; 36constexpr VAddr region0_base = 0x1FF50000;
33extern SharedMemory g_region0;
34
35constexpr VAddr region1_base = 0x1FF70000; 37constexpr VAddr region1_base = 0x1FF70000;
36extern SharedMemory g_region1;
37 38
38/** 39/**
39 * The DSP is native 16-bit. The DSP also appears to be big-endian. When reading 32-bit numbers from 40 * The DSP is native 16-bit. The DSP also appears to be big-endian. When reading 32-bit numbers from
@@ -164,9 +165,9 @@ struct SourceConfiguration {
164 float_le rate_multiplier; 165 float_le rate_multiplier;
165 166
166 enum class InterpolationMode : u8 { 167 enum class InterpolationMode : u8 {
167 None = 0, 168 Polyphase = 0,
168 Linear = 1, 169 Linear = 1,
169 Polyphase = 2 170 None = 2
170 }; 171 };
171 172
172 InterpolationMode interpolation_mode; 173 InterpolationMode interpolation_mode;
@@ -305,7 +306,7 @@ struct SourceConfiguration {
305 u16_le buffer_id; 306 u16_le buffer_id;
306 }; 307 };
307 308
308 Configuration config[AudioCore::num_sources]; 309 Configuration config[num_sources];
309}; 310};
310ASSERT_DSP_STRUCT(SourceConfiguration::Configuration, 192); 311ASSERT_DSP_STRUCT(SourceConfiguration::Configuration, 192);
311ASSERT_DSP_STRUCT(SourceConfiguration::Configuration::Buffer, 20); 312ASSERT_DSP_STRUCT(SourceConfiguration::Configuration::Buffer, 20);
@@ -313,14 +314,14 @@ ASSERT_DSP_STRUCT(SourceConfiguration::Configuration::Buffer, 20);
313struct SourceStatus { 314struct SourceStatus {
314 struct Status { 315 struct Status {
315 u8 is_enabled; ///< Is this channel enabled? (Doesn't have to be playing anything.) 316 u8 is_enabled; ///< Is this channel enabled? (Doesn't have to be playing anything.)
316 u8 previous_buffer_id_dirty; ///< Non-zero when previous_buffer_id changes 317 u8 current_buffer_id_dirty; ///< Non-zero when current_buffer_id changes
317 u16_le sync; ///< Is set by the DSP to the value of SourceConfiguration::sync 318 u16_le sync; ///< Is set by the DSP to the value of SourceConfiguration::sync
318 u32_dsp buffer_position; ///< Number of samples into the current buffer 319 u32_dsp buffer_position; ///< Number of samples into the current buffer
319 u16_le previous_buffer_id; ///< Updated when a buffer finishes playing 320 u16_le current_buffer_id; ///< Updated when a buffer finishes playing
320 INSERT_PADDING_DSPWORDS(1); 321 INSERT_PADDING_DSPWORDS(1);
321 }; 322 };
322 323
323 Status status[AudioCore::num_sources]; 324 Status status[num_sources];
324}; 325};
325ASSERT_DSP_STRUCT(SourceStatus::Status, 12); 326ASSERT_DSP_STRUCT(SourceStatus::Status, 12);
326 327
@@ -413,7 +414,7 @@ ASSERT_DSP_STRUCT(DspConfiguration::ReverbEffect, 52);
413struct AdpcmCoefficients { 414struct AdpcmCoefficients {
414 /// Coefficients are signed fixed point with 11 fractional bits. 415 /// Coefficients are signed fixed point with 11 fractional bits.
415 /// Each source has 16 coefficients associated with it. 416 /// Each source has 16 coefficients associated with it.
416 s16_le coeff[AudioCore::num_sources][16]; 417 s16_le coeff[num_sources][16];
417}; 418};
418ASSERT_DSP_STRUCT(AdpcmCoefficients, 768); 419ASSERT_DSP_STRUCT(AdpcmCoefficients, 768);
419 420
@@ -427,7 +428,7 @@ ASSERT_DSP_STRUCT(DspStatus, 32);
427/// Final mixed output in PCM16 stereo format, what you hear out of the speakers. 428/// Final mixed output in PCM16 stereo format, what you hear out of the speakers.
428/// When the application writes to this region it has no effect. 429/// When the application writes to this region it has no effect.
429struct FinalMixSamples { 430struct FinalMixSamples {
430 s16_le pcm16[2 * AudioCore::samples_per_frame]; 431 s16_le pcm16[2 * samples_per_frame];
431}; 432};
432ASSERT_DSP_STRUCT(FinalMixSamples, 640); 433ASSERT_DSP_STRUCT(FinalMixSamples, 640);
433 434
@@ -437,7 +438,7 @@ ASSERT_DSP_STRUCT(FinalMixSamples, 640);
437/// Values that exceed s16 range will be clipped by the DSP after further processing. 438/// Values that exceed s16 range will be clipped by the DSP after further processing.
438struct IntermediateMixSamples { 439struct IntermediateMixSamples {
439 struct Samples { 440 struct Samples {
440 s32_le pcm32[4][AudioCore::samples_per_frame]; ///< Little-endian as opposed to DSP middle-endian. 441 s32_le pcm32[4][samples_per_frame]; ///< Little-endian as opposed to DSP middle-endian.
441 }; 442 };
442 443
443 Samples mix1; 444 Samples mix1;
@@ -502,6 +503,8 @@ struct SharedMemory {
502}; 503};
503ASSERT_DSP_STRUCT(SharedMemory, 0x8000); 504ASSERT_DSP_STRUCT(SharedMemory, 0x8000);
504 505
506extern std::array<SharedMemory, 2> g_regions;
507
505// Structures must have an offset that is a multiple of two. 508// Structures must have an offset that is a multiple of two.
506static_assert(offsetof(SharedMemory, frame_counter) % 2 == 0, "Structures in DSP::HLE::SharedMemory must be 2-byte aligned"); 509static_assert(offsetof(SharedMemory, frame_counter) % 2 == 0, "Structures in DSP::HLE::SharedMemory must be 2-byte aligned");
507static_assert(offsetof(SharedMemory, source_configurations) % 2 == 0, "Structures in DSP::HLE::SharedMemory must be 2-byte aligned"); 510static_assert(offsetof(SharedMemory, source_configurations) % 2 == 0, "Structures in DSP::HLE::SharedMemory must be 2-byte aligned");
@@ -535,8 +538,11 @@ void Shutdown();
535 */ 538 */
536bool Tick(); 539bool Tick();
537 540
538/// Returns a mutable reference to the current region. Current region is selected based on the frame counter. 541/**
539SharedMemory& CurrentRegion(); 542 * Set the output sink. This must be called before calling Tick().
543 * @param sink The sink to which audio will be output to.
544 */
545void SetSink(std::unique_ptr<AudioCore::Sink> sink);
540 546
541} // namespace HLE 547} // namespace HLE
542} // namespace DSP 548} // namespace DSP
diff --git a/src/audio_core/hle/filter.h b/src/audio_core/hle/filter.h
index 75738f600..43d2035cd 100644
--- a/src/audio_core/hle/filter.h
+++ b/src/audio_core/hle/filter.h
@@ -16,6 +16,7 @@ namespace HLE {
16 16
17/// Preprocessing filters. There is an independent set of filters for each Source. 17/// Preprocessing filters. There is an independent set of filters for each Source.
18class SourceFilters final { 18class SourceFilters final {
19public:
19 SourceFilters() { Reset(); } 20 SourceFilters() { Reset(); }
20 21
21 /// Reset internal state. 22 /// Reset internal state.
diff --git a/src/audio_core/hle/pipe.cpp b/src/audio_core/hle/pipe.cpp
index 9381883b4..44dff1345 100644
--- a/src/audio_core/hle/pipe.cpp
+++ b/src/audio_core/hle/pipe.cpp
@@ -12,12 +12,14 @@
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "common/logging/log.h" 13#include "common/logging/log.h"
14 14
15#include "core/hle/service/dsp_dsp.h"
16
15namespace DSP { 17namespace DSP {
16namespace HLE { 18namespace HLE {
17 19
18static DspState dsp_state = DspState::Off; 20static DspState dsp_state = DspState::Off;
19 21
20static std::array<std::vector<u8>, static_cast<size_t>(DspPipe::DspPipe_MAX)> pipe_data; 22static std::array<std::vector<u8>, NUM_DSP_PIPE> pipe_data;
21 23
22void ResetPipes() { 24void ResetPipes() {
23 for (auto& data : pipe_data) { 25 for (auto& data : pipe_data) {
@@ -27,17 +29,24 @@ void ResetPipes() {
27} 29}
28 30
29std::vector<u8> PipeRead(DspPipe pipe_number, u32 length) { 31std::vector<u8> PipeRead(DspPipe pipe_number, u32 length) {
30 if (pipe_number >= DspPipe::DspPipe_MAX) { 32 const size_t pipe_index = static_cast<size_t>(pipe_number);
31 LOG_ERROR(Audio_DSP, "pipe_number = %u invalid", pipe_number); 33
34 if (pipe_index >= NUM_DSP_PIPE) {
35 LOG_ERROR(Audio_DSP, "pipe_number = %zu invalid", pipe_index);
32 return {}; 36 return {};
33 } 37 }
34 38
35 std::vector<u8>& data = pipe_data[static_cast<size_t>(pipe_number)]; 39 if (length > UINT16_MAX) { // Can only read at most UINT16_MAX from the pipe
40 LOG_ERROR(Audio_DSP, "length of %u greater than max of %u", length, UINT16_MAX);
41 return {};
42 }
43
44 std::vector<u8>& data = pipe_data[pipe_index];
36 45
37 if (length > data.size()) { 46 if (length > data.size()) {
38 LOG_WARNING(Audio_DSP, "pipe_number = %u is out of data, application requested read of %u but %zu remain", 47 LOG_WARNING(Audio_DSP, "pipe_number = %zu is out of data, application requested read of %u but %zu remain",
39 pipe_number, length, data.size()); 48 pipe_index, length, data.size());
40 length = data.size(); 49 length = static_cast<u32>(data.size());
41 } 50 }
42 51
43 if (length == 0) 52 if (length == 0)
@@ -49,16 +58,20 @@ std::vector<u8> PipeRead(DspPipe pipe_number, u32 length) {
49} 58}
50 59
51size_t GetPipeReadableSize(DspPipe pipe_number) { 60size_t GetPipeReadableSize(DspPipe pipe_number) {
52 if (pipe_number >= DspPipe::DspPipe_MAX) { 61 const size_t pipe_index = static_cast<size_t>(pipe_number);
53 LOG_ERROR(Audio_DSP, "pipe_number = %u invalid", pipe_number); 62
63 if (pipe_index >= NUM_DSP_PIPE) {
64 LOG_ERROR(Audio_DSP, "pipe_number = %zu invalid", pipe_index);
54 return 0; 65 return 0;
55 } 66 }
56 67
57 return pipe_data[static_cast<size_t>(pipe_number)].size(); 68 return pipe_data[pipe_index].size();
58} 69}
59 70
60static void WriteU16(DspPipe pipe_number, u16 value) { 71static void WriteU16(DspPipe pipe_number, u16 value) {
61 std::vector<u8>& data = pipe_data[static_cast<size_t>(pipe_number)]; 72 const size_t pipe_index = static_cast<size_t>(pipe_number);
73
74 std::vector<u8>& data = pipe_data.at(pipe_index);
62 // Little endian 75 // Little endian
63 data.emplace_back(value & 0xFF); 76 data.emplace_back(value & 0xFF);
64 data.emplace_back(value >> 8); 77 data.emplace_back(value >> 8);
@@ -86,11 +99,13 @@ static void AudioPipeWriteStructAddresses() {
86 }; 99 };
87 100
88 // Begin with a u16 denoting the number of structs. 101 // Begin with a u16 denoting the number of structs.
89 WriteU16(DspPipe::Audio, struct_addresses.size()); 102 WriteU16(DspPipe::Audio, static_cast<u16>(struct_addresses.size()));
90 // Then write the struct addresses. 103 // Then write the struct addresses.
91 for (u16 addr : struct_addresses) { 104 for (u16 addr : struct_addresses) {
92 WriteU16(DspPipe::Audio, addr); 105 WriteU16(DspPipe::Audio, addr);
93 } 106 }
107 // Signal that we have data on this pipe.
108 DSP_DSP::SignalPipeInterrupt(DspPipe::Audio);
94} 109}
95 110
96void PipeWrite(DspPipe pipe_number, const std::vector<u8>& buffer) { 111void PipeWrite(DspPipe pipe_number, const std::vector<u8>& buffer) {
@@ -145,7 +160,7 @@ void PipeWrite(DspPipe pipe_number, const std::vector<u8>& buffer) {
145 return; 160 return;
146 } 161 }
147 default: 162 default:
148 LOG_CRITICAL(Audio_DSP, "pipe_number = %u unimplemented", pipe_number); 163 LOG_CRITICAL(Audio_DSP, "pipe_number = %zu unimplemented", static_cast<size_t>(pipe_number));
149 UNIMPLEMENTED(); 164 UNIMPLEMENTED();
150 return; 165 return;
151 } 166 }
diff --git a/src/audio_core/hle/pipe.h b/src/audio_core/hle/pipe.h
index 382d35e87..b714c0496 100644
--- a/src/audio_core/hle/pipe.h
+++ b/src/audio_core/hle/pipe.h
@@ -19,15 +19,19 @@ enum class DspPipe {
19 Debug = 0, 19 Debug = 0,
20 Dma = 1, 20 Dma = 1,
21 Audio = 2, 21 Audio = 2,
22 Binary = 3, 22 Binary = 3
23 DspPipe_MAX
24}; 23};
24constexpr size_t NUM_DSP_PIPE = 8;
25 25
26/** 26/**
27 * Read a DSP pipe. 27 * Reads `length` bytes from the DSP pipe identified with `pipe_number`.
28 * @param pipe_number The Pipe ID 28 * @note Can read up to the maximum value of a u16 in bytes (65,535).
29 * @param length How much data to request. 29 * @note IF an error is encountered with either an invalid `pipe_number` or `length` value, an empty vector will be returned.
30 * @return The data read from the pipe. The size of this vector can be less than the length requested. 30 * @note IF `length` is set to 0, an empty vector will be returned.
31 * @note IF `length` is greater than the amount of data available, this function will only read the available amount.
32 * @param pipe_number a `DspPipe`
33 * @param length the number of bytes to read. The max is 65,535 (max of u16).
34 * @returns a vector of bytes from the specified pipe. On error, will be empty.
31 */ 35 */
32std::vector<u8> PipeRead(DspPipe pipe_number, u32 length); 36std::vector<u8> PipeRead(DspPipe pipe_number, u32 length);
33 37
diff --git a/src/audio_core/hle/source.cpp b/src/audio_core/hle/source.cpp
new file mode 100644
index 000000000..30552fe26
--- /dev/null
+++ b/src/audio_core/hle/source.cpp
@@ -0,0 +1,320 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7
8#include "audio_core/codec.h"
9#include "audio_core/hle/common.h"
10#include "audio_core/hle/source.h"
11#include "audio_core/interpolate.h"
12
13#include "common/assert.h"
14#include "common/logging/log.h"
15
16#include "core/memory.h"
17
18namespace DSP {
19namespace HLE {
20
21SourceStatus::Status Source::Tick(SourceConfiguration::Configuration& config, const s16_le (&adpcm_coeffs)[16]) {
22 ParseConfig(config, adpcm_coeffs);
23
24 if (state.enabled) {
25 GenerateFrame();
26 }
27
28 return GetCurrentStatus();
29}
30
31void Source::MixInto(QuadFrame32& dest, size_t intermediate_mix_id) const {
32 if (!state.enabled)
33 return;
34
35 const std::array<float, 4>& gains = state.gain.at(intermediate_mix_id);
36 for (size_t samplei = 0; samplei < samples_per_frame; samplei++) {
37 // Conversion from stereo (current_frame) to quadraphonic (dest) occurs here.
38 dest[samplei][0] += static_cast<s32>(gains[0] * current_frame[samplei][0]);
39 dest[samplei][1] += static_cast<s32>(gains[1] * current_frame[samplei][1]);
40 dest[samplei][2] += static_cast<s32>(gains[2] * current_frame[samplei][0]);
41 dest[samplei][3] += static_cast<s32>(gains[3] * current_frame[samplei][1]);
42 }
43}
44
45void Source::Reset() {
46 current_frame.fill({});
47 state = {};
48}
49
/**
 * Applies the application-supplied configuration to our internal state.
 *
 * Nothing happens unless config.dirty_raw is non-zero. Each dirty flag handled here is cleared
 * in `config` once its value has been copied into `state`; any bits still set at the end are
 * logged and then dirty_raw is zeroed.
 *
 * The order of the sections matters: a full reset runs before any other field is applied, and
 * format / mono_or_stereo are refreshed before buffers are enqueued because every enqueued
 * Buffer captures the state.format and state.mono_or_stereo in effect at that moment.
 *
 * @param config Configuration to read from; its dirty flags are cleared as they are consumed.
 * @param adpcm_coeffs ADPCM coefficients, copied into state only when adpcm_coefficients_dirty is set.
 */
50void Source::ParseConfig(SourceConfiguration::Configuration& config, const s16_le (&adpcm_coeffs)[16]) {
51 if (!config.dirty_raw) {
52 return;
53 }
54
55 if (config.reset_flag) {
56 config.reset_flag.Assign(0);
57 Reset();
58 LOG_TRACE(Audio_DSP, "source_id=%zu reset", source_id);
59 }
60
 // A partial reset only discards the queued buffers; all other state is kept.
61 if (config.partial_reset_flag) {
62 config.partial_reset_flag.Assign(0);
63 state.input_queue = std::priority_queue<Buffer, std::vector<Buffer>, BufferOrder>{};
64 LOG_TRACE(Audio_DSP, "source_id=%zu partial_reset", source_id);
65 }
66
67 if (config.enable_dirty) {
68 config.enable_dirty.Assign(0);
69 state.enabled = config.enable != 0;
70 LOG_TRACE(Audio_DSP, "source_id=%zu enable=%d", source_id, state.enabled);
71 }
72
73 if (config.sync_dirty) {
74 config.sync_dirty.Assign(0);
75 state.sync = config.sync;
76 LOG_TRACE(Audio_DSP, "source_id=%zu sync=%u", source_id, state.sync);
77 }
78
79 if (config.rate_multiplier_dirty) {
80 config.rate_multiplier_dirty.Assign(0);
81 state.rate_multiplier = config.rate_multiplier;
82 LOG_TRACE(Audio_DSP, "source_id=%zu rate=%f", source_id, state.rate_multiplier);
83
 // Non-positive rates are replaced by 1.0 so that resampling stays well-defined.
84 if (state.rate_multiplier <= 0) {
85 LOG_ERROR(Audio_DSP, "Was given an invalid rate multiplier: source_id=%zu rate=%f", source_id, state.rate_multiplier);
86 state.rate_multiplier = 1.0f;
87 // Note: Actual firmware starts producing garbage if this occurs.
88 }
89 }
90
91 if (config.adpcm_coefficients_dirty) {
92 config.adpcm_coefficients_dirty.Assign(0);
93 std::transform(adpcm_coeffs, adpcm_coeffs + state.adpcm_coeffs.size(), state.adpcm_coeffs.begin(),
94 [](const auto& coeff) { return static_cast<s16>(coeff); });
95 LOG_TRACE(Audio_DSP, "source_id=%zu adpcm update", source_id);
96 }
97
 // The three gain tables (one per intermediate mix) each have their own dirty flag.
98 if (config.gain_0_dirty) {
99 config.gain_0_dirty.Assign(0);
100 std::transform(config.gain[0], config.gain[0] + state.gain[0].size(), state.gain[0].begin(),
101 [](const auto& coeff) { return static_cast<float>(coeff); });
102 LOG_TRACE(Audio_DSP, "source_id=%zu gain 0 update", source_id);
103 }
104
105 if (config.gain_1_dirty) {
106 config.gain_1_dirty.Assign(0);
107 std::transform(config.gain[1], config.gain[1] + state.gain[1].size(), state.gain[1].begin(),
108 [](const auto& coeff) { return static_cast<float>(coeff); });
109 LOG_TRACE(Audio_DSP, "source_id=%zu gain 1 update", source_id);
110 }
111
112 if (config.gain_2_dirty) {
113 config.gain_2_dirty.Assign(0);
114 std::transform(config.gain[2], config.gain[2] + state.gain[2].size(), state.gain[2].begin(),
115 [](const auto& coeff) { return static_cast<float>(coeff); });
116 LOG_TRACE(Audio_DSP, "source_id=%zu gain 2 update", source_id);
117 }
118
119 if (config.filters_enabled_dirty) {
120 config.filters_enabled_dirty.Assign(0);
121 state.filters.Enable(config.simple_filter_enabled.ToBool(), config.biquad_filter_enabled.ToBool());
122 LOG_TRACE(Audio_DSP, "source_id=%zu enable_simple=%hu enable_biquad=%hu",
123 source_id, config.simple_filter_enabled.Value(), config.biquad_filter_enabled.Value());
124 }
125
126 if (config.simple_filter_dirty) {
127 config.simple_filter_dirty.Assign(0);
128 state.filters.Configure(config.simple_filter);
129 LOG_TRACE(Audio_DSP, "source_id=%zu simple filter update", source_id);
130 }
131
132 if (config.biquad_filter_dirty) {
133 config.biquad_filter_dirty.Assign(0);
134 state.filters.Configure(config.biquad_filter);
135 LOG_TRACE(Audio_DSP, "source_id=%zu biquad filter update", source_id);
136 }
137
138 if (config.interpolation_dirty) {
139 config.interpolation_dirty.Assign(0);
140 state.interpolation_mode = config.interpolation_mode;
141 LOG_TRACE(Audio_DSP, "source_id=%zu interpolation_mode=%zu", source_id, static_cast<size_t>(state.interpolation_mode));
142 }
143
 // format and mono_or_stereo are also re-read whenever an embedded buffer is supplied, and
 // this must happen before embedded_buffer_dirty is cleared further below.
144 if (config.format_dirty || config.embedded_buffer_dirty) {
145 config.format_dirty.Assign(0);
146 state.format = config.format;
147 LOG_TRACE(Audio_DSP, "source_id=%zu format=%zu", source_id, static_cast<size_t>(state.format));
148 }
149
150 if (config.mono_or_stereo_dirty || config.embedded_buffer_dirty) {
151 config.mono_or_stereo_dirty.Assign(0);
152 state.mono_or_stereo = config.mono_or_stereo;
153 LOG_TRACE(Audio_DSP, "source_id=%zu mono_or_stereo=%zu", source_id, static_cast<size_t>(state.mono_or_stereo));
154 }
155
 // The embedded buffer is described directly by fields of `config`; it is enqueued with
 // from_queue == false.
156 if (config.embedded_buffer_dirty) {
157 config.embedded_buffer_dirty.Assign(0);
158 state.input_queue.emplace(Buffer{
159 config.physical_address,
160 config.length,
161 static_cast<u8>(config.adpcm_ps),
162 { config.adpcm_yn[0], config.adpcm_yn[1] },
163 config.adpcm_dirty.ToBool(),
164 config.is_looping.ToBool(),
165 config.buffer_id,
166 state.mono_or_stereo,
167 state.format,
168 false
169 });
170 LOG_TRACE(Audio_DSP, "enqueuing embedded addr=0x%08x len=%u id=%hu", config.physical_address, config.length, config.buffer_id);
171 }
172
 // Bit i of config.buffers_dirty marks config.buffers[i] as newly submitted; those entries are
 // enqueued with from_queue == true.
173 if (config.buffer_queue_dirty) {
174 config.buffer_queue_dirty.Assign(0);
175 for (size_t i = 0; i < 4; i++) {
176 if (config.buffers_dirty & (1 << i)) {
177 const auto& b = config.buffers[i];
178 state.input_queue.emplace(Buffer{
179 b.physical_address,
180 b.length,
181 static_cast<u8>(b.adpcm_ps),
182 { b.adpcm_yn[0], b.adpcm_yn[1] },
183 b.adpcm_dirty != 0,
184 b.is_looping != 0,
185 b.buffer_id,
186 state.mono_or_stereo,
187 state.format,
188 true
189 });
190 LOG_TRACE(Audio_DSP, "enqueuing queued %zu addr=0x%08x len=%u id=%hu", i, b.physical_address, b.length, b.buffer_id);
191 }
192 }
193 config.buffers_dirty = 0;
194 }
195
 // Whatever is still set was not handled by any section above; report it, then clear everything.
196 if (config.dirty_raw) {
197 LOG_DEBUG(Audio_DSP, "source_id=%zu remaining_dirty=%x", source_id, config.dirty_raw);
198 }
199
200 config.dirty_raw = 0;
201}
202
203void Source::GenerateFrame() {
204 current_frame.fill({});
205
206 if (state.current_buffer.empty() && !DequeueBuffer()) {
207 state.enabled = false;
208 state.buffer_update = true;
209 state.current_buffer_id = 0;
210 return;
211 }
212
213 size_t frame_position = 0;
214
215 state.current_sample_number = state.next_sample_number;
216 while (frame_position < current_frame.size()) {
217 if (state.current_buffer.empty() && !DequeueBuffer()) {
218 break;
219 }
220
221 const size_t size_to_copy = std::min(state.current_buffer.size(), current_frame.size() - frame_position);
222
223 std::copy(state.current_buffer.begin(), state.current_buffer.begin() + size_to_copy, current_frame.begin() + frame_position);
224 state.current_buffer.erase(state.current_buffer.begin(), state.current_buffer.begin() + size_to_copy);
225
226 frame_position += size_to_copy;
227 state.next_sample_number += static_cast<u32>(size_to_copy);
228 }
229
230 state.filters.ProcessFrame(current_frame);
231}
232
233
234bool Source::DequeueBuffer() {
235 ASSERT_MSG(state.current_buffer.empty(), "Shouldn't dequeue; we still have data in current_buffer");
236
237 if (state.input_queue.empty())
238 return false;
239
240 const Buffer buf = state.input_queue.top();
241 state.input_queue.pop();
242
243 if (buf.adpcm_dirty) {
244 state.adpcm_state.yn1 = buf.adpcm_yn[0];
245 state.adpcm_state.yn2 = buf.adpcm_yn[1];
246 }
247
248 if (buf.is_looping) {
249 LOG_ERROR(Audio_DSP, "Looped buffers are unimplemented at the moment");
250 }
251
252 const u8* const memory = Memory::GetPhysicalPointer(buf.physical_address);
253 if (memory) {
254 const unsigned num_channels = buf.mono_or_stereo == MonoOrStereo::Stereo ? 2 : 1;
255 switch (buf.format) {
256 case Format::PCM8:
257 state.current_buffer = Codec::DecodePCM8(num_channels, memory, buf.length);
258 break;
259 case Format::PCM16:
260 state.current_buffer = Codec::DecodePCM16(num_channels, memory, buf.length);
261 break;
262 case Format::ADPCM:
263 DEBUG_ASSERT(num_channels == 1);
264 state.current_buffer = Codec::DecodeADPCM(memory, buf.length, state.adpcm_coeffs, state.adpcm_state);
265 break;
266 default:
267 UNIMPLEMENTED();
268 break;
269 }
270 } else {
271 LOG_WARNING(Audio_DSP, "source_id=%zu buffer_id=%hu length=%u: Invalid physical address 0x%08X",
272 source_id, buf.buffer_id, buf.length, buf.physical_address);
273 state.current_buffer.clear();
274 return true;
275 }
276
277 switch (state.interpolation_mode) {
278 case InterpolationMode::None:
279 state.current_buffer = AudioInterp::None(state.interp_state, state.current_buffer, state.rate_multiplier);
280 break;
281 case InterpolationMode::Linear:
282 state.current_buffer = AudioInterp::Linear(state.interp_state, state.current_buffer, state.rate_multiplier);
283 break;
284 case InterpolationMode::Polyphase:
285 // TODO(merry): Implement polyphase interpolation
286 state.current_buffer = AudioInterp::Linear(state.interp_state, state.current_buffer, state.rate_multiplier);
287 break;
288 default:
289 UNIMPLEMENTED();
290 break;
291 }
292
293 state.current_sample_number = 0;
294 state.next_sample_number = 0;
295 state.current_buffer_id = buf.buffer_id;
296 state.buffer_update = buf.from_queue;
297
298 LOG_TRACE(Audio_DSP, "source_id=%zu buffer_id=%hu from_queue=%s current_buffer.size()=%zu",
299 source_id, buf.buffer_id, buf.from_queue ? "true" : "false", state.current_buffer.size());
300 return true;
301}
302
303SourceStatus::Status Source::GetCurrentStatus() {
304 SourceStatus::Status ret;
305
306 // Applications depend on the correct emulation of
307 // current_buffer_id_dirty and current_buffer_id to synchronise
308 // audio with video.
309 ret.is_enabled = state.enabled;
310 ret.current_buffer_id_dirty = state.buffer_update ? 1 : 0;
311 state.buffer_update = false;
312 ret.current_buffer_id = state.current_buffer_id;
313 ret.buffer_position = state.current_sample_number;
314 ret.sync = state.sync;
315
316 return ret;
317}
318
319} // namespace HLE
320} // namespace DSP
diff --git a/src/audio_core/hle/source.h b/src/audio_core/hle/source.h
new file mode 100644
index 000000000..7ee08d424
--- /dev/null
+++ b/src/audio_core/hle/source.h
@@ -0,0 +1,144 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <queue>
9#include <vector>
10
11#include "audio_core/codec.h"
12#include "audio_core/hle/common.h"
13#include "audio_core/hle/dsp.h"
14#include "audio_core/hle/filter.h"
15#include "audio_core/interpolate.h"
16
17#include "common/common_types.h"
18
19namespace DSP {
20namespace HLE {
21
22/**
23 * This module performs:
24 * - Buffer management
25 * - Decoding of buffers
26 * - Buffer resampling and interpolation
27 * - Per-source filtering (SimpleFilter, BiquadFilter)
28 * - Per-source gain
29 * - Other per-source processing
30 */
31class Source final {
32public:
33 explicit Source(size_t source_id_) : source_id(source_id_) {
34 Reset();
35 }
36
37 /// Resets internal state.
38 void Reset();
39
40 /**
41 * This is called once every audio frame. This performs per-source processing every frame.
42 * @param config The new configuration we've got for this Source from the application.
43 * @param adpcm_coeffs ADPCM coefficients to use if config tells us to use them (may contain invalid values otherwise).
44 * @return The current status of this Source. This is given back to the emulated application via SharedMemory.
45 */
46 SourceStatus::Status Tick(SourceConfiguration::Configuration& config, const s16_le (&adpcm_coeffs)[16]);
47
48 /**
49 * Mix this source's output into dest, using the gains for the `intermediate_mix_id`-th intermediate mixer.
50 * @param dest The QuadFrame32 to mix into.
51 * @param intermediate_mix_id The id of the intermediate mix whose gains we are using.
52 */
53 void MixInto(QuadFrame32& dest, size_t intermediate_mix_id) const;
54
55private:
56 const size_t source_id;
57 StereoFrame16 current_frame;
58
59 using Format = SourceConfiguration::Configuration::Format;
60 using InterpolationMode = SourceConfiguration::Configuration::InterpolationMode;
61 using MonoOrStereo = SourceConfiguration::Configuration::MonoOrStereo;
62
63 /// Internal representation of a buffer for our buffer queue
64 struct Buffer {
65 PAddr physical_address;
66 u32 length;
67 u8 adpcm_ps;
68 std::array<u16, 2> adpcm_yn;
69 bool adpcm_dirty;
70 bool is_looping;
71 u16 buffer_id;
72
73 MonoOrStereo mono_or_stereo;
74 Format format;
75
76 bool from_queue;
77 };
78
79 struct BufferOrder {
80 bool operator() (const Buffer& a, const Buffer& b) const {
81 // Lower buffer_id comes first.
82 return a.buffer_id > b.buffer_id;
83 }
84 };
85
86 struct {
87
88 // State variables
89
90 bool enabled = false;
91 u16 sync = 0;
92
93 // Mixing
94
95 std::array<std::array<float, 4>, 3> gain = {};
96
97 // Buffer queue
98
99 std::priority_queue<Buffer, std::vector<Buffer>, BufferOrder> input_queue;
100 MonoOrStereo mono_or_stereo = MonoOrStereo::Mono;
101 Format format = Format::ADPCM;
102
103 // Current buffer
104
105 u32 current_sample_number = 0;
106 u32 next_sample_number = 0;
107 std::vector<std::array<s16, 2>> current_buffer;
108
109 // buffer_id state
110
111 bool buffer_update = false;
112 u32 current_buffer_id = 0;
113
114 // Decoding state
115
116 std::array<s16, 16> adpcm_coeffs = {};
117 Codec::ADPCMState adpcm_state = {};
118
119 // Resampling state
120
121 float rate_multiplier = 1.0;
122 InterpolationMode interpolation_mode = InterpolationMode::Polyphase;
123 AudioInterp::State interp_state = {};
124
125 // Filter state
126
127 SourceFilters filters;
128
129 } state;
130
131 // Internal functions
132
133 /// INTERNAL: Update our internal state based on the current config.
134 void ParseConfig(SourceConfiguration::Configuration& config, const s16_le (&adpcm_coeffs)[16]);
135 /// INTERNAL: Generate the current audio output for this frame based on our internal state.
136 void GenerateFrame();
137 /// INTERNAL: Dequeues a buffer and does preprocessing on it (decoding, resampling). Puts it into current_buffer.
138 bool DequeueBuffer();
139 /// INTERNAL: Generates a SourceStatus::Status based on our internal state.
140 SourceStatus::Status GetCurrentStatus();
141};
142
143} // namespace HLE
144} // namespace DSP
diff --git a/src/audio_core/interpolate.cpp b/src/audio_core/interpolate.cpp
new file mode 100644
index 000000000..fcd3aa066
--- /dev/null
+++ b/src/audio_core/interpolate.cpp
@@ -0,0 +1,85 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "audio_core/interpolate.h"
6
7#include "common/assert.h"
8#include "common/math_util.h"
9
10namespace AudioInterp {
11
12// Calculations are done in fixed point with 24 fractional bits.
13// (This is not verified. This was chosen for minimal error.)
14constexpr u64 scale_factor = 1 << 24;
15constexpr u64 scale_mask = scale_factor - 1;
16
17/// Here we step over the input in steps of rate_multiplier, until we consume all of the input.
18/// Three adjacent samples are passed to fn each step.
19template <typename Function>
20static StereoBuffer16 StepOverSamples(State& state, const StereoBuffer16& input, float rate_multiplier, Function fn) {
21 ASSERT(rate_multiplier > 0);
22
23 if (input.size() < 2)
24 return {};
25
26 StereoBuffer16 output;
27 output.reserve(static_cast<size_t>(input.size() / rate_multiplier));
28
29 u64 step_size = static_cast<u64>(rate_multiplier * scale_factor);
30
31 u64 fposition = 0;
32 const u64 max_fposition = input.size() * scale_factor;
33
34 while (fposition < 1 * scale_factor) {
35 u64 fraction = fposition & scale_mask;
36
37 output.push_back(fn(fraction, state.xn2, state.xn1, input[0]));
38
39 fposition += step_size;
40 }
41
42 while (fposition < 2 * scale_factor) {
43 u64 fraction = fposition & scale_mask;
44
45 output.push_back(fn(fraction, state.xn1, input[0], input[1]));
46
47 fposition += step_size;
48 }
49
50 while (fposition < max_fposition) {
51 u64 fraction = fposition & scale_mask;
52
53 size_t index = static_cast<size_t>(fposition / scale_factor);
54 output.push_back(fn(fraction, input[index - 2], input[index - 1], input[index]));
55
56 fposition += step_size;
57 }
58
59 state.xn2 = input[input.size() - 2];
60 state.xn1 = input[input.size() - 1];
61
62 return output;
63}
64
65StereoBuffer16 None(State& state, const StereoBuffer16& input, float rate_multiplier) {
66 return StepOverSamples(state, input, rate_multiplier, [](u64 fraction, const auto& x0, const auto& x1, const auto& x2) {
67 return x0;
68 });
69}
70
71StereoBuffer16 Linear(State& state, const StereoBuffer16& input, float rate_multiplier) {
72 // Note on accuracy: Some values that this produces are +/- 1 from the actual firmware.
73 return StepOverSamples(state, input, rate_multiplier, [](u64 fraction, const auto& x0, const auto& x1, const auto& x2) {
74 // This is a saturated subtraction. (Verified by black-box fuzzing.)
75 s64 delta0 = MathUtil::Clamp<s64>(x1[0] - x0[0], -32768, 32767);
76 s64 delta1 = MathUtil::Clamp<s64>(x1[1] - x0[1], -32768, 32767);
77
78 return std::array<s16, 2> {
79 static_cast<s16>(x0[0] + fraction * delta0 / scale_factor),
80 static_cast<s16>(x0[1] + fraction * delta1 / scale_factor)
81 };
82 });
83}
84
85} // namespace AudioInterp
diff --git a/src/audio_core/interpolate.h b/src/audio_core/interpolate.h
new file mode 100644
index 000000000..a4c0a453d
--- /dev/null
+++ b/src/audio_core/interpolate.h
@@ -0,0 +1,41 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <vector>
9
10#include "common/common_types.h"
11
12namespace AudioInterp {
13
14/// A variable length buffer of signed PCM16 stereo samples.
15using StereoBuffer16 = std::vector<std::array<s16, 2>>;
16
/// Resampler history: the last two input samples seen by a previous call, so that stepping
/// can continue across consecutive input buffers. Both start out as silence (zero).
17struct State {
18 // Two historical samples.
19 std::array<s16, 2> xn1 = {}; ///< x[n-1]
20 std::array<s16, 2> xn2 = {}; ///< x[n-2]
21};
22
23/**
24 * No interpolation. This is equivalent to a zero-order hold. There is a two-sample predelay.
 * @param state Sample history from the previous call; updated with the last two samples of input.
25 * @param input Input buffer.
26 * @param rate_multiplier Stretch factor. Must be a positive non-zero value.
27 * rate_multiplier > 1.0 performs decimation and rate_multiplier < 1.0 performs upsampling.
28 * @return The resampled audio buffer.
29 */
30StereoBuffer16 None(State& state, const StereoBuffer16& input, float rate_multiplier);
31
32/**
33 * Linear interpolation. This is equivalent to a first-order hold. There is a two-sample predelay.
 * @param state Sample history from the previous call; updated with the last two samples of input.
34 * @param input Input buffer.
35 * @param rate_multiplier Stretch factor. Must be a positive non-zero value.
36 * rate_multiplier > 1.0 performs decimation and rate_multiplier < 1.0 performs upsampling.
37 * @return The resampled audio buffer.
38 */
39StereoBuffer16 Linear(State& state, const StereoBuffer16& input, float rate_multiplier);
40
41} // namespace AudioInterp
diff --git a/src/audio_core/null_sink.h b/src/audio_core/null_sink.h
new file mode 100644
index 000000000..faf0ee4e1
--- /dev/null
+++ b/src/audio_core/null_sink.h
@@ -0,0 +1,29 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstddef>
8
9#include "audio_core/audio_core.h"
10#include "audio_core/sink.h"
11
12namespace AudioCore {
13
/// A sink that discards everything it is given; it never holds any queued samples.
14class NullSink final : public Sink {
15public:
16 ~NullSink() override = default;
17
 /// Reports native_sample_rate, as there is no real device to query.
18 unsigned int GetNativeSampleRate() const override {
19 return native_sample_rate;
20 }
21
 /// Drops the samples.
22 void EnqueueSamples(const std::vector<s16>&) override {}
23
 /// Always 0: nothing is ever queued.
24 size_t SamplesInQueue() const override {
25 return 0;
26 }
27};
28
29} // namespace AudioCore
diff --git a/src/audio_core/sdl2_sink.cpp b/src/audio_core/sdl2_sink.cpp
new file mode 100644
index 000000000..dc75c04ee
--- /dev/null
+++ b/src/audio_core/sdl2_sink.cpp
@@ -0,0 +1,126 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <list>
6#include <vector>
7
8#include <SDL.h>
9
10#include "audio_core/audio_core.h"
11#include "audio_core/sdl2_sink.h"
12
13#include "common/assert.h"
14#include "common/logging/log.h"
15#include <numeric>
16
17namespace AudioCore {
18
/// Private state of SDL2Sink, also handed to SDL as the callback's userdata.
19struct SDL2Sink::Impl {
 /// Sample rate SDL reported for the opened device (0 until a device has been opened).
20 unsigned int sample_rate = 0;
21
 /// SDL device handle; 0 means no device is open.
22 SDL_AudioDeviceID audio_device_id = 0;
23
 /// Pending interleaved s16 buffers: appended by EnqueueSamples, consumed front-first by Callback.
24 std::list<std::vector<s16>> queue;
25
 /// SDL audio callback: fills `buffer` from `queue`, padding with zeros when the queue runs out.
26 static void Callback(void* impl_, u8* buffer, int buffer_size_in_bytes);
27};
28
29SDL2Sink::SDL2Sink() : impl(std::make_unique<Impl>()) {
30 if (SDL_Init(SDL_INIT_AUDIO) < 0) {
31 LOG_CRITICAL(Audio_Sink, "SDL_Init(SDL_INIT_AUDIO) failed");
32 impl->audio_device_id = 0;
33 return;
34 }
35
36 SDL_AudioSpec desired_audiospec;
37 SDL_zero(desired_audiospec);
38 desired_audiospec.format = AUDIO_S16;
39 desired_audiospec.channels = 2;
40 desired_audiospec.freq = native_sample_rate;
41 desired_audiospec.samples = 1024;
42 desired_audiospec.userdata = impl.get();
43 desired_audiospec.callback = &Impl::Callback;
44
45 SDL_AudioSpec obtained_audiospec;
46 SDL_zero(obtained_audiospec);
47
48 impl->audio_device_id = SDL_OpenAudioDevice(nullptr, false, &desired_audiospec, &obtained_audiospec, 0);
49 if (impl->audio_device_id <= 0) {
50 LOG_CRITICAL(Audio_Sink, "SDL_OpenAudioDevice failed");
51 return;
52 }
53
54 impl->sample_rate = obtained_audiospec.freq;
55
56 // SDL2 audio devices start out paused, unpause it:
57 SDL_PauseAudioDevice(impl->audio_device_id, 0);
58}
59
60SDL2Sink::~SDL2Sink() {
61 if (impl->audio_device_id <= 0)
62 return;
63
64 SDL_CloseAudioDevice(impl->audio_device_id);
65}
66
67unsigned int SDL2Sink::GetNativeSampleRate() const {
68 if (impl->audio_device_id <= 0)
69 return native_sample_rate;
70
71 return impl->sample_rate;
72}
73
74void SDL2Sink::EnqueueSamples(const std::vector<s16>& samples) {
75 if (impl->audio_device_id <= 0)
76 return;
77
78 ASSERT_MSG(samples.size() % 2 == 0, "Samples must be in interleaved stereo PCM16 format (size must be a multiple of two)");
79
80 SDL_LockAudioDevice(impl->audio_device_id);
81 impl->queue.emplace_back(samples);
82 SDL_UnlockAudioDevice(impl->audio_device_id);
83}
84
85size_t SDL2Sink::SamplesInQueue() const {
86 if (impl->audio_device_id <= 0)
87 return 0;
88
89 SDL_LockAudioDevice(impl->audio_device_id);
90
91 size_t total_size = std::accumulate(impl->queue.begin(), impl->queue.end(), static_cast<size_t>(0),
92 [](size_t sum, const auto& buffer) {
93 // Division by two because each stereo sample is made of two s16.
94 return sum + buffer.size() / 2;
95 });
96
97 SDL_UnlockAudioDevice(impl->audio_device_id);
98
99 return total_size;
100}
101
102void SDL2Sink::Impl::Callback(void* impl_, u8* buffer, int buffer_size_in_bytes) {
103 Impl* impl = reinterpret_cast<Impl*>(impl_);
104
105 size_t remaining_size = static_cast<size_t>(buffer_size_in_bytes) / sizeof(s16); // Keep track of size in 16-bit increments.
106
107 while (remaining_size > 0 && !impl->queue.empty()) {
108 if (impl->queue.front().size() <= remaining_size) {
109 memcpy(buffer, impl->queue.front().data(), impl->queue.front().size() * sizeof(s16));
110 buffer += impl->queue.front().size() * sizeof(s16);
111 remaining_size -= impl->queue.front().size();
112 impl->queue.pop_front();
113 } else {
114 memcpy(buffer, impl->queue.front().data(), remaining_size * sizeof(s16));
115 buffer += remaining_size * sizeof(s16);
116 impl->queue.front().erase(impl->queue.front().begin(), impl->queue.front().begin() + remaining_size);
117 remaining_size = 0;
118 }
119 }
120
121 if (remaining_size > 0) {
122 memset(buffer, 0, remaining_size * sizeof(s16));
123 }
124}
125
126} // namespace AudioCore
diff --git a/src/audio_core/sdl2_sink.h b/src/audio_core/sdl2_sink.h
new file mode 100644
index 000000000..0f296b673
--- /dev/null
+++ b/src/audio_core/sdl2_sink.h
@@ -0,0 +1,30 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstddef>
8#include <memory>
9
10#include "audio_core/sink.h"
11
12namespace AudioCore {
13
/// Audio sink that plays samples through an SDL2 audio device. Implementation details are
/// hidden behind a private Impl so that this header does not expose SDL types.
14class SDL2Sink final : public Sink {
15public:
16 SDL2Sink();
17 ~SDL2Sink() override;
18
 /// Sample rate of the opened device, or the core's native rate if no device could be opened.
19 unsigned int GetNativeSampleRate() const override;
20
 /// Queues interleaved stereo s16 samples for playback.
21 void EnqueueSamples(const std::vector<s16>& samples) override;
22
 /// Number of stereo samples still waiting to be played.
23 size_t SamplesInQueue() const override;
24
25private:
26 struct Impl;
27 std::unique_ptr<Impl> impl;
28};
29
30} // namespace AudioCore
diff --git a/src/audio_core/sink.h b/src/audio_core/sink.h
index cad21a85e..1c881c3d2 100644
--- a/src/audio_core/sink.h
+++ b/src/audio_core/sink.h
@@ -19,7 +19,7 @@ public:
19 virtual ~Sink() = default; 19 virtual ~Sink() = default;
20 20
21 /// The native rate of this sink. The sink expects to be fed samples that respect this. (Units: samples/sec) 21 /// The native rate of this sink. The sink expects to be fed samples that respect this. (Units: samples/sec)
22 virtual unsigned GetNativeSampleRate() const = 0; 22 virtual unsigned int GetNativeSampleRate() const = 0;
23 23
24 /** 24 /**
25 * Feed stereo samples to sink. 25 * Feed stereo samples to sink.
diff --git a/src/audio_core/sink_details.cpp b/src/audio_core/sink_details.cpp
new file mode 100644
index 000000000..ba5e83d17
--- /dev/null
+++ b/src/audio_core/sink_details.cpp
@@ -0,0 +1,25 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <memory>
6#include <vector>
7
8#include "audio_core/null_sink.h"
9#include "audio_core/sink_details.h"
10
11#ifdef HAVE_SDL2
12#include "audio_core/sdl2_sink.h"
13#endif
14
15namespace AudioCore {
16
17// g_sink_details is ordered in terms of desirability, with the best choice at the top.
// Each entry pairs a sink id with a factory that constructs a new instance of that sink.
// The "sdl2" entry only exists when built with HAVE_SDL2; "null" is always available.
18const std::vector<SinkDetails> g_sink_details = {
19#ifdef HAVE_SDL2
20 { "sdl2", []() { return std::make_unique<SDL2Sink>(); } },
21#endif
22 { "null", []() { return std::make_unique<NullSink>(); } },
23};
24
25} // namespace AudioCore
diff --git a/src/audio_core/sink_details.h b/src/audio_core/sink_details.h
new file mode 100644
index 000000000..4b30cf835
--- /dev/null
+++ b/src/audio_core/sink_details.h
@@ -0,0 +1,27 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <functional>
8#include <memory>
9#include <vector>
10
11namespace AudioCore {
12
13class Sink;
14
/// Describes one available sink type: its identifier plus a factory for creating instances.
15struct SinkDetails {
 /// @param id_ Identifier string; only the pointer is stored, the string is not copied.
 /// @param factory_ Callable that constructs a new instance of this sink type.
16 SinkDetails(const char* id_, std::function<std::unique_ptr<Sink>()> factory_)
17 : id(id_), factory(factory_) {}
18
19 /// Name for this sink.
20 const char* id;
21 /// A method to call to construct an instance of this type of sink.
22 std::function<std::unique_ptr<Sink>()> factory;
23};
24
25extern const std::vector<SinkDetails> g_sink_details;
26
27} // namespace AudioCore
diff --git a/src/citra/CMakeLists.txt b/src/citra/CMakeLists.txt
index fa615deb9..43fa06b4e 100644
--- a/src/citra/CMakeLists.txt
+++ b/src/citra/CMakeLists.txt
@@ -21,7 +21,7 @@ target_link_libraries(citra ${SDL2_LIBRARY} ${OPENGL_gl_LIBRARY} inih glad)
21if (MSVC) 21if (MSVC)
22 target_link_libraries(citra getopt) 22 target_link_libraries(citra getopt)
23endif() 23endif()
24target_link_libraries(citra ${PLATFORM_LIBRARIES}) 24target_link_libraries(citra ${PLATFORM_LIBRARIES} Threads::Threads)
25 25
26if(${CMAKE_SYSTEM_NAME} MATCHES "Linux|FreeBSD|OpenBSD|NetBSD") 26if(${CMAKE_SYSTEM_NAME} MATCHES "Linux|FreeBSD|OpenBSD|NetBSD")
27 install(TARGETS citra RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/bin") 27 install(TARGETS citra RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/bin")
diff --git a/src/citra/citra.cpp b/src/citra/citra.cpp
index d6ad13f69..b4501eb2e 100644
--- a/src/citra/citra.cpp
+++ b/src/citra/citra.cpp
@@ -20,6 +20,7 @@
20#include "common/logging/log.h" 20#include "common/logging/log.h"
21#include "common/logging/backend.h" 21#include "common/logging/backend.h"
22#include "common/logging/filter.h" 22#include "common/logging/filter.h"
23#include "common/scm_rev.h"
23#include "common/scope_exit.h" 24#include "common/scope_exit.h"
24 25
25#include "core/settings.h" 26#include "core/settings.h"
@@ -34,11 +35,17 @@
34#include "video_core/video_core.h" 35#include "video_core/video_core.h"
35 36
36 37
37static void PrintHelp() 38static void PrintHelp(const char *argv0)
38{ 39{
39 std::cout << "Usage: citra [options] <filename>" << std::endl; 40 std::cout << "Usage: " << argv0 << " [options] <filename>\n"
40 std::cout << "--help, -h Display this information" << std::endl; 41 "-g, --gdbport=NUMBER Enable gdb stub on port NUMBER\n"
41 std::cout << "--gdbport, -g number Enable gdb stub on port number" << std::endl; 42 "-h, --help Display this help and exit\n"
43 "-v, --version Output version information and exit\n";
44}
45
46static void PrintVersion()
47{
48 std::cout << "Citra " << Common::g_scm_branch << " " << Common::g_scm_desc << std::endl;
42} 49}
43 50
44/// Application entry point 51/// Application entry point
@@ -51,18 +58,16 @@ int main(int argc, char **argv) {
51 std::string boot_filename; 58 std::string boot_filename;
52 59
53 static struct option long_options[] = { 60 static struct option long_options[] = {
54 { "help", no_argument, 0, 'h' },
55 { "gdbport", required_argument, 0, 'g' }, 61 { "gdbport", required_argument, 0, 'g' },
62 { "help", no_argument, 0, 'h' },
63 { "version", no_argument, 0, 'v' },
56 { 0, 0, 0, 0 } 64 { 0, 0, 0, 0 }
57 }; 65 };
58 66
59 while (optind < argc) { 67 while (optind < argc) {
60 char arg = getopt_long(argc, argv, ":hg:", long_options, &option_index); 68 char arg = getopt_long(argc, argv, "g:hv", long_options, &option_index);
61 if (arg != -1) { 69 if (arg != -1) {
62 switch (arg) { 70 switch (arg) {
63 case 'h':
64 PrintHelp();
65 return 0;
66 case 'g': 71 case 'g':
67 errno = 0; 72 errno = 0;
68 gdb_port = strtoul(optarg, &endarg, 0); 73 gdb_port = strtoul(optarg, &endarg, 0);
@@ -73,6 +78,12 @@ int main(int argc, char **argv) {
73 exit(1); 78 exit(1);
74 } 79 }
75 break; 80 break;
81 case 'h':
82 PrintHelp(argv[0]);
83 return 0;
84 case 'v':
85 PrintVersion();
86 return 0;
76 } 87 }
77 } else { 88 } else {
78 boot_filename = argv[optind]; 89 boot_filename = argv[optind];
diff --git a/src/citra/config.cpp b/src/citra/config.cpp
index 6b6617352..c5cb4fb38 100644
--- a/src/citra/config.cpp
+++ b/src/citra/config.cpp
@@ -65,11 +65,15 @@ void Config::ReadValues() {
65 // Renderer 65 // Renderer
66 Settings::values.use_hw_renderer = sdl2_config->GetBoolean("Renderer", "use_hw_renderer", false); 66 Settings::values.use_hw_renderer = sdl2_config->GetBoolean("Renderer", "use_hw_renderer", false);
67 Settings::values.use_shader_jit = sdl2_config->GetBoolean("Renderer", "use_shader_jit", true); 67 Settings::values.use_shader_jit = sdl2_config->GetBoolean("Renderer", "use_shader_jit", true);
68 Settings::values.use_scaled_resolution = sdl2_config->GetBoolean("Renderer", "use_scaled_resolution", false);
68 69
69 Settings::values.bg_red = (float)sdl2_config->GetReal("Renderer", "bg_red", 1.0); 70 Settings::values.bg_red = (float)sdl2_config->GetReal("Renderer", "bg_red", 1.0);
70 Settings::values.bg_green = (float)sdl2_config->GetReal("Renderer", "bg_green", 1.0); 71 Settings::values.bg_green = (float)sdl2_config->GetReal("Renderer", "bg_green", 1.0);
71 Settings::values.bg_blue = (float)sdl2_config->GetReal("Renderer", "bg_blue", 1.0); 72 Settings::values.bg_blue = (float)sdl2_config->GetReal("Renderer", "bg_blue", 1.0);
72 73
74 // Audio
75 Settings::values.sink_id = sdl2_config->Get("Audio", "output_engine", "auto");
76
73 // Data Storage 77 // Data Storage
74 Settings::values.use_virtual_sd = sdl2_config->GetBoolean("Data Storage", "use_virtual_sd", true); 78 Settings::values.use_virtual_sd = sdl2_config->GetBoolean("Data Storage", "use_virtual_sd", true);
75 79
@@ -81,7 +85,7 @@ void Config::ReadValues() {
81 85
82 // Debugging 86 // Debugging
83 Settings::values.use_gdbstub = sdl2_config->GetBoolean("Debugging", "use_gdbstub", false); 87 Settings::values.use_gdbstub = sdl2_config->GetBoolean("Debugging", "use_gdbstub", false);
84 Settings::values.gdbstub_port = sdl2_config->GetInteger("Debugging", "gdbstub_port", 24689); 88 Settings::values.gdbstub_port = static_cast<u16>(sdl2_config->GetInteger("Debugging", "gdbstub_port", 24689));
85} 89}
86 90
87void Config::Reload() { 91void Config::Reload() {
diff --git a/src/citra/default_ini.h b/src/citra/default_ini.h
index c9b490a00..49126356f 100644
--- a/src/citra/default_ini.h
+++ b/src/citra/default_ini.h
@@ -46,12 +46,21 @@ use_hw_renderer =
46# 0 : Interpreter (slow), 1 (default): JIT (fast) 46# 0 : Interpreter (slow), 1 (default): JIT (fast)
47use_shader_jit = 47use_shader_jit =
48 48
49# Whether to use native 3DS screen resolution or to scale rendering resolution to the displayed screen size.
50# 0 (default): Native, 1: Scaled
51use_scaled_resolution =
52
49# The clear color for the renderer. What shows up on the sides of the bottom screen. 53# The clear color for the renderer. What shows up on the sides of the bottom screen.
50# Must be in range of 0.0-1.0. Defaults to 1.0 for all. 54# Must be in range of 0.0-1.0. Defaults to 1.0 for all.
51bg_red = 55bg_red =
52bg_blue = 56bg_blue =
53bg_green = 57bg_green =
54 58
59[Audio]
60# Which audio output engine to use.
61# auto (default): Auto-select, null: No audio output, sdl2: SDL2 (if available)
62output_engine =
63
55[Data Storage] 64[Data Storage]
56# Whether to create a virtual SD card. 65# Whether to create a virtual SD card.
57# 1 (default): Yes, 0: No 66# 1 (default): Yes, 0: No
diff --git a/src/citra/emu_window/emu_window_sdl2.cpp b/src/citra/emu_window/emu_window_sdl2.cpp
index 924189f4c..12cdd9d95 100644
--- a/src/citra/emu_window/emu_window_sdl2.cpp
+++ b/src/citra/emu_window/emu_window_sdl2.cpp
@@ -9,6 +9,8 @@
9#define SDL_MAIN_HANDLED 9#define SDL_MAIN_HANDLED
10#include <SDL.h> 10#include <SDL.h>
11 11
12#include <glad/glad.h>
13
12#include "common/key_map.h" 14#include "common/key_map.h"
13#include "common/logging/log.h" 15#include "common/logging/log.h"
14#include "common/scm_rev.h" 16#include "common/scm_rev.h"
@@ -98,6 +100,11 @@ EmuWindow_SDL2::EmuWindow_SDL2() {
98 exit(1); 100 exit(1);
99 } 101 }
100 102
103 if (!gladLoadGLLoader(static_cast<GLADloadproc>(SDL_GL_GetProcAddress))) {
104 LOG_CRITICAL(Frontend, "Failed to initialize GL functions! Exiting...");
105 exit(1);
106 }
107
101 OnResize(); 108 OnResize();
102 OnMinimalClientAreaChangeRequest(GetActiveConfig().min_client_area_size); 109 OnMinimalClientAreaChangeRequest(GetActiveConfig().min_client_area_size);
103 SDL_PumpEvents(); 110 SDL_PumpEvents();
diff --git a/src/citra_qt/CMakeLists.txt b/src/citra_qt/CMakeLists.txt
index 6660d9879..3f0099200 100644
--- a/src/citra_qt/CMakeLists.txt
+++ b/src/citra_qt/CMakeLists.txt
@@ -55,6 +55,7 @@ set(HEADERS
55 configure_dialog.h 55 configure_dialog.h
56 configure_general.h 56 configure_general.h
57 game_list.h 57 game_list.h
58 game_list_p.h
58 hotkeys.h 59 hotkeys.h
59 main.h 60 main.h
60 ui_settings.h 61 ui_settings.h
@@ -92,7 +93,7 @@ else()
92endif() 93endif()
93target_link_libraries(citra-qt core video_core audio_core common qhexedit) 94target_link_libraries(citra-qt core video_core audio_core common qhexedit)
94target_link_libraries(citra-qt ${OPENGL_gl_LIBRARY} ${CITRA_QT_LIBS}) 95target_link_libraries(citra-qt ${OPENGL_gl_LIBRARY} ${CITRA_QT_LIBS})
95target_link_libraries(citra-qt ${PLATFORM_LIBRARIES}) 96target_link_libraries(citra-qt ${PLATFORM_LIBRARIES} Threads::Threads)
96 97
97if(${CMAKE_SYSTEM_NAME} MATCHES "Linux|FreeBSD|OpenBSD|NetBSD") 98if(${CMAKE_SYSTEM_NAME} MATCHES "Linux|FreeBSD|OpenBSD|NetBSD")
98 install(TARGETS citra-qt RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/bin") 99 install(TARGETS citra-qt RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/bin")
diff --git a/src/citra_qt/bootmanager.cpp b/src/citra_qt/bootmanager.cpp
index 8e60b9cad..01b81c11c 100644
--- a/src/citra_qt/bootmanager.cpp
+++ b/src/citra_qt/bootmanager.cpp
@@ -71,7 +71,9 @@ void EmuThread::run() {
71 // Shutdown the core emulation 71 // Shutdown the core emulation
72 System::Shutdown(); 72 System::Shutdown();
73 73
74#if MICROPROFILE_ENABLED
74 MicroProfileOnThreadExit(); 75 MicroProfileOnThreadExit();
76#endif
75 77
76 render_window->moveContext(); 78 render_window->moveContext();
77} 79}
diff --git a/src/citra_qt/config.cpp b/src/citra_qt/config.cpp
index e363be38a..b5bb75537 100644
--- a/src/citra_qt/config.cpp
+++ b/src/citra_qt/config.cpp
@@ -45,12 +45,17 @@ void Config::ReadValues() {
45 qt_config->beginGroup("Renderer"); 45 qt_config->beginGroup("Renderer");
46 Settings::values.use_hw_renderer = qt_config->value("use_hw_renderer", false).toBool(); 46 Settings::values.use_hw_renderer = qt_config->value("use_hw_renderer", false).toBool();
47 Settings::values.use_shader_jit = qt_config->value("use_shader_jit", true).toBool(); 47 Settings::values.use_shader_jit = qt_config->value("use_shader_jit", true).toBool();
48 Settings::values.use_scaled_resolution = qt_config->value("use_scaled_resolution", false).toBool();
48 49
49 Settings::values.bg_red = qt_config->value("bg_red", 1.0).toFloat(); 50 Settings::values.bg_red = qt_config->value("bg_red", 1.0).toFloat();
50 Settings::values.bg_green = qt_config->value("bg_green", 1.0).toFloat(); 51 Settings::values.bg_green = qt_config->value("bg_green", 1.0).toFloat();
51 Settings::values.bg_blue = qt_config->value("bg_blue", 1.0).toFloat(); 52 Settings::values.bg_blue = qt_config->value("bg_blue", 1.0).toFloat();
52 qt_config->endGroup(); 53 qt_config->endGroup();
53 54
55 qt_config->beginGroup("Audio");
56 Settings::values.sink_id = qt_config->value("output_engine", "auto").toString().toStdString();
57 qt_config->endGroup();
58
54 qt_config->beginGroup("Data Storage"); 59 qt_config->beginGroup("Data Storage");
55 Settings::values.use_virtual_sd = qt_config->value("use_virtual_sd", true).toBool(); 60 Settings::values.use_virtual_sd = qt_config->value("use_virtual_sd", true).toBool();
56 qt_config->endGroup(); 61 qt_config->endGroup();
@@ -129,6 +134,7 @@ void Config::SaveValues() {
129 qt_config->beginGroup("Renderer"); 134 qt_config->beginGroup("Renderer");
130 qt_config->setValue("use_hw_renderer", Settings::values.use_hw_renderer); 135 qt_config->setValue("use_hw_renderer", Settings::values.use_hw_renderer);
131 qt_config->setValue("use_shader_jit", Settings::values.use_shader_jit); 136 qt_config->setValue("use_shader_jit", Settings::values.use_shader_jit);
137 qt_config->setValue("use_scaled_resolution", Settings::values.use_scaled_resolution);
132 138
133 // Cast to double because Qt's written float values are not human-readable 139 // Cast to double because Qt's written float values are not human-readable
134 qt_config->setValue("bg_red", (double)Settings::values.bg_red); 140 qt_config->setValue("bg_red", (double)Settings::values.bg_red);
@@ -136,6 +142,10 @@ void Config::SaveValues() {
136 qt_config->setValue("bg_blue", (double)Settings::values.bg_blue); 142 qt_config->setValue("bg_blue", (double)Settings::values.bg_blue);
137 qt_config->endGroup(); 143 qt_config->endGroup();
138 144
145 qt_config->beginGroup("Audio");
146 qt_config->setValue("output_engine", QString::fromStdString(Settings::values.sink_id));
147 qt_config->endGroup();
148
139 qt_config->beginGroup("Data Storage"); 149 qt_config->beginGroup("Data Storage");
140 qt_config->setValue("use_virtual_sd", Settings::values.use_virtual_sd); 150 qt_config->setValue("use_virtual_sd", Settings::values.use_virtual_sd);
141 qt_config->endGroup(); 151 qt_config->endGroup();
diff --git a/src/citra_qt/configure_general.cpp b/src/citra_qt/configure_general.cpp
index a27d0d26c..62648e665 100644
--- a/src/citra_qt/configure_general.cpp
+++ b/src/citra_qt/configure_general.cpp
@@ -25,6 +25,7 @@ void ConfigureGeneral::setConfiguration() {
25 ui->region_combobox->setCurrentIndex(Settings::values.region_value); 25 ui->region_combobox->setCurrentIndex(Settings::values.region_value);
26 ui->toogle_hw_renderer->setChecked(Settings::values.use_hw_renderer); 26 ui->toogle_hw_renderer->setChecked(Settings::values.use_hw_renderer);
27 ui->toogle_shader_jit->setChecked(Settings::values.use_shader_jit); 27 ui->toogle_shader_jit->setChecked(Settings::values.use_shader_jit);
28 ui->toogle_scaled_resolution->setChecked(Settings::values.use_scaled_resolution);
28} 29}
29 30
30void ConfigureGeneral::applyConfiguration() { 31void ConfigureGeneral::applyConfiguration() {
@@ -33,5 +34,6 @@ void ConfigureGeneral::applyConfiguration() {
33 Settings::values.region_value = ui->region_combobox->currentIndex(); 34 Settings::values.region_value = ui->region_combobox->currentIndex();
34 Settings::values.use_hw_renderer = ui->toogle_hw_renderer->isChecked(); 35 Settings::values.use_hw_renderer = ui->toogle_hw_renderer->isChecked();
35 Settings::values.use_shader_jit = ui->toogle_shader_jit->isChecked(); 36 Settings::values.use_shader_jit = ui->toogle_shader_jit->isChecked();
37 Settings::values.use_scaled_resolution = ui->toogle_scaled_resolution->isChecked();
36 Settings::Apply(); 38 Settings::Apply();
37} 39}
diff --git a/src/citra_qt/configure_general.ui b/src/citra_qt/configure_general.ui
index 47184c5c6..5eb309793 100644
--- a/src/citra_qt/configure_general.ui
+++ b/src/citra_qt/configure_general.ui
@@ -128,6 +128,13 @@
128 </property> 128 </property>
129 </widget> 129 </widget>
130 </item> 130 </item>
131 <item>
132 <widget class="QCheckBox" name="toogle_scaled_resolution">
133 <property name="text">
134 <string>Enable scaled resolution</string>
135 </property>
136 </widget>
137 </item>
131 </layout> 138 </layout>
132 </item> 139 </item>
133 </layout> 140 </layout>
diff --git a/src/citra_qt/debugger/graphics_breakpoints.cpp b/src/citra_qt/debugger/graphics_breakpoints.cpp
index 819ec7707..fe66918a8 100644
--- a/src/citra_qt/debugger/graphics_breakpoints.cpp
+++ b/src/citra_qt/debugger/graphics_breakpoints.cpp
@@ -44,7 +44,7 @@ QVariant BreakPointModel::data(const QModelIndex& index, int role) const
44 { Pica::DebugContext::Event::PicaCommandProcessed, tr("Pica command processed") }, 44 { Pica::DebugContext::Event::PicaCommandProcessed, tr("Pica command processed") },
45 { Pica::DebugContext::Event::IncomingPrimitiveBatch, tr("Incoming primitive batch") }, 45 { Pica::DebugContext::Event::IncomingPrimitiveBatch, tr("Incoming primitive batch") },
46 { Pica::DebugContext::Event::FinishedPrimitiveBatch, tr("Finished primitive batch") }, 46 { Pica::DebugContext::Event::FinishedPrimitiveBatch, tr("Finished primitive batch") },
47 { Pica::DebugContext::Event::VertexLoaded, tr("Vertex loaded") }, 47 { Pica::DebugContext::Event::VertexShaderInvocation, tr("Vertex shader invocation") },
48 { Pica::DebugContext::Event::IncomingDisplayTransfer, tr("Incoming display transfer") }, 48 { Pica::DebugContext::Event::IncomingDisplayTransfer, tr("Incoming display transfer") },
49 { Pica::DebugContext::Event::GSPCommandProcessed, tr("GSP command processed") }, 49 { Pica::DebugContext::Event::GSPCommandProcessed, tr("GSP command processed") },
50 { Pica::DebugContext::Event::BufferSwapped, tr("Buffers swapped") } 50 { Pica::DebugContext::Event::BufferSwapped, tr("Buffers swapped") }
@@ -75,7 +75,7 @@ QVariant BreakPointModel::data(const QModelIndex& index, int role) const
75 case Role_IsEnabled: 75 case Role_IsEnabled:
76 { 76 {
77 auto context = context_weak.lock(); 77 auto context = context_weak.lock();
78 return context && context->breakpoints[event].enabled; 78 return context && context->breakpoints[(int)event].enabled;
79 } 79 }
80 80
81 default: 81 default:
@@ -110,7 +110,7 @@ bool BreakPointModel::setData(const QModelIndex& index, const QVariant& value, i
110 if (!context) 110 if (!context)
111 return false; 111 return false;
112 112
113 context->breakpoints[event].enabled = value == Qt::Checked; 113 context->breakpoints[(int)event].enabled = value == Qt::Checked;
114 QModelIndex changed_index = createIndex(index.row(), 0); 114 QModelIndex changed_index = createIndex(index.row(), 0);
115 emit dataChanged(changed_index, changed_index); 115 emit dataChanged(changed_index, changed_index);
116 return true; 116 return true;
diff --git a/src/citra_qt/debugger/graphics_framebuffer.cpp b/src/citra_qt/debugger/graphics_framebuffer.cpp
index c30e75933..68cff78b2 100644
--- a/src/citra_qt/debugger/graphics_framebuffer.cpp
+++ b/src/citra_qt/debugger/graphics_framebuffer.cpp
@@ -346,5 +346,11 @@ u32 GraphicsFramebufferWidget::BytesPerPixel(GraphicsFramebufferWidget::Format f
346 case Format::RGBA4: 346 case Format::RGBA4:
347 case Format::D16: 347 case Format::D16:
348 return 2; 348 return 2;
349 default:
350 UNREACHABLE_MSG("GraphicsFramebufferWidget::BytesPerPixel: this "
351 "should not be reached as this function should "
352 "be given a format which is in "
353 "GraphicsFramebufferWidget::Format. Instead got %i",
354 static_cast<int>(format));
349 } 355 }
350} 356}
diff --git a/src/citra_qt/debugger/graphics_tracing.cpp b/src/citra_qt/debugger/graphics_tracing.cpp
index e06498744..9c80f7ec9 100644
--- a/src/citra_qt/debugger/graphics_tracing.cpp
+++ b/src/citra_qt/debugger/graphics_tracing.cpp
@@ -2,6 +2,9 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
6#include <array>
7#include <iterator>
5#include <memory> 8#include <memory>
6 9
7#include <boost/range/algorithm/copy.hpp> 10#include <boost/range/algorithm/copy.hpp>
@@ -18,6 +21,7 @@
18 21
19#include "core/hw/gpu.h" 22#include "core/hw/gpu.h"
20#include "core/hw/lcd.h" 23#include "core/hw/lcd.h"
24#include "core/tracer/recorder.h"
21 25
22#include "nihstro/float24.h" 26#include "nihstro/float24.h"
23 27
@@ -70,7 +74,7 @@ void GraphicsTracingWidget::StartRecording() {
70 std::array<u32, 4 * 16> default_attributes; 74 std::array<u32, 4 * 16> default_attributes;
71 for (unsigned i = 0; i < 16; ++i) { 75 for (unsigned i = 0; i < 16; ++i) {
72 for (unsigned comp = 0; comp < 3; ++comp) { 76 for (unsigned comp = 0; comp < 3; ++comp) {
73 default_attributes[4 * i + comp] = nihstro::to_float24(Pica::g_state.vs.default_attributes[i][comp].ToFloat32()); 77 default_attributes[4 * i + comp] = nihstro::to_float24(Pica::g_state.vs_default_attributes[i][comp].ToFloat32());
74 } 78 }
75 } 79 }
76 80
diff --git a/src/citra_qt/debugger/graphics_vertex_shader.cpp b/src/citra_qt/debugger/graphics_vertex_shader.cpp
index d648d4640..391666d35 100644
--- a/src/citra_qt/debugger/graphics_vertex_shader.cpp
+++ b/src/citra_qt/debugger/graphics_vertex_shader.cpp
@@ -365,7 +365,7 @@ GraphicsVertexShaderWidget::GraphicsVertexShaderWidget(std::shared_ptr< Pica::De
365 input_data[i]->setValidator(new QDoubleValidator(input_data[i])); 365 input_data[i]->setValidator(new QDoubleValidator(input_data[i]));
366 } 366 }
367 367
368 breakpoint_warning = new QLabel(tr("(data only available at VertexLoaded breakpoints)")); 368 breakpoint_warning = new QLabel(tr("(data only available at vertex shader invocation breakpoints)"));
369 369
370 // TODO: Add some button for jumping to the shader entry point 370 // TODO: Add some button for jumping to the shader entry point
371 371
@@ -454,7 +454,7 @@ GraphicsVertexShaderWidget::GraphicsVertexShaderWidget(std::shared_ptr< Pica::De
454 454
455void GraphicsVertexShaderWidget::OnBreakPointHit(Pica::DebugContext::Event event, void* data) { 455void GraphicsVertexShaderWidget::OnBreakPointHit(Pica::DebugContext::Event event, void* data) {
456 auto input = static_cast<Pica::Shader::InputVertex*>(data); 456 auto input = static_cast<Pica::Shader::InputVertex*>(data);
457 if (event == Pica::DebugContext::Event::VertexLoaded) { 457 if (event == Pica::DebugContext::Event::VertexShaderInvocation) {
458 Reload(true, data); 458 Reload(true, data);
459 } else { 459 } else {
460 // No vertex data is retrievable => invalidate currently stored vertex data 460 // No vertex data is retrievable => invalidate currently stored vertex data
@@ -501,7 +501,7 @@ void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_d
501 info.labels.insert({ entry_point, "main" }); 501 info.labels.insert({ entry_point, "main" });
502 502
503 // Generate debug information 503 // Generate debug information
504 debug_data = Pica::Shader::ProduceDebugInfo(input_vertex, num_attributes, shader_config, shader_setup); 504 debug_data = Pica::g_state.vs.ProduceDebugInfo(input_vertex, num_attributes, shader_config, shader_setup);
505 505
506 // Reload widget state 506 // Reload widget state
507 for (int attr = 0; attr < num_attributes; ++attr) { 507 for (int attr = 0; attr < num_attributes; ++attr) {
@@ -515,7 +515,7 @@ void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_d
515 } 515 }
516 516
517 // Initialize debug info text for current cycle count 517 // Initialize debug info text for current cycle count
518 cycle_index->setMaximum(debug_data.records.size() - 1); 518 cycle_index->setMaximum(static_cast<int>(debug_data.records.size() - 1));
519 OnCycleIndexChanged(cycle_index->value()); 519 OnCycleIndexChanged(cycle_index->value());
520 520
521 model->endResetModel(); 521 model->endResetModel();
diff --git a/src/citra_qt/debugger/profiler.cpp b/src/citra_qt/debugger/profiler.cpp
index 4f6ba0e1f..7bb010f77 100644
--- a/src/citra_qt/debugger/profiler.cpp
+++ b/src/citra_qt/debugger/profiler.cpp
@@ -9,13 +9,16 @@
9#include "citra_qt/debugger/profiler.h" 9#include "citra_qt/debugger/profiler.h"
10#include "citra_qt/util/util.h" 10#include "citra_qt/util/util.h"
11 11
12#include "common/common_types.h"
12#include "common/microprofile.h" 13#include "common/microprofile.h"
13#include "common/profiler_reporting.h" 14#include "common/profiler_reporting.h"
14 15
15// Include the implementation of the UI in this file. This isn't in microprofile.cpp because the 16// Include the implementation of the UI in this file. This isn't in microprofile.cpp because the
16// non-Qt frontends don't need it (and don't implement the UI drawing hooks either). 17// non-Qt frontends don't need it (and don't implement the UI drawing hooks either).
18#if MICROPROFILE_ENABLED
17#define MICROPROFILEUI_IMPL 1 19#define MICROPROFILEUI_IMPL 1
18#include "common/microprofileui.h" 20#include "common/microprofileui.h"
21#endif
19 22
20using namespace Common::Profiling; 23using namespace Common::Profiling;
21 24
@@ -34,21 +37,9 @@ static QVariant GetDataForColumn(int col, const AggregatedDuration& duration)
34 } 37 }
35} 38}
36 39
37static const TimingCategoryInfo* GetCategoryInfo(int id)
38{
39 const auto& categories = GetProfilingManager().GetTimingCategoriesInfo();
40 if ((size_t)id >= categories.size()) {
41 return nullptr;
42 } else {
43 return &categories[id];
44 }
45}
46
47ProfilerModel::ProfilerModel(QObject* parent) : QAbstractItemModel(parent) 40ProfilerModel::ProfilerModel(QObject* parent) : QAbstractItemModel(parent)
48{ 41{
49 updateProfilingInfo(); 42 updateProfilingInfo();
50 const auto& categories = GetProfilingManager().GetTimingCategoriesInfo();
51 results.time_per_category.resize(categories.size());
52} 43}
53 44
54QVariant ProfilerModel::headerData(int section, Qt::Orientation orientation, int role) const 45QVariant ProfilerModel::headerData(int section, Qt::Orientation orientation, int role) const
@@ -85,7 +76,7 @@ int ProfilerModel::rowCount(const QModelIndex& parent) const
85 if (parent.isValid()) { 76 if (parent.isValid()) {
86 return 0; 77 return 0;
87 } else { 78 } else {
88 return static_cast<int>(results.time_per_category.size() + 2); 79 return 2;
89 } 80 }
90} 81}
91 82
@@ -104,17 +95,6 @@ QVariant ProfilerModel::data(const QModelIndex& index, int role) const
104 } else { 95 } else {
105 return GetDataForColumn(index.column(), results.interframe_time); 96 return GetDataForColumn(index.column(), results.interframe_time);
106 } 97 }
107 } else {
108 if (index.column() == 0) {
109 const TimingCategoryInfo* info = GetCategoryInfo(index.row() - 2);
110 return info != nullptr ? QString(info->name) : QVariant();
111 } else {
112 if (index.row() - 2 < (int)results.time_per_category.size()) {
113 return GetDataForColumn(index.column(), results.time_per_category[index.row() - 2]);
114 } else {
115 return QVariant();
116 }
117 }
118 } 98 }
119 } 99 }
120 100
@@ -148,6 +128,8 @@ void ProfilerWidget::setProfilingInfoUpdateEnabled(bool enable)
148 } 128 }
149} 129}
150 130
131#if MICROPROFILE_ENABLED
132
151class MicroProfileWidget : public QWidget { 133class MicroProfileWidget : public QWidget {
152public: 134public:
153 MicroProfileWidget(QWidget* parent = nullptr); 135 MicroProfileWidget(QWidget* parent = nullptr);
@@ -171,6 +153,8 @@ private:
171 QTimer update_timer; 153 QTimer update_timer;
172}; 154};
173 155
156#endif
157
174MicroProfileDialog::MicroProfileDialog(QWidget* parent) 158MicroProfileDialog::MicroProfileDialog(QWidget* parent)
175 : QWidget(parent, Qt::Dialog) 159 : QWidget(parent, Qt::Dialog)
176{ 160{
@@ -180,6 +164,8 @@ MicroProfileDialog::MicroProfileDialog(QWidget* parent)
180 // Remove the "?" button from the titlebar and enable the maximize button 164 // Remove the "?" button from the titlebar and enable the maximize button
181 setWindowFlags(windowFlags() & ~Qt::WindowContextHelpButtonHint | Qt::WindowMaximizeButtonHint); 165 setWindowFlags(windowFlags() & ~Qt::WindowContextHelpButtonHint | Qt::WindowMaximizeButtonHint);
182 166
167#if MICROPROFILE_ENABLED
168
183 MicroProfileWidget* widget = new MicroProfileWidget(this); 169 MicroProfileWidget* widget = new MicroProfileWidget(this);
184 170
185 QLayout* layout = new QVBoxLayout(this); 171 QLayout* layout = new QVBoxLayout(this);
@@ -191,6 +177,7 @@ MicroProfileDialog::MicroProfileDialog(QWidget* parent)
191 setFocusProxy(widget); 177 setFocusProxy(widget);
192 widget->setFocusPolicy(Qt::StrongFocus); 178 widget->setFocusPolicy(Qt::StrongFocus);
193 widget->setFocus(); 179 widget->setFocus();
180#endif
194} 181}
195 182
196QAction* MicroProfileDialog::toggleViewAction() { 183QAction* MicroProfileDialog::toggleViewAction() {
@@ -218,6 +205,9 @@ void MicroProfileDialog::hideEvent(QHideEvent* ev) {
218 QWidget::hideEvent(ev); 205 QWidget::hideEvent(ev);
219} 206}
220 207
208
209#if MICROPROFILE_ENABLED
210
221/// There's no way to pass a user pointer to MicroProfile, so this variable is used to make the 211/// There's no way to pass a user pointer to MicroProfile, so this variable is used to make the
222/// QPainter available inside the drawing callbacks. 212/// QPainter available inside the drawing callbacks.
223static QPainter* mp_painter = nullptr; 213static QPainter* mp_painter = nullptr;
@@ -337,3 +327,4 @@ void MicroProfileDrawLine2D(u32 vertices_length, float* vertices, u32 hex_color)
337 mp_painter->drawPolyline(point_buf.data(), vertices_length); 327 mp_painter->drawPolyline(point_buf.data(), vertices_length);
338 point_buf.clear(); 328 point_buf.clear();
339} 329}
330#endif
diff --git a/src/citra_qt/debugger/profiler.h b/src/citra_qt/debugger/profiler.h
index 036054740..3b38ed8ec 100644
--- a/src/citra_qt/debugger/profiler.h
+++ b/src/citra_qt/debugger/profiler.h
@@ -7,8 +7,10 @@
7#include <QAbstractItemModel> 7#include <QAbstractItemModel>
8#include <QDockWidget> 8#include <QDockWidget>
9#include <QTimer> 9#include <QTimer>
10
10#include "ui_profiler.h" 11#include "ui_profiler.h"
11 12
13#include "common/microprofile.h"
12#include "common/profiler_reporting.h" 14#include "common/profiler_reporting.h"
13 15
14class ProfilerModel : public QAbstractItemModel 16class ProfilerModel : public QAbstractItemModel
@@ -49,6 +51,7 @@ private:
49 QTimer update_timer; 51 QTimer update_timer;
50}; 52};
51 53
54
52class MicroProfileDialog : public QWidget { 55class MicroProfileDialog : public QWidget {
53 Q_OBJECT 56 Q_OBJECT
54 57
diff --git a/src/citra_qt/game_list.cpp b/src/citra_qt/game_list.cpp
index d14532102..d4ac9c96e 100644
--- a/src/citra_qt/game_list.cpp
+++ b/src/citra_qt/game_list.cpp
@@ -34,8 +34,8 @@ GameList::GameList(QWidget* parent)
34 tree_view->setUniformRowHeights(true); 34 tree_view->setUniformRowHeights(true);
35 35
36 item_model->insertColumns(0, COLUMN_COUNT); 36 item_model->insertColumns(0, COLUMN_COUNT);
37 item_model->setHeaderData(COLUMN_FILE_TYPE, Qt::Horizontal, "File type");
38 item_model->setHeaderData(COLUMN_NAME, Qt::Horizontal, "Name"); 37 item_model->setHeaderData(COLUMN_NAME, Qt::Horizontal, "Name");
38 item_model->setHeaderData(COLUMN_FILE_TYPE, Qt::Horizontal, "File type");
39 item_model->setHeaderData(COLUMN_SIZE, Qt::Horizontal, "Size"); 39 item_model->setHeaderData(COLUMN_SIZE, Qt::Horizontal, "Size");
40 40
41 connect(tree_view, SIGNAL(activated(const QModelIndex&)), this, SLOT(ValidateEntry(const QModelIndex&))); 41 connect(tree_view, SIGNAL(activated(const QModelIndex&)), this, SLOT(ValidateEntry(const QModelIndex&)));
@@ -109,7 +109,11 @@ void GameList::SaveInterfaceLayout()
109void GameList::LoadInterfaceLayout() 109void GameList::LoadInterfaceLayout()
110{ 110{
111 auto header = tree_view->header(); 111 auto header = tree_view->header();
112 header->restoreState(UISettings::values.gamelist_header_state); 112 if (!header->restoreState(UISettings::values.gamelist_header_state)) {
113 // We are using the name column to display icons and titles
114 // so make it as large as possible as default.
115 header->resizeSection(COLUMN_NAME, header->width());
116 }
113 117
114 item_model->sort(header->sortIndicatorSection(), header->sortIndicatorOrder()); 118 item_model->sort(header->sortIndicatorSection(), header->sortIndicatorOrder());
115} 119}
@@ -143,9 +147,15 @@ void GameListWorker::AddFstEntriesToGameList(const std::string& dir_path, bool d
143 LOG_WARNING(Frontend, "Filetype and extension of file %s do not match.", physical_name.c_str()); 147 LOG_WARNING(Frontend, "Filetype and extension of file %s do not match.", physical_name.c_str());
144 } 148 }
145 149
150 std::vector<u8> smdh;
151 std::unique_ptr<Loader::AppLoader> loader = Loader::GetLoader(FileUtil::IOFile(physical_name, "rb"), filetype, filename_filename, physical_name);
152
153 if (loader)
154 loader->ReadIcon(smdh);
155
146 emit EntryReady({ 156 emit EntryReady({
157 new GameListItemPath(QString::fromStdString(physical_name), smdh),
147 new GameListItem(QString::fromStdString(Loader::GetFileTypeString(filetype))), 158 new GameListItem(QString::fromStdString(Loader::GetFileTypeString(filetype))),
148 new GameListItemPath(QString::fromStdString(physical_name)),
149 new GameListItemSize(FileUtil::GetSize(physical_name)), 159 new GameListItemSize(FileUtil::GetSize(physical_name)),
150 }); 160 });
151 } 161 }
diff --git a/src/citra_qt/game_list.h b/src/citra_qt/game_list.h
index 48febdc60..198674f04 100644
--- a/src/citra_qt/game_list.h
+++ b/src/citra_qt/game_list.h
@@ -20,8 +20,8 @@ class GameList : public QWidget {
20 20
21public: 21public:
22 enum { 22 enum {
23 COLUMN_FILE_TYPE,
24 COLUMN_NAME, 23 COLUMN_NAME,
24 COLUMN_FILE_TYPE,
25 COLUMN_SIZE, 25 COLUMN_SIZE,
26 COLUMN_COUNT, // Number of columns 26 COLUMN_COUNT, // Number of columns
27 }; 27 };
diff --git a/src/citra_qt/game_list_p.h b/src/citra_qt/game_list_p.h
index 820012bce..284f5da81 100644
--- a/src/citra_qt/game_list_p.h
+++ b/src/citra_qt/game_list_p.h
@@ -6,13 +6,85 @@
6 6
7#include <atomic> 7#include <atomic>
8 8
9#include <QImage>
9#include <QRunnable> 10#include <QRunnable>
10#include <QStandardItem> 11#include <QStandardItem>
11#include <QString> 12#include <QString>
12 13
13#include "citra_qt/util/util.h" 14#include "citra_qt/util/util.h"
14#include "common/string_util.h" 15#include "common/string_util.h"
16#include "common/color.h"
15 17
18#include "core/loader/loader.h"
19
20#include "video_core/utils.h"
21
22/**
23 * Tests if data is a valid SMDH by its length and magic number.
24 * @param smdh_data data buffer to test
25 * @return bool test result
26 */
27static bool IsValidSMDH(const std::vector<u8>& smdh_data) {
28 if (smdh_data.size() < sizeof(Loader::SMDH))
29 return false;
30
31 u32 magic;
32 memcpy(&magic, smdh_data.data(), 4);
33
34 return Loader::MakeMagic('S', 'M', 'D', 'H') == magic;
35}
36
37/**
38 * Gets game icon from SMDH
39 * @param smdh SMDH data
40 * @param large If true, returns large icon (48x48), otherwise returns small icon (24x24)
41 * @return QPixmap game icon
42 */
43static QPixmap GetIconFromSMDH(const Loader::SMDH& smdh, bool large) {
44 u32 size;
45 const u8* icon_data;
46
47 if (large) {
48 size = 48;
49 icon_data = smdh.large_icon.data();
50 } else {
51 size = 24;
52 icon_data = smdh.small_icon.data();
53 }
54
55 QImage icon(size, size, QImage::Format::Format_RGB888);
56 for (u32 x = 0; x < size; ++x) {
57 for (u32 y = 0; y < size; ++y) {
58 u32 coarse_y = y & ~7;
59 auto v = Color::DecodeRGB565(
60 icon_data + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * size * 2);
61 icon.setPixel(x, y, qRgb(v.r(), v.g(), v.b()));
62 }
63 }
64 return QPixmap::fromImage(icon);
65}
66
67/**
68 * Gets the default icon (for games without valid SMDH)
69 * @param large If true, returns large icon (48x48), otherwise returns small icon (24x24)
70 * @return QPixmap default icon
71 */
72static QPixmap GetDefaultIcon(bool large) {
73 int size = large ? 48 : 24;
74 QPixmap icon(size, size);
75 icon.fill(Qt::transparent);
76 return icon;
77}
78
79/**
80 * Gets the short game title from SMDH
81 * @param smdh SMDH data
82 * @param language title language
83 * @return QString short title
84 */
85static QString GetShortTitleFromSMDH(const Loader::SMDH& smdh, Loader::SMDH::TitleLanguage language) {
86 return QString::fromUtf16(smdh.titles[static_cast<int>(language)].short_title.data());
87}
16 88
17class GameListItem : public QStandardItem { 89class GameListItem : public QStandardItem {
18 90
@@ -27,29 +99,43 @@ public:
27 * A specialization of GameListItem for path values. 99 * A specialization of GameListItem for path values.
28 * This class ensures that for every full path value it holds, a correct string representation 100 * This class ensures that for every full path value it holds, a correct string representation
29 * of just the filename (with no extension) will be displayed to the user. 101 * of just the filename (with no extension) will be displayed to the user.
102 * If this class receives valid SMDH data, it will also display game icons and titles.
30 */ 103 */
31class GameListItemPath : public GameListItem { 104class GameListItemPath : public GameListItem {
32 105
33public: 106public:
34 static const int FullPathRole = Qt::UserRole + 1; 107 static const int FullPathRole = Qt::UserRole + 1;
108 static const int TitleRole = Qt::UserRole + 2;
35 109
36 GameListItemPath(): GameListItem() {} 110 GameListItemPath(): GameListItem() {}
37 GameListItemPath(const QString& game_path): GameListItem() 111 GameListItemPath(const QString& game_path, const std::vector<u8>& smdh_data): GameListItem()
38 { 112 {
39 setData(game_path, FullPathRole); 113 setData(game_path, FullPathRole);
114
115 if (!IsValidSMDH(smdh_data)) {
116 // SMDH is not valid, set a default icon
117 setData(GetDefaultIcon(true), Qt::DecorationRole);
118 return;
119 }
120
121 Loader::SMDH smdh;
122 memcpy(&smdh, smdh_data.data(), sizeof(Loader::SMDH));
123
124 // Get icon from SMDH
125 setData(GetIconFromSMDH(smdh, true), Qt::DecorationRole);
126
127 // Get title from SMDH
128 setData(GetShortTitleFromSMDH(smdh, Loader::SMDH::TitleLanguage::English), TitleRole);
40 } 129 }
41 130
42 void setData(const QVariant& value, int role) override 131 QVariant data(int role) const override {
43 { 132 if (role == Qt::DisplayRole) {
44 // By specializing setData for FullPathRole, we can ensure that the two string
45 // representations of the data are always accurate and in the correct format.
46 if (role == FullPathRole) {
47 std::string filename; 133 std::string filename;
48 Common::SplitPath(value.toString().toStdString(), nullptr, &filename, nullptr); 134 Common::SplitPath(data(FullPathRole).toString().toStdString(), nullptr, &filename, nullptr);
49 GameListItem::setData(QString::fromStdString(filename), Qt::DisplayRole); 135 QString title = data(TitleRole).toString();
50 GameListItem::setData(value, FullPathRole); 136 return QString::fromStdString(filename) + (title.isEmpty() ? "" : "\n " + title);
51 } else { 137 } else {
52 GameListItem::setData(value, role); 138 return GameListItem::data(role);
53 } 139 }
54 } 140 }
55}; 141};
diff --git a/src/citra_qt/main.cpp b/src/citra_qt/main.cpp
index 2ca1e51f6..a85c94a4b 100644
--- a/src/citra_qt/main.cpp
+++ b/src/citra_qt/main.cpp
@@ -6,6 +6,9 @@
6#include <memory> 6#include <memory>
7#include <thread> 7#include <thread>
8 8
9#include <glad/glad.h>
10
11#define QT_NO_OPENGL
9#include <QDesktopWidget> 12#include <QDesktopWidget>
10#include <QtGui> 13#include <QtGui>
11#include <QFileDialog> 14#include <QFileDialog>
@@ -69,8 +72,10 @@ GMainWindow::GMainWindow() : config(new Config()), emu_thread(nullptr)
69 addDockWidget(Qt::BottomDockWidgetArea, profilerWidget); 72 addDockWidget(Qt::BottomDockWidgetArea, profilerWidget);
70 profilerWidget->hide(); 73 profilerWidget->hide();
71 74
75#if MICROPROFILE_ENABLED
72 microProfileDialog = new MicroProfileDialog(this); 76 microProfileDialog = new MicroProfileDialog(this);
73 microProfileDialog->hide(); 77 microProfileDialog->hide();
78#endif
74 79
75 disasmWidget = new DisassemblerWidget(this, emu_thread.get()); 80 disasmWidget = new DisassemblerWidget(this, emu_thread.get());
76 addDockWidget(Qt::BottomDockWidgetArea, disasmWidget); 81 addDockWidget(Qt::BottomDockWidgetArea, disasmWidget);
@@ -110,7 +115,9 @@ GMainWindow::GMainWindow() : config(new Config()), emu_thread(nullptr)
110 115
111 QMenu* debug_menu = ui.menu_View->addMenu(tr("Debugging")); 116 QMenu* debug_menu = ui.menu_View->addMenu(tr("Debugging"));
112 debug_menu->addAction(profilerWidget->toggleViewAction()); 117 debug_menu->addAction(profilerWidget->toggleViewAction());
118#if MICROPROFILE_ENABLED
113 debug_menu->addAction(microProfileDialog->toggleViewAction()); 119 debug_menu->addAction(microProfileDialog->toggleViewAction());
120#endif
114 debug_menu->addAction(disasmWidget->toggleViewAction()); 121 debug_menu->addAction(disasmWidget->toggleViewAction());
115 debug_menu->addAction(registersWidget->toggleViewAction()); 122 debug_menu->addAction(registersWidget->toggleViewAction());
116 debug_menu->addAction(callstackWidget->toggleViewAction()); 123 debug_menu->addAction(callstackWidget->toggleViewAction());
@@ -136,8 +143,10 @@ GMainWindow::GMainWindow() : config(new Config()), emu_thread(nullptr)
136 restoreGeometry(UISettings::values.geometry); 143 restoreGeometry(UISettings::values.geometry);
137 restoreState(UISettings::values.state); 144 restoreState(UISettings::values.state);
138 render_window->restoreGeometry(UISettings::values.renderwindow_geometry); 145 render_window->restoreGeometry(UISettings::values.renderwindow_geometry);
146#if MICROPROFILE_ENABLED
139 microProfileDialog->restoreGeometry(UISettings::values.microprofile_geometry); 147 microProfileDialog->restoreGeometry(UISettings::values.microprofile_geometry);
140 microProfileDialog->setVisible(UISettings::values.microprofile_visible); 148 microProfileDialog->setVisible(UISettings::values.microprofile_visible);
149#endif
141 150
142 game_list->LoadInterfaceLayout(); 151 game_list->LoadInterfaceLayout();
143 152
@@ -234,6 +243,14 @@ bool GMainWindow::InitializeSystem() {
234 if (emu_thread != nullptr) 243 if (emu_thread != nullptr)
235 ShutdownGame(); 244 ShutdownGame();
236 245
246 render_window->MakeCurrent();
247 if (!gladLoadGL()) {
248 QMessageBox::critical(this, tr("Error while starting Citra!"),
249 tr("Failed to initialize the video core!\n\n"
250 "Please ensure that your GPU supports OpenGL 3.3 and that you have the latest graphics driver."));
251 return false;
252 }
253
237 // Initialize the core emulation 254 // Initialize the core emulation
238 System::Result system_result = System::Init(render_window); 255 System::Result system_result = System::Init(render_window);
239 if (System::Result::Success != system_result) { 256 if (System::Result::Success != system_result) {
@@ -511,9 +528,10 @@ void GMainWindow::closeEvent(QCloseEvent* event) {
511 UISettings::values.geometry = saveGeometry(); 528 UISettings::values.geometry = saveGeometry();
512 UISettings::values.state = saveState(); 529 UISettings::values.state = saveState();
513 UISettings::values.renderwindow_geometry = render_window->saveGeometry(); 530 UISettings::values.renderwindow_geometry = render_window->saveGeometry();
531#if MICROPROFILE_ENABLED
514 UISettings::values.microprofile_geometry = microProfileDialog->saveGeometry(); 532 UISettings::values.microprofile_geometry = microProfileDialog->saveGeometry();
515 UISettings::values.microprofile_visible = microProfileDialog->isVisible(); 533 UISettings::values.microprofile_visible = microProfileDialog->isVisible();
516 534#endif
517 UISettings::values.single_window_mode = ui.action_Single_Window_Mode->isChecked(); 535 UISettings::values.single_window_mode = ui.action_Single_Window_Mode->isChecked();
518 UISettings::values.display_titlebar = ui.actionDisplay_widget_title_bars->isChecked(); 536 UISettings::values.display_titlebar = ui.actionDisplay_widget_title_bars->isChecked();
519 UISettings::values.first_start = false; 537 UISettings::values.first_start = false;
diff --git a/src/citra_qt/util/util.cpp b/src/citra_qt/util/util.cpp
index 8734a8efd..2f9beb5cc 100644
--- a/src/citra_qt/util/util.cpp
+++ b/src/citra_qt/util/util.cpp
@@ -19,7 +19,7 @@ QString ReadableByteSize(qulonglong size) {
19 static const std::array<const char*, 6> units = { "B", "KiB", "MiB", "GiB", "TiB", "PiB" }; 19 static const std::array<const char*, 6> units = { "B", "KiB", "MiB", "GiB", "TiB", "PiB" };
20 if (size == 0) 20 if (size == 0)
21 return "0"; 21 return "0";
22 int digit_groups = std::min<int>((int)(std::log10(size) / std::log10(1024)), units.size()); 22 int digit_groups = std::min<int>(static_cast<int>(std::log10(size) / std::log10(1024)), static_cast<int>(units.size()));
23 return QString("%L1 %2").arg(size / std::pow(1024, digit_groups), 0, 'f', 1) 23 return QString("%L1 %2").arg(size / std::pow(1024, digit_groups), 0, 'f', 1)
24 .arg(units[digit_groups]); 24 .arg(units[digit_groups]);
25} 25}
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index c839ce173..aa6eee2a3 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -47,7 +47,6 @@ set(HEADERS
47 microprofile.h 47 microprofile.h
48 microprofileui.h 48 microprofileui.h
49 platform.h 49 platform.h
50 profiler.h
51 profiler_reporting.h 50 profiler_reporting.h
52 scm_rev.h 51 scm_rev.h
53 scope_exit.h 52 scope_exit.h
diff --git a/src/common/assert.h b/src/common/assert.h
index 6849778b7..cd9b819a9 100644
--- a/src/common/assert.h
+++ b/src/common/assert.h
@@ -39,6 +39,7 @@ static void assert_noinline_call(const Fn& fn) {
39 }); } while (0) 39 }); } while (0)
40 40
41#define UNREACHABLE() ASSERT_MSG(false, "Unreachable code!") 41#define UNREACHABLE() ASSERT_MSG(false, "Unreachable code!")
42#define UNREACHABLE_MSG(...) ASSERT_MSG(false, __VA_ARGS__)
42 43
43#ifdef _DEBUG 44#ifdef _DEBUG
44#define DEBUG_ASSERT(_a_) ASSERT(_a_) 45#define DEBUG_ASSERT(_a_) ASSERT(_a_)
@@ -49,3 +50,4 @@ static void assert_noinline_call(const Fn& fn) {
49#endif 50#endif
50 51
51#define UNIMPLEMENTED() DEBUG_ASSERT_MSG(false, "Unimplemented code!") 52#define UNIMPLEMENTED() DEBUG_ASSERT_MSG(false, "Unimplemented code!")
53#define UNIMPLEMENTED_MSG(_a_, ...) ASSERT_MSG(false, _a_, __VA_ARGS__) \ No newline at end of file
diff --git a/src/common/bit_field.h b/src/common/bit_field.h
index 371eb17a1..4748999ed 100644
--- a/src/common/bit_field.h
+++ b/src/common/bit_field.h
@@ -186,5 +186,5 @@ private:
186#pragma pack() 186#pragma pack()
187 187
188#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER) 188#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER)
189static_assert(std::is_trivially_copyable<BitField<0, 1, u32>>::value, "BitField must be trivially copyable"); 189static_assert(std::is_trivially_copyable<BitField<0, 1, unsigned>>::value, "BitField must be trivially copyable");
190#endif 190#endif
diff --git a/src/common/bit_set.h b/src/common/bit_set.h
index 85f91e786..7f5de8df2 100644
--- a/src/common/bit_set.h
+++ b/src/common/bit_set.h
@@ -7,6 +7,7 @@
7#include <intrin.h> 7#include <intrin.h>
8#endif 8#endif
9#include <initializer_list> 9#include <initializer_list>
10#include <new>
10#include <type_traits> 11#include <type_traits>
11#include "common/common_types.h" 12#include "common/common_types.h"
12 13
@@ -186,4 +187,4 @@ public:
186typedef Common::BitSet<u8> BitSet8; 187typedef Common::BitSet<u8> BitSet8;
187typedef Common::BitSet<u16> BitSet16; 188typedef Common::BitSet<u16> BitSet16;
188typedef Common::BitSet<u32> BitSet32; 189typedef Common::BitSet<u32> BitSet32;
189typedef Common::BitSet<u64> BitSet64; \ No newline at end of file 190typedef Common::BitSet<u64> BitSet64;
diff --git a/src/common/code_block.h b/src/common/code_block.h
index 9ef7296d3..2fa4a0090 100644
--- a/src/common/code_block.h
+++ b/src/common/code_block.h
@@ -4,8 +4,10 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "common_types.h" 7#include <cstddef>
8#include "memory_util.h" 8
9#include "common/common_types.h"
10#include "common/memory_util.h"
9 11
10// Everything that needs to generate code should inherit from this. 12// Everything that needs to generate code should inherit from this.
11// You get memory management for free, plus, you can use all emitter functions without 13// You get memory management for free, plus, you can use all emitter functions without
diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h
index aa6aff7b9..ab3515683 100644
--- a/src/common/common_funcs.h
+++ b/src/common/common_funcs.h
@@ -4,6 +4,10 @@
4 4
5#pragma once 5#pragma once
6 6
7#if !defined(ARCHITECTURE_x86_64) && !defined(_M_ARM)
8#include <cstdlib> // for exit
9#endif
10
7#include "common_types.h" 11#include "common_types.h"
8 12
9#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) 13#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
diff --git a/src/common/file_util.cpp b/src/common/file_util.cpp
index 53700c865..6e2867658 100644
--- a/src/common/file_util.cpp
+++ b/src/common/file_util.cpp
@@ -69,9 +69,10 @@ static void StripTailDirSlashes(std::string &fname)
69{ 69{
70 if (fname.length() > 1) 70 if (fname.length() > 1)
71 { 71 {
72 size_t i = fname.length() - 1; 72 size_t i = fname.length();
73 while (fname[i] == DIR_SEP_CHR) 73 while (i > 0 && fname[i - 1] == DIR_SEP_CHR)
74 fname[i--] = '\0'; 74 --i;
75 fname.resize(i);
75 } 76 }
76 return; 77 return;
77} 78}
@@ -85,6 +86,10 @@ bool Exists(const std::string &filename)
85 StripTailDirSlashes(copy); 86 StripTailDirSlashes(copy);
86 87
87#ifdef _WIN32 88#ifdef _WIN32
89 // Windows needs a slash to identify a drive root
90 if (copy.size() != 0 && copy.back() == ':')
91 copy += DIR_SEP_CHR;
92
88 int result = _wstat64(Common::UTF8ToUTF16W(copy).c_str(), &file_info); 93 int result = _wstat64(Common::UTF8ToUTF16W(copy).c_str(), &file_info);
89#else 94#else
90 int result = stat64(copy.c_str(), &file_info); 95 int result = stat64(copy.c_str(), &file_info);
@@ -102,6 +107,10 @@ bool IsDirectory(const std::string &filename)
102 StripTailDirSlashes(copy); 107 StripTailDirSlashes(copy);
103 108
104#ifdef _WIN32 109#ifdef _WIN32
110 // Windows needs a slash to identify a drive root
111 if (copy.size() != 0 && copy.back() == ':')
112 copy += DIR_SEP_CHR;
113
105 int result = _wstat64(Common::UTF8ToUTF16W(copy).c_str(), &file_info); 114 int result = _wstat64(Common::UTF8ToUTF16W(copy).c_str(), &file_info);
106#else 115#else
107 int result = stat64(copy.c_str(), &file_info); 116 int result = stat64(copy.c_str(), &file_info);
diff --git a/src/common/file_util.h b/src/common/file_util.h
index b54a9fb72..c6a8694ce 100644
--- a/src/common/file_util.h
+++ b/src/common/file_util.h
@@ -7,9 +7,9 @@
7#include <array> 7#include <array>
8#include <fstream> 8#include <fstream>
9#include <functional> 9#include <functional>
10#include <cstddef>
11#include <cstdio> 10#include <cstdio>
12#include <string> 11#include <string>
12#include <type_traits>
13#include <vector> 13#include <vector>
14 14
15#include "common/common_types.h" 15#include "common/common_types.h"
@@ -192,7 +192,9 @@ public:
192 size_t ReadArray(T* data, size_t length) 192 size_t ReadArray(T* data, size_t length)
193 { 193 {
194 static_assert(std::is_standard_layout<T>(), "Given array does not consist of standard layout objects"); 194 static_assert(std::is_standard_layout<T>(), "Given array does not consist of standard layout objects");
195#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER)
195 static_assert(std::is_trivially_copyable<T>(), "Given array does not consist of trivially copyable objects"); 196 static_assert(std::is_trivially_copyable<T>(), "Given array does not consist of trivially copyable objects");
197#endif
196 198
197 if (!IsOpen()) { 199 if (!IsOpen()) {
198 m_good = false; 200 m_good = false;
@@ -210,7 +212,9 @@ public:
210 size_t WriteArray(const T* data, size_t length) 212 size_t WriteArray(const T* data, size_t length)
211 { 213 {
212 static_assert(std::is_standard_layout<T>(), "Given array does not consist of standard layout objects"); 214 static_assert(std::is_standard_layout<T>(), "Given array does not consist of standard layout objects");
215#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER)
213 static_assert(std::is_trivially_copyable<T>(), "Given array does not consist of trivially copyable objects"); 216 static_assert(std::is_trivially_copyable<T>(), "Given array does not consist of trivially copyable objects");
217#endif
214 218
215 if (!IsOpen()) { 219 if (!IsOpen()) {
216 m_good = false; 220 m_good = false;
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index 3d39f94d5..d7008fc66 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -65,6 +65,7 @@ namespace Log {
65 SUB(Render, OpenGL) \ 65 SUB(Render, OpenGL) \
66 CLS(Audio) \ 66 CLS(Audio) \
67 SUB(Audio, DSP) \ 67 SUB(Audio, DSP) \
68 SUB(Audio, Sink) \
68 CLS(Loader) 69 CLS(Loader)
69 70
70// GetClassName is a macro defined by Windows.h, grrr... 71// GetClassName is a macro defined by Windows.h, grrr...
diff --git a/src/common/logging/log.h b/src/common/logging/log.h
index 521362317..c6910b1c7 100644
--- a/src/common/logging/log.h
+++ b/src/common/logging/log.h
@@ -78,8 +78,9 @@ enum class Class : ClassType {
78 Render, ///< Emulator video output and hardware acceleration 78 Render, ///< Emulator video output and hardware acceleration
79 Render_Software, ///< Software renderer backend 79 Render_Software, ///< Software renderer backend
80 Render_OpenGL, ///< OpenGL backend 80 Render_OpenGL, ///< OpenGL backend
81 Audio, ///< Emulator audio output 81 Audio, ///< Audio emulation
82 Audio_DSP, ///< The HLE implementation of the DSP 82 Audio_DSP, ///< The HLE implementation of the DSP
83 Audio_Sink, ///< Emulator audio output backend
83 Loader, ///< ROM loader 84 Loader, ///< ROM loader
84 85
85 Count ///< Total number of logging classes 86 Count ///< Total number of logging classes
diff --git a/src/common/microprofile.h b/src/common/microprofile.h
index d3b6cb97c..ef312c6e1 100644
--- a/src/common/microprofile.h
+++ b/src/common/microprofile.h
@@ -4,6 +4,10 @@
4 4
5#pragma once 5#pragma once
6 6
7// Uncomment this to disable microprofile. This will get you cleaner profiles when using
8// external sampling profilers like "Very Sleepy", and will improve performance somewhat.
9// #define MICROPROFILE_ENABLED 0
10
7// Customized Citra settings. 11// Customized Citra settings.
8// This file wraps the MicroProfile header so that these are consistent everywhere. 12// This file wraps the MicroProfile header so that these are consistent everywhere.
9#define MICROPROFILE_WEBSERVER 0 13#define MICROPROFILE_WEBSERVER 0
diff --git a/src/common/microprofileui.h b/src/common/microprofileui.h
index 97c369bd9..41abe6b75 100644
--- a/src/common/microprofileui.h
+++ b/src/common/microprofileui.h
@@ -13,4 +13,7 @@
13#define MICROPROFILE_HELP_ALT "Right-Click" 13#define MICROPROFILE_HELP_ALT "Right-Click"
14#define MICROPROFILE_HELP_MOD "Ctrl" 14#define MICROPROFILE_HELP_MOD "Ctrl"
15 15
16// This isn't included by microprofileui.h :(
17#include <cstdlib> // For std::abs
18
16#include <microprofileui.h> 19#include <microprofileui.h>
diff --git a/src/common/profiler.cpp b/src/common/profiler.cpp
index 7792edd2f..49eb3f40c 100644
--- a/src/common/profiler.cpp
+++ b/src/common/profiler.cpp
@@ -7,71 +7,16 @@
7#include <vector> 7#include <vector>
8 8
9#include "common/assert.h" 9#include "common/assert.h"
10#include "common/profiler.h"
11#include "common/profiler_reporting.h" 10#include "common/profiler_reporting.h"
12#include "common/synchronized_wrapper.h" 11#include "common/synchronized_wrapper.h"
13 12
14#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013.
15 #define WIN32_LEAN_AND_MEAN
16 #include <Windows.h> // For QueryPerformanceCounter/Frequency
17#endif
18
19namespace Common { 13namespace Common {
20namespace Profiling { 14namespace Profiling {
21 15
22#if ENABLE_PROFILING
23thread_local Timer* Timer::current_timer = nullptr;
24#endif
25
26#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013
27QPCClock::time_point QPCClock::now() {
28 static LARGE_INTEGER freq;
29 // Use this dummy local static to ensure this gets initialized once.
30 static BOOL dummy = QueryPerformanceFrequency(&freq);
31
32 LARGE_INTEGER ticks;
33 QueryPerformanceCounter(&ticks);
34
35 // This is prone to overflow when multiplying, which is why I'm using micro instead of nano. The
36 // correct way to approach this would be to just return ticks as a time_point and then subtract
37 // and do this conversion when creating a duration from two time_points, however, as far as I
38 // could tell the C++ requirements for these types are incompatible with this approach.
39 return time_point(duration(ticks.QuadPart * std::micro::den / freq.QuadPart));
40}
41#endif
42
43TimingCategory::TimingCategory(const char* name, TimingCategory* parent)
44 : accumulated_duration(0) {
45
46 ProfilingManager& manager = GetProfilingManager();
47 category_id = manager.RegisterTimingCategory(this, name);
48 if (parent != nullptr)
49 manager.SetTimingCategoryParent(category_id, parent->category_id);
50}
51
52ProfilingManager::ProfilingManager() 16ProfilingManager::ProfilingManager()
53 : last_frame_end(Clock::now()), this_frame_start(Clock::now()) { 17 : last_frame_end(Clock::now()), this_frame_start(Clock::now()) {
54} 18}
55 19
56unsigned int ProfilingManager::RegisterTimingCategory(TimingCategory* category, const char* name) {
57 TimingCategoryInfo info;
58 info.category = category;
59 info.name = name;
60 info.parent = TimingCategoryInfo::NO_PARENT;
61
62 unsigned int id = (unsigned int)timing_categories.size();
63 timing_categories.push_back(std::move(info));
64
65 return id;
66}
67
68void ProfilingManager::SetTimingCategoryParent(unsigned int category, unsigned int parent) {
69 ASSERT(category < timing_categories.size());
70 ASSERT(parent < timing_categories.size());
71
72 timing_categories[category].parent = parent;
73}
74
75void ProfilingManager::BeginFrame() { 20void ProfilingManager::BeginFrame() {
76 this_frame_start = Clock::now(); 21 this_frame_start = Clock::now();
77} 22}
@@ -82,11 +27,6 @@ void ProfilingManager::FinishFrame() {
82 results.interframe_time = now - last_frame_end; 27 results.interframe_time = now - last_frame_end;
83 results.frame_time = now - this_frame_start; 28 results.frame_time = now - this_frame_start;
84 29
85 results.time_per_category.resize(timing_categories.size());
86 for (size_t i = 0; i < timing_categories.size(); ++i) {
87 results.time_per_category[i] = timing_categories[i].category->GetAccumulatedTime();
88 }
89
90 last_frame_end = now; 30 last_frame_end = now;
91} 31}
92 32
@@ -100,26 +40,9 @@ void TimingResultsAggregator::Clear() {
100 window_size = cursor = 0; 40 window_size = cursor = 0;
101} 41}
102 42
103void TimingResultsAggregator::SetNumberOfCategories(size_t n) {
104 size_t old_size = times_per_category.size();
105 if (n == old_size)
106 return;
107
108 times_per_category.resize(n);
109
110 for (size_t i = old_size; i < n; ++i) {
111 times_per_category[i].resize(max_window_size, Duration::zero());
112 }
113}
114
115void TimingResultsAggregator::AddFrame(const ProfilingFrameResult& frame_result) { 43void TimingResultsAggregator::AddFrame(const ProfilingFrameResult& frame_result) {
116 SetNumberOfCategories(frame_result.time_per_category.size());
117
118 interframe_times[cursor] = frame_result.interframe_time; 44 interframe_times[cursor] = frame_result.interframe_time;
119 frame_times[cursor] = frame_result.frame_time; 45 frame_times[cursor] = frame_result.frame_time;
120 for (size_t i = 0; i < frame_result.time_per_category.size(); ++i) {
121 times_per_category[i][cursor] = frame_result.time_per_category[i];
122 }
123 46
124 ++cursor; 47 ++cursor;
125 if (cursor == max_window_size) 48 if (cursor == max_window_size)
@@ -162,11 +85,6 @@ AggregatedFrameResult TimingResultsAggregator::GetAggregatedResults() const {
162 result.fps = 0.0f; 85 result.fps = 0.0f;
163 } 86 }
164 87
165 result.time_per_category.resize(times_per_category.size());
166 for (size_t i = 0; i < times_per_category.size(); ++i) {
167 result.time_per_category[i] = AggregateField(times_per_category[i], window_size);
168 }
169
170 return result; 88 return result;
171} 89}
172 90
diff --git a/src/common/profiler.h b/src/common/profiler.h
deleted file mode 100644
index 3e967b4bc..000000000
--- a/src/common/profiler.h
+++ /dev/null
@@ -1,152 +0,0 @@
1// Copyright 2015 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <atomic>
8#include <chrono>
9
10#include "common/assert.h"
11#include "common/thread.h"
12
13namespace Common {
14namespace Profiling {
15
16// If this is defined to 0, it turns all Timers into no-ops.
17#ifndef ENABLE_PROFILING
18#define ENABLE_PROFILING 1
19#endif
20
21#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013
22// MSVC up to 2013 doesn't use QueryPerformanceCounter for high_resolution_clock, so it has bad
23// precision. We manually implement a clock based on QPC to get good results.
24
25struct QPCClock {
26 using duration = std::chrono::microseconds;
27 using time_point = std::chrono::time_point<QPCClock>;
28 using rep = duration::rep;
29 using period = duration::period;
30 static const bool is_steady = false;
31
32 static time_point now();
33};
34
35using Clock = QPCClock;
36#else
37using Clock = std::chrono::high_resolution_clock;
38#endif
39
40using Duration = Clock::duration;
41
42/**
43 * Represents a timing category that measured time can be accounted towards. Should be declared as a
44 * global variable and passed to Timers.
45 */
46class TimingCategory final {
47public:
48 TimingCategory(const char* name, TimingCategory* parent = nullptr);
49
50 unsigned int GetCategoryId() const {
51 return category_id;
52 }
53
54 /// Adds some time to this category. Can safely be called from multiple threads at the same time.
55 void AddTime(Duration amount) {
56 std::atomic_fetch_add_explicit(
57 &accumulated_duration, amount.count(),
58 std::memory_order_relaxed);
59 }
60
61 /**
62 * Atomically retrieves the accumulated measured time for this category and resets the counter
63 * to zero. Can be safely called concurrently with AddTime.
64 */
65 Duration GetAccumulatedTime() {
66 return Duration(std::atomic_exchange_explicit(
67 &accumulated_duration, (Duration::rep)0,
68 std::memory_order_relaxed));
69 }
70
71private:
72 unsigned int category_id;
73 std::atomic<Duration::rep> accumulated_duration;
74};
75
76/**
77 * Measures time elapsed between a call to Start and a call to Stop and attributes it to the given
78 * TimingCategory. Start/Stop can be called multiple times on the same timer, but each call must be
79 * appropriately paired.
80 *
81 * When a Timer is started, it automatically pauses a previously running timer on the same thread,
82 * which is resumed when it is stopped. As such, no special action needs to be taken to avoid
83 * double-accounting of time on two categories.
84 */
85class Timer {
86public:
87 Timer(TimingCategory& category) : category(category) {
88 }
89
90 void Start() {
91#if ENABLE_PROFILING
92 ASSERT(!running);
93 previous_timer = current_timer;
94 current_timer = this;
95 if (previous_timer != nullptr)
96 previous_timer->StopTiming();
97
98 StartTiming();
99#endif
100 }
101
102 void Stop() {
103#if ENABLE_PROFILING
104 ASSERT(running);
105 StopTiming();
106
107 if (previous_timer != nullptr)
108 previous_timer->StartTiming();
109 current_timer = previous_timer;
110#endif
111 }
112
113private:
114#if ENABLE_PROFILING
115 void StartTiming() {
116 start = Clock::now();
117 running = true;
118 }
119
120 void StopTiming() {
121 auto duration = Clock::now() - start;
122 running = false;
123 category.AddTime(std::chrono::duration_cast<Duration>(duration));
124 }
125
126 Clock::time_point start;
127 bool running = false;
128
129 Timer* previous_timer;
130 static thread_local Timer* current_timer;
131#endif
132
133 TimingCategory& category;
134};
135
136/**
137 * A Timer that automatically starts timing when created and stops at the end of the scope. Should
138 * be used in the majority of cases.
139 */
140class ScopeTimer : public Timer {
141public:
142 ScopeTimer(TimingCategory& category) : Timer(category) {
143 Start();
144 }
145
146 ~ScopeTimer() {
147 Stop();
148 }
149};
150
151} // namespace Profiling
152} // namespace Common
diff --git a/src/common/profiler_reporting.h b/src/common/profiler_reporting.h
index df98e05b7..fa1ac883f 100644
--- a/src/common/profiler_reporting.h
+++ b/src/common/profiler_reporting.h
@@ -4,22 +4,17 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <chrono>
7#include <cstddef> 8#include <cstddef>
8#include <vector> 9#include <vector>
9 10
10#include "common/profiler.h"
11#include "common/synchronized_wrapper.h" 11#include "common/synchronized_wrapper.h"
12 12
13namespace Common { 13namespace Common {
14namespace Profiling { 14namespace Profiling {
15 15
16struct TimingCategoryInfo { 16using Clock = std::chrono::high_resolution_clock;
17 static const unsigned int NO_PARENT = -1; 17using Duration = Clock::duration;
18
19 TimingCategory* category;
20 const char* name;
21 unsigned int parent;
22};
23 18
24struct ProfilingFrameResult { 19struct ProfilingFrameResult {
25 /// Time since the last delivered frame 20 /// Time since the last delivered frame
@@ -27,22 +22,12 @@ struct ProfilingFrameResult {
27 22
28 /// Time spent processing a frame, excluding VSync 23 /// Time spent processing a frame, excluding VSync
29 Duration frame_time; 24 Duration frame_time;
30
31 /// Total amount of time spent inside each category in this frame. Indexed by the category id
32 std::vector<Duration> time_per_category;
33}; 25};
34 26
35class ProfilingManager final { 27class ProfilingManager final {
36public: 28public:
37 ProfilingManager(); 29 ProfilingManager();
38 30
39 unsigned int RegisterTimingCategory(TimingCategory* category, const char* name);
40 void SetTimingCategoryParent(unsigned int category, unsigned int parent);
41
42 const std::vector<TimingCategoryInfo>& GetTimingCategoriesInfo() const {
43 return timing_categories;
44 }
45
46 /// This should be called after swapping screen buffers. 31 /// This should be called after swapping screen buffers.
47 void BeginFrame(); 32 void BeginFrame();
48 /// This should be called before swapping screen buffers. 33 /// This should be called before swapping screen buffers.
@@ -54,7 +39,6 @@ public:
54 } 39 }
55 40
56private: 41private:
57 std::vector<TimingCategoryInfo> timing_categories;
58 Clock::time_point last_frame_end; 42 Clock::time_point last_frame_end;
59 Clock::time_point this_frame_start; 43 Clock::time_point this_frame_start;
60 44
@@ -73,9 +57,6 @@ struct AggregatedFrameResult {
73 AggregatedDuration frame_time; 57 AggregatedDuration frame_time;
74 58
75 float fps; 59 float fps;
76
77 /// Total amount of time spent inside each category in this frame. Indexed by the category id
78 std::vector<AggregatedDuration> time_per_category;
79}; 60};
80 61
81class TimingResultsAggregator final { 62class TimingResultsAggregator final {
@@ -83,7 +64,6 @@ public:
83 TimingResultsAggregator(size_t window_size); 64 TimingResultsAggregator(size_t window_size);
84 65
85 void Clear(); 66 void Clear();
86 void SetNumberOfCategories(size_t n);
87 67
88 void AddFrame(const ProfilingFrameResult& frame_result); 68 void AddFrame(const ProfilingFrameResult& frame_result);
89 69
@@ -95,7 +75,6 @@ public:
95 75
96 std::vector<Duration> interframe_times; 76 std::vector<Duration> interframe_times;
97 std::vector<Duration> frame_times; 77 std::vector<Duration> frame_times;
98 std::vector<std::vector<Duration>> times_per_category;
99}; 78};
100 79
101ProfilingManager& GetProfilingManager(); 80ProfilingManager& GetProfilingManager();
diff --git a/src/common/swap.h b/src/common/swap.h
index a7c37bc44..1749bd7a4 100644
--- a/src/common/swap.h
+++ b/src/common/swap.h
@@ -25,6 +25,8 @@
25 #include <sys/endian.h> 25 #include <sys/endian.h>
26#endif 26#endif
27 27
28#include <cstring>
29
28#include "common/common_types.h" 30#include "common/common_types.h"
29 31
30// GCC 4.6+ 32// GCC 4.6+
@@ -58,9 +60,6 @@
58 60
59namespace Common { 61namespace Common {
60 62
61inline u8 swap8(u8 _data) {return _data;}
62inline u32 swap24(const u8* _data) {return (_data[0] << 16) | (_data[1] << 8) | _data[2];}
63
64#ifdef _MSC_VER 63#ifdef _MSC_VER
65inline u16 swap16(u16 _data) {return _byteswap_ushort(_data);} 64inline u16 swap16(u16 _data) {return _byteswap_ushort(_data);}
66inline u32 swap32(u32 _data) {return _byteswap_ulong (_data);} 65inline u32 swap32(u32 _data) {return _byteswap_ulong (_data);}
@@ -92,52 +91,29 @@ inline u64 swap64(u64 data) {return ((u64)swap32(data) << 32) | swap32(data >> 3
92#endif 91#endif
93 92
94inline float swapf(float f) { 93inline float swapf(float f) {
95 union { 94 static_assert(sizeof(u32) == sizeof(float),
96 float f; 95 "float must be the same size as uint32_t.");
97 unsigned int u32;
98 } dat1, dat2;
99
100 dat1.f = f;
101 dat2.u32 = swap32(dat1.u32);
102 96
103 return dat2.f; 97 u32 value;
104} 98 std::memcpy(&value, &f, sizeof(u32));
105
106inline double swapd(double f) {
107 union {
108 double f;
109 unsigned long long u64;
110 } dat1, dat2;
111 99
112 dat1.f = f; 100 value = swap32(value);
113 dat2.u64 = swap64(dat1.u64); 101 std::memcpy(&f, &value, sizeof(u32));
114 102
115 return dat2.f; 103 return f;
116} 104}
117 105
118inline u16 swap16(const u8* _pData) {return swap16(*(const u16*)_pData);} 106inline double swapd(double f) {
119inline u32 swap32(const u8* _pData) {return swap32(*(const u32*)_pData);} 107 static_assert(sizeof(u64) == sizeof(double),
120inline u64 swap64(const u8* _pData) {return swap64(*(const u64*)_pData);} 108 "double must be the same size as uint64_t.");
121
122template <int count>
123void swap(u8*);
124 109
125template <> 110 u64 value;
126inline void swap<1>(u8* data) { } 111 std::memcpy(&value, &f, sizeof(u64));
127 112
128template <> 113 value = swap64(value);
129inline void swap<2>(u8* data) { 114 std::memcpy(&f, &value, sizeof(u64));
130 *reinterpret_cast<u16*>(data) = swap16(data);
131}
132
133template <>
134inline void swap<4>(u8* data) {
135 *reinterpret_cast<u32*>(data) = swap32(data);
136}
137 115
138template <> 116 return f;
139inline void swap<8>(u8* data) {
140 *reinterpret_cast<u64*>(data) = swap64(data);
141} 117}
142 118
143} // Namespace Common 119} // Namespace Common
@@ -534,35 +510,35 @@ bool operator==(const S &p, const swap_struct_t<T, F> v) {
534template <typename T> 510template <typename T>
535struct swap_64_t { 511struct swap_64_t {
536 static T swap(T x) { 512 static T swap(T x) {
537 return (T)Common::swap64(*(u64 *)&x); 513 return static_cast<T>(Common::swap64(x));
538 } 514 }
539}; 515};
540 516
541template <typename T> 517template <typename T>
542struct swap_32_t { 518struct swap_32_t {
543 static T swap(T x) { 519 static T swap(T x) {
544 return (T)Common::swap32(*(u32 *)&x); 520 return static_cast<T>(Common::swap32(x));
545 } 521 }
546}; 522};
547 523
548template <typename T> 524template <typename T>
549struct swap_16_t { 525struct swap_16_t {
550 static T swap(T x) { 526 static T swap(T x) {
551 return (T)Common::swap16(*(u16 *)&x); 527 return static_cast<T>(Common::swap16(x));
552 } 528 }
553}; 529};
554 530
555template <typename T> 531template <typename T>
556struct swap_float_t { 532struct swap_float_t {
557 static T swap(T x) { 533 static T swap(T x) {
558 return (T)Common::swapf(*(float *)&x); 534 return static_cast<T>(Common::swapf(x));
559 } 535 }
560}; 536};
561 537
562template <typename T> 538template <typename T>
563struct swap_double_t { 539struct swap_double_t {
564 static T swap(T x) { 540 static T swap(T x) {
565 return (T)Common::swapd(*(double *)&x); 541 return static_cast<T>(Common::swapd(x));
566 } 542 }
567}; 543};
568 544
diff --git a/src/common/x64/emitter.h b/src/common/x64/emitter.h
index a33724146..60a77dfe1 100644
--- a/src/common/x64/emitter.h
+++ b/src/common/x64/emitter.h
@@ -17,6 +17,8 @@
17 17
18#pragma once 18#pragma once
19 19
20#include <cstddef>
21
20#include "common/assert.h" 22#include "common/assert.h"
21#include "common/bit_set.h" 23#include "common/bit_set.h"
22#include "common/common_types.h" 24#include "common/common_types.h"
diff --git a/src/core/arm/dyncom/arm_dyncom.cpp b/src/core/arm/dyncom/arm_dyncom.cpp
index a3581132c..13492a08b 100644
--- a/src/core/arm/dyncom/arm_dyncom.cpp
+++ b/src/core/arm/dyncom/arm_dyncom.cpp
@@ -93,7 +93,7 @@ void ARM_DynCom::ResetContext(Core::ThreadContext& context, u32 stack_top, u32 e
93 context.cpu_registers[0] = arg; 93 context.cpu_registers[0] = arg;
94 context.pc = entry_point; 94 context.pc = entry_point;
95 context.sp = stack_top; 95 context.sp = stack_top;
96 context.cpsr = 0x1F | ((entry_point & 1) << 5); // Usermode and THUMB mode 96 context.cpsr = USER32MODE | ((entry_point & 1) << 5); // Usermode and THUMB mode
97} 97}
98 98
99void ARM_DynCom::SaveContext(Core::ThreadContext& ctx) { 99void ARM_DynCom::SaveContext(Core::ThreadContext& ctx) {
diff --git a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
index 647784208..8d4b26815 100644
--- a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
+++ b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
@@ -10,7 +10,6 @@
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "common/logging/log.h" 11#include "common/logging/log.h"
12#include "common/microprofile.h" 12#include "common/microprofile.h"
13#include "common/profiler.h"
14 13
15#include "core/memory.h" 14#include "core/memory.h"
16#include "core/hle/svc.h" 15#include "core/hle/svc.h"
@@ -25,9 +24,6 @@
25 24
26#include "core/gdbstub/gdbstub.h" 25#include "core/gdbstub/gdbstub.h"
27 26
28Common::Profiling::TimingCategory profile_execute("DynCom::Execute");
29Common::Profiling::TimingCategory profile_decode("DynCom::Decode");
30
31enum { 27enum {
32 COND = (1 << 0), 28 COND = (1 << 0),
33 NON_BRANCH = (1 << 1), 29 NON_BRANCH = (1 << 1),
@@ -3496,7 +3492,6 @@ static unsigned int InterpreterTranslateInstruction(const ARMul_State* cpu, cons
3496} 3492}
3497 3493
3498static int InterpreterTranslateBlock(ARMul_State* cpu, int& bb_start, u32 addr) { 3494static int InterpreterTranslateBlock(ARMul_State* cpu, int& bb_start, u32 addr) {
3499 Common::Profiling::ScopeTimer timer_decode(profile_decode);
3500 MICROPROFILE_SCOPE(DynCom_Decode); 3495 MICROPROFILE_SCOPE(DynCom_Decode);
3501 3496
3502 // Decode instruction, get index 3497 // Decode instruction, get index
@@ -3530,7 +3525,6 @@ static int InterpreterTranslateBlock(ARMul_State* cpu, int& bb_start, u32 addr)
3530} 3525}
3531 3526
3532static int InterpreterTranslateSingle(ARMul_State* cpu, int& bb_start, u32 addr) { 3527static int InterpreterTranslateSingle(ARMul_State* cpu, int& bb_start, u32 addr) {
3533 Common::Profiling::ScopeTimer timer_decode(profile_decode);
3534 MICROPROFILE_SCOPE(DynCom_Decode); 3528 MICROPROFILE_SCOPE(DynCom_Decode);
3535 3529
3536 ARM_INST_PTR inst_base = nullptr; 3530 ARM_INST_PTR inst_base = nullptr;
@@ -3565,7 +3559,6 @@ static int clz(unsigned int x) {
3565MICROPROFILE_DEFINE(DynCom_Execute, "DynCom", "Execute", MP_RGB(255, 0, 0)); 3559MICROPROFILE_DEFINE(DynCom_Execute, "DynCom", "Execute", MP_RGB(255, 0, 0));
3566 3560
3567unsigned InterpreterMainLoop(ARMul_State* cpu) { 3561unsigned InterpreterMainLoop(ARMul_State* cpu) {
3568 Common::Profiling::ScopeTimer timer_execute(profile_execute);
3569 MICROPROFILE_SCOPE(DynCom_Execute); 3562 MICROPROFILE_SCOPE(DynCom_Execute);
3570 3563
3571 GDBStub::BreakpointAddress breakpoint_data; 3564 GDBStub::BreakpointAddress breakpoint_data;
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 3bb843aab..cabab744a 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -51,7 +51,7 @@ void RunLoop(int tight_loop) {
51 } 51 }
52 52
53 HW::Update(); 53 HW::Update();
54 if (HLE::g_reschedule) { 54 if (HLE::IsReschedulePending()) {
55 Kernel::Reschedule(); 55 Kernel::Reschedule();
56 } 56 }
57} 57}
diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp
index c1a7ec5bf..820b19e1a 100644
--- a/src/core/gdbstub/gdbstub.cpp
+++ b/src/core/gdbstub/gdbstub.cpp
@@ -374,7 +374,7 @@ static void SendReply(const char* reply) {
374 374
375 memset(command_buffer, 0, sizeof(command_buffer)); 375 memset(command_buffer, 0, sizeof(command_buffer));
376 376
377 command_length = strlen(reply); 377 command_length = static_cast<u32>(strlen(reply));
378 if (command_length + 4 > sizeof(command_buffer)) { 378 if (command_length + 4 > sizeof(command_buffer)) {
379 LOG_ERROR(Debug_GDBStub, "command_buffer overflow in SendReply"); 379 LOG_ERROR(Debug_GDBStub, "command_buffer overflow in SendReply");
380 return; 380 return;
@@ -437,7 +437,7 @@ static void HandleSetThread() {
437 * 437 *
438 * @param signal Signal to be sent to client. 438 * @param signal Signal to be sent to client.
439 */ 439 */
440void SendSignal(u32 signal) { 440static void SendSignal(u32 signal) {
441 if (gdbserver_socket == -1) { 441 if (gdbserver_socket == -1) {
442 return; 442 return;
443 } 443 }
@@ -515,7 +515,7 @@ static bool IsDataAvailable() {
515 return false; 515 return false;
516 } 516 }
517 517
518 return FD_ISSET(gdbserver_socket, &fd_socket); 518 return FD_ISSET(gdbserver_socket, &fd_socket) != 0;
519} 519}
520 520
521/// Send requested register to gdb client. 521/// Send requested register to gdb client.
@@ -529,7 +529,7 @@ static void ReadRegister() {
529 id |= HexCharToValue(command_buffer[2]); 529 id |= HexCharToValue(command_buffer[2]);
530 } 530 }
531 531
532 if (id >= R0_REGISTER && id <= R15_REGISTER) { 532 if (id <= R15_REGISTER) {
533 IntToGdbHex(reply, Core::g_app_core->GetReg(id)); 533 IntToGdbHex(reply, Core::g_app_core->GetReg(id));
534 } else if (id == CPSR_REGISTER) { 534 } else if (id == CPSR_REGISTER) {
535 IntToGdbHex(reply, Core::g_app_core->GetCPSR()); 535 IntToGdbHex(reply, Core::g_app_core->GetCPSR());
@@ -584,7 +584,7 @@ static void WriteRegister() {
584 id |= HexCharToValue(command_buffer[2]); 584 id |= HexCharToValue(command_buffer[2]);
585 } 585 }
586 586
587 if (id >= R0_REGISTER && id <= R15_REGISTER) { 587 if (id <= R15_REGISTER) {
588 Core::g_app_core->SetReg(id, GdbHexToInt(buffer_ptr)); 588 Core::g_app_core->SetReg(id, GdbHexToInt(buffer_ptr));
589 } else if (id == CPSR_REGISTER) { 589 } else if (id == CPSR_REGISTER) {
590 Core::g_app_core->SetCPSR(GdbHexToInt(buffer_ptr)); 590 Core::g_app_core->SetCPSR(GdbHexToInt(buffer_ptr));
@@ -633,10 +633,10 @@ static void ReadMemory() {
633 633
634 auto start_offset = command_buffer+1; 634 auto start_offset = command_buffer+1;
635 auto addr_pos = std::find(start_offset, command_buffer+command_length, ','); 635 auto addr_pos = std::find(start_offset, command_buffer+command_length, ',');
636 PAddr addr = HexToInt(start_offset, addr_pos - start_offset); 636 PAddr addr = HexToInt(start_offset, static_cast<u32>(addr_pos - start_offset));
637 637
638 start_offset = addr_pos+1; 638 start_offset = addr_pos+1;
639 u32 len = HexToInt(start_offset, (command_buffer + command_length) - start_offset); 639 u32 len = HexToInt(start_offset, static_cast<u32>((command_buffer + command_length) - start_offset));
640 640
641 LOG_DEBUG(Debug_GDBStub, "gdb: addr: %08x len: %08x\n", addr, len); 641 LOG_DEBUG(Debug_GDBStub, "gdb: addr: %08x len: %08x\n", addr, len);
642 642
@@ -658,11 +658,11 @@ static void ReadMemory() {
658static void WriteMemory() { 658static void WriteMemory() {
659 auto start_offset = command_buffer+1; 659 auto start_offset = command_buffer+1;
660 auto addr_pos = std::find(start_offset, command_buffer+command_length, ','); 660 auto addr_pos = std::find(start_offset, command_buffer+command_length, ',');
661 PAddr addr = HexToInt(start_offset, addr_pos - start_offset); 661 PAddr addr = HexToInt(start_offset, static_cast<u32>(addr_pos - start_offset));
662 662
663 start_offset = addr_pos+1; 663 start_offset = addr_pos+1;
664 auto len_pos = std::find(start_offset, command_buffer+command_length, ':'); 664 auto len_pos = std::find(start_offset, command_buffer+command_length, ':');
665 u32 len = HexToInt(start_offset, len_pos - start_offset); 665 u32 len = HexToInt(start_offset, static_cast<u32>(len_pos - start_offset));
666 666
667 u8* dst = Memory::GetPointer(addr); 667 u8* dst = Memory::GetPointer(addr);
668 if (!dst) { 668 if (!dst) {
@@ -713,7 +713,7 @@ static void Continue() {
713 * @param addr Address of breakpoint. 713 * @param addr Address of breakpoint.
714 * @param len Length of breakpoint. 714 * @param len Length of breakpoint.
715 */ 715 */
716bool CommitBreakpoint(BreakpointType type, PAddr addr, u32 len) { 716static bool CommitBreakpoint(BreakpointType type, PAddr addr, u32 len) {
717 std::map<u32, Breakpoint>& p = GetBreakpointList(type); 717 std::map<u32, Breakpoint>& p = GetBreakpointList(type);
718 718
719 Breakpoint breakpoint; 719 Breakpoint breakpoint;
@@ -752,10 +752,10 @@ static void AddBreakpoint() {
752 752
753 auto start_offset = command_buffer+3; 753 auto start_offset = command_buffer+3;
754 auto addr_pos = std::find(start_offset, command_buffer+command_length, ','); 754 auto addr_pos = std::find(start_offset, command_buffer+command_length, ',');
755 PAddr addr = HexToInt(start_offset, addr_pos - start_offset); 755 PAddr addr = HexToInt(start_offset, static_cast<u32>(addr_pos - start_offset));
756 756
757 start_offset = addr_pos+1; 757 start_offset = addr_pos+1;
758 u32 len = HexToInt(start_offset, (command_buffer + command_length) - start_offset); 758 u32 len = HexToInt(start_offset, static_cast<u32>((command_buffer + command_length) - start_offset));
759 759
760 if (type == BreakpointType::Access) { 760 if (type == BreakpointType::Access) {
761 // Access is made up of Read and Write types, so add both breakpoints 761 // Access is made up of Read and Write types, so add both breakpoints
@@ -800,10 +800,10 @@ static void RemoveBreakpoint() {
800 800
801 auto start_offset = command_buffer+3; 801 auto start_offset = command_buffer+3;
802 auto addr_pos = std::find(start_offset, command_buffer+command_length, ','); 802 auto addr_pos = std::find(start_offset, command_buffer+command_length, ',');
803 PAddr addr = HexToInt(start_offset, addr_pos - start_offset); 803 PAddr addr = HexToInt(start_offset, static_cast<u32>(addr_pos - start_offset));
804 804
805 start_offset = addr_pos+1; 805 start_offset = addr_pos+1;
806 u32 len = HexToInt(start_offset, (command_buffer + command_length) - start_offset); 806 u32 len = HexToInt(start_offset, static_cast<u32>((command_buffer + command_length) - start_offset));
807 807
808 if (type == BreakpointType::Access) { 808 if (type == BreakpointType::Access) {
809 // Access is made up of Read and Write types, so add both breakpoints 809 // Access is made up of Read and Write types, so add both breakpoints
@@ -907,7 +907,7 @@ void ToggleServer(bool status) {
907 } 907 }
908} 908}
909 909
910void Init(u16 port) { 910static void Init(u16 port) {
911 if (!g_server_enabled) { 911 if (!g_server_enabled) {
912 // Set the halt loop to false in case the user enabled the gdbstub mid-execution. 912 // Set the halt loop to false in case the user enabled the gdbstub mid-execution.
913 // This way the CPU can still execute normally. 913 // This way the CPU can still execute normally.
diff --git a/src/core/hle/applets/mii_selector.cpp b/src/core/hle/applets/mii_selector.cpp
index 708d2f630..b4456ca90 100644
--- a/src/core/hle/applets/mii_selector.cpp
+++ b/src/core/hle/applets/mii_selector.cpp
@@ -21,13 +21,6 @@
21namespace HLE { 21namespace HLE {
22namespace Applets { 22namespace Applets {
23 23
24MiiSelector::MiiSelector(Service::APT::AppletId id) : Applet(id), started(false) {
25 // Create the SharedMemory that will hold the framebuffer data
26 // TODO(Subv): What size should we use here?
27 using Kernel::MemoryPermission;
28 framebuffer_memory = Kernel::SharedMemory::Create(0x1000, MemoryPermission::ReadWrite, MemoryPermission::ReadWrite, "MiiSelector Memory");
29}
30
31ResultCode MiiSelector::ReceiveParameter(const Service::APT::MessageParameter& parameter) { 24ResultCode MiiSelector::ReceiveParameter(const Service::APT::MessageParameter& parameter) {
32 if (parameter.signal != static_cast<u32>(Service::APT::SignalType::LibAppJustStarted)) { 25 if (parameter.signal != static_cast<u32>(Service::APT::SignalType::LibAppJustStarted)) {
33 LOG_ERROR(Service_APT, "unsupported signal %u", parameter.signal); 26 LOG_ERROR(Service_APT, "unsupported signal %u", parameter.signal);
@@ -36,8 +29,18 @@ ResultCode MiiSelector::ReceiveParameter(const Service::APT::MessageParameter& p
36 return ResultCode(-1); 29 return ResultCode(-1);
37 } 30 }
38 31
32 // The LibAppJustStarted message contains a buffer with the size of the framebuffer shared memory.
33 // Create the SharedMemory that will hold the framebuffer data
34 Service::APT::CaptureBufferInfo capture_info;
35 ASSERT(sizeof(capture_info) == parameter.buffer_size);
36
37 memcpy(&capture_info, parameter.data, sizeof(capture_info));
38 using Kernel::MemoryPermission;
39 framebuffer_memory = Kernel::SharedMemory::Create(capture_info.size, MemoryPermission::ReadWrite,
40 MemoryPermission::ReadWrite, "MiiSelector Memory");
41
42 // Send the response message with the newly created SharedMemory
39 Service::APT::MessageParameter result; 43 Service::APT::MessageParameter result;
40 // The buffer passed in parameter contains the data returned by GSPGPU::ImportDisplayCaptureInfo
41 result.signal = static_cast<u32>(Service::APT::SignalType::LibAppFinished); 44 result.signal = static_cast<u32>(Service::APT::SignalType::LibAppFinished);
42 result.data = nullptr; 45 result.data = nullptr;
43 result.buffer_size = 0; 46 result.buffer_size = 0;
@@ -55,6 +58,11 @@ ResultCode MiiSelector::StartImpl(const Service::APT::AppletStartupParameter& pa
55 // TODO(Subv): Set the expected fields in the response buffer before resending it to the application. 58 // TODO(Subv): Set the expected fields in the response buffer before resending it to the application.
56 // TODO(Subv): Reverse the parameter format for the Mii Selector 59 // TODO(Subv): Reverse the parameter format for the Mii Selector
57 60
61 if(parameter.buffer_size >= sizeof(u32)) {
62 // TODO: defaults return no error, but garbage in other unknown fields
63 memset(parameter.data, 0, sizeof(u32));
64 }
65
58 // Let the application know that we're closing 66 // Let the application know that we're closing
59 Service::APT::MessageParameter message; 67 Service::APT::MessageParameter message;
60 message.buffer_size = parameter.buffer_size; 68 message.buffer_size = parameter.buffer_size;
diff --git a/src/core/hle/applets/mii_selector.h b/src/core/hle/applets/mii_selector.h
index 6a3e7c8eb..be6b04642 100644
--- a/src/core/hle/applets/mii_selector.h
+++ b/src/core/hle/applets/mii_selector.h
@@ -16,17 +16,61 @@
16namespace HLE { 16namespace HLE {
17namespace Applets { 17namespace Applets {
18 18
19struct MiiConfig {
20 u8 unk_000;
21 u8 unk_001;
22 u8 unk_002;
23 u8 unk_003;
24 u8 unk_004;
25 INSERT_PADDING_BYTES(3);
26 u16 unk_008;
27 INSERT_PADDING_BYTES(0x8C - 0xA);
28 u8 unk_08C;
29 INSERT_PADDING_BYTES(3);
30 u16 unk_090;
31 INSERT_PADDING_BYTES(2);
32 u32 unk_094;
33 u16 unk_098;
34 u8 unk_09A[0x64];
35 u8 unk_0FE;
36 u8 unk_0FF;
37 u32 unk_100;
38};
39
40static_assert(sizeof(MiiConfig) == 0x104, "MiiConfig structure has incorrect size");
41#define ASSERT_REG_POSITION(field_name, position) static_assert(offsetof(MiiConfig, field_name) == position, "Field "#field_name" has invalid position")
42ASSERT_REG_POSITION(unk_008, 0x08);
43ASSERT_REG_POSITION(unk_08C, 0x8C);
44ASSERT_REG_POSITION(unk_090, 0x90);
45ASSERT_REG_POSITION(unk_094, 0x94);
46ASSERT_REG_POSITION(unk_0FE, 0xFE);
47#undef ASSERT_REG_POSITION
48
49struct MiiResult {
50 u32 result_code;
51 u8 unk_04;
52 INSERT_PADDING_BYTES(7);
53 u8 unk_0C[0x60];
54 u8 unk_6C[0x16];
55 INSERT_PADDING_BYTES(2);
56};
57static_assert(sizeof(MiiResult) == 0x84, "MiiResult structure has incorrect size");
58#define ASSERT_REG_POSITION(field_name, position) static_assert(offsetof(MiiResult, field_name) == position, "Field "#field_name" has invalid position")
59ASSERT_REG_POSITION(unk_0C, 0x0C);
60ASSERT_REG_POSITION(unk_6C, 0x6C);
61#undef ASSERT_REG_POSITION
62
19class MiiSelector final : public Applet { 63class MiiSelector final : public Applet {
20public: 64public:
21 MiiSelector(Service::APT::AppletId id); 65 MiiSelector(Service::APT::AppletId id) : Applet(id), started(false) { }
22 66
23 ResultCode ReceiveParameter(const Service::APT::MessageParameter& parameter) override; 67 ResultCode ReceiveParameter(const Service::APT::MessageParameter& parameter) override;
24 ResultCode StartImpl(const Service::APT::AppletStartupParameter& parameter) override; 68 ResultCode StartImpl(const Service::APT::AppletStartupParameter& parameter) override;
25 void Update() override; 69 void Update() override;
26 bool IsRunning() const override { return started; } 70 bool IsRunning() const override { return started; }
27 71
28 /// TODO(Subv): Find out what this is actually used for. 72 /// This SharedMemory will be created when we receive the LibAppJustStarted message.
29 /// It is believed that the application stores the current screen image here. 73 /// It holds the framebuffer info retrieved by the application with GSPGPU::ImportDisplayCaptureInfo
30 Kernel::SharedPtr<Kernel::SharedMemory> framebuffer_memory; 74 Kernel::SharedPtr<Kernel::SharedMemory> framebuffer_memory;
31 75
32 /// Whether this applet is currently running instead of the host application or not. 76 /// Whether this applet is currently running instead of the host application or not.
diff --git a/src/core/hle/applets/swkbd.cpp b/src/core/hle/applets/swkbd.cpp
index 1db6b5a17..87238aa1c 100644
--- a/src/core/hle/applets/swkbd.cpp
+++ b/src/core/hle/applets/swkbd.cpp
@@ -24,13 +24,6 @@
24namespace HLE { 24namespace HLE {
25namespace Applets { 25namespace Applets {
26 26
27SoftwareKeyboard::SoftwareKeyboard(Service::APT::AppletId id) : Applet(id), started(false) {
28 // Create the SharedMemory that will hold the framebuffer data
29 // TODO(Subv): What size should we use here?
30 using Kernel::MemoryPermission;
31 framebuffer_memory = Kernel::SharedMemory::Create(0x1000, MemoryPermission::ReadWrite, MemoryPermission::ReadWrite, "SoftwareKeyboard Memory");
32}
33
34ResultCode SoftwareKeyboard::ReceiveParameter(Service::APT::MessageParameter const& parameter) { 27ResultCode SoftwareKeyboard::ReceiveParameter(Service::APT::MessageParameter const& parameter) {
35 if (parameter.signal != static_cast<u32>(Service::APT::SignalType::LibAppJustStarted)) { 28 if (parameter.signal != static_cast<u32>(Service::APT::SignalType::LibAppJustStarted)) {
36 LOG_ERROR(Service_APT, "unsupported signal %u", parameter.signal); 29 LOG_ERROR(Service_APT, "unsupported signal %u", parameter.signal);
@@ -39,8 +32,19 @@ ResultCode SoftwareKeyboard::ReceiveParameter(Service::APT::MessageParameter con
39 return ResultCode(-1); 32 return ResultCode(-1);
40 } 33 }
41 34
35 // The LibAppJustStarted message contains a buffer with the size of the framebuffer shared memory.
36 // Create the SharedMemory that will hold the framebuffer data
37 Service::APT::CaptureBufferInfo capture_info;
38 ASSERT(sizeof(capture_info) == parameter.buffer_size);
39
40 memcpy(&capture_info, parameter.data, sizeof(capture_info));
41
42 using Kernel::MemoryPermission;
43 framebuffer_memory = Kernel::SharedMemory::Create(capture_info.size, MemoryPermission::ReadWrite,
44 MemoryPermission::ReadWrite, "SoftwareKeyboard Memory");
45
46 // Send the response message with the newly created SharedMemory
42 Service::APT::MessageParameter result; 47 Service::APT::MessageParameter result;
43 // The buffer passed in parameter contains the data returned by GSPGPU::ImportDisplayCaptureInfo
44 result.signal = static_cast<u32>(Service::APT::SignalType::LibAppFinished); 48 result.signal = static_cast<u32>(Service::APT::SignalType::LibAppFinished);
45 result.data = nullptr; 49 result.data = nullptr;
46 result.buffer_size = 0; 50 result.buffer_size = 0;
diff --git a/src/core/hle/applets/swkbd.h b/src/core/hle/applets/swkbd.h
index cb95b8d90..cf26a8fb7 100644
--- a/src/core/hle/applets/swkbd.h
+++ b/src/core/hle/applets/swkbd.h
@@ -53,8 +53,7 @@ static_assert(sizeof(SoftwareKeyboardConfig) == 0x400, "Software Keyboard Config
53 53
54class SoftwareKeyboard final : public Applet { 54class SoftwareKeyboard final : public Applet {
55public: 55public:
56 SoftwareKeyboard(Service::APT::AppletId id); 56 SoftwareKeyboard(Service::APT::AppletId id) : Applet(id), started(false) { }
57 ~SoftwareKeyboard() {}
58 57
59 ResultCode ReceiveParameter(const Service::APT::MessageParameter& parameter) override; 58 ResultCode ReceiveParameter(const Service::APT::MessageParameter& parameter) override;
60 ResultCode StartImpl(const Service::APT::AppletStartupParameter& parameter) override; 59 ResultCode StartImpl(const Service::APT::AppletStartupParameter& parameter) override;
@@ -72,8 +71,8 @@ public:
72 */ 71 */
73 void Finalize(); 72 void Finalize();
74 73
75 /// TODO(Subv): Find out what this is actually used for. 74 /// This SharedMemory will be created when we receive the LibAppJustStarted message.
76 /// It is believed that the application stores the current screen image here. 75 /// It holds the framebuffer info retrieved by the application with GSPGPU::ImportDisplayCaptureInfo
77 Kernel::SharedPtr<Kernel::SharedMemory> framebuffer_memory; 76 Kernel::SharedPtr<Kernel::SharedMemory> framebuffer_memory;
78 77
79 /// SharedMemory where the output text will be stored 78 /// SharedMemory where the output text will be stored
diff --git a/src/core/hle/hle.cpp b/src/core/hle/hle.cpp
index e545de3b5..5c5373517 100644
--- a/src/core/hle/hle.cpp
+++ b/src/core/hle/hle.cpp
@@ -12,9 +12,13 @@
12 12
13//////////////////////////////////////////////////////////////////////////////////////////////////// 13////////////////////////////////////////////////////////////////////////////////////////////////////
14 14
15namespace HLE { 15namespace {
16
17bool reschedule; ///< If true, immediately reschedules the CPU to a new thread
16 18
17bool g_reschedule; ///< If true, immediately reschedules the CPU to a new thread 19}
20
21namespace HLE {
18 22
19void Reschedule(const char *reason) { 23void Reschedule(const char *reason) {
20 DEBUG_ASSERT_MSG(reason != nullptr && strlen(reason) < 256, "Reschedule: Invalid or too long reason."); 24 DEBUG_ASSERT_MSG(reason != nullptr && strlen(reason) < 256, "Reschedule: Invalid or too long reason.");
@@ -27,13 +31,21 @@ void Reschedule(const char *reason) {
27 31
28 Core::g_app_core->PrepareReschedule(); 32 Core::g_app_core->PrepareReschedule();
29 33
30 g_reschedule = true; 34 reschedule = true;
35}
36
37bool IsReschedulePending() {
38 return reschedule;
39}
40
41void DoneRescheduling() {
42 reschedule = false;
31} 43}
32 44
33void Init() { 45void Init() {
34 Service::Init(); 46 Service::Init();
35 47
36 g_reschedule = false; 48 reschedule = false;
37 49
38 LOG_DEBUG(Kernel, "initialized OK"); 50 LOG_DEBUG(Kernel, "initialized OK");
39} 51}
diff --git a/src/core/hle/hle.h b/src/core/hle/hle.h
index e0b97797c..69ac0ade6 100644
--- a/src/core/hle/hle.h
+++ b/src/core/hle/hle.h
@@ -13,9 +13,9 @@ const Handle INVALID_HANDLE = 0;
13 13
14namespace HLE { 14namespace HLE {
15 15
16extern bool g_reschedule; ///< If true, immediately reschedules the CPU to a new thread
17
18void Reschedule(const char *reason); 16void Reschedule(const char *reason);
17bool IsReschedulePending();
18void DoneRescheduling();
19 19
20void Init(); 20void Init();
21void Shutdown(); 21void Shutdown();
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index 532ce3020..d781ef32c 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -107,6 +107,8 @@ public:
107 ProcessFlags flags; 107 ProcessFlags flags;
108 /// Kernel compatibility version for this process 108 /// Kernel compatibility version for this process
109 u16 kernel_version = 0; 109 u16 kernel_version = 0;
110 /// The default CPU for this process, threads are scheduled on this cpu by default.
111 u8 ideal_processor = 0;
110 112
111 /// The id of this process 113 /// The id of this process
112 u32 process_id = next_process_id++; 114 u32 process_id = next_process_id++;
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index e611eec72..68f026918 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -537,7 +537,8 @@ void Reschedule() {
537 537
538 Thread* cur = GetCurrentThread(); 538 Thread* cur = GetCurrentThread();
539 Thread* next = PopNextReadyThread(); 539 Thread* next = PopNextReadyThread();
540 HLE::g_reschedule = false; 540
541 HLE::DoneRescheduling();
541 542
542 // Don't bother switching to the same thread 543 // Don't bother switching to the same thread
543 if (next == cur) 544 if (next == cur)
diff --git a/src/core/hle/result.h b/src/core/hle/result.h
index 2d22652d9..3fc1ab4ee 100644
--- a/src/core/hle/result.h
+++ b/src/core/hle/result.h
@@ -5,7 +5,6 @@
5#pragma once 5#pragma once
6 6
7#include <new> 7#include <new>
8#include <type_traits>
9#include <utility> 8#include <utility>
10 9
11#include "common/assert.h" 10#include "common/assert.h"
@@ -18,6 +17,7 @@
18/// Detailed description of the error. This listing is likely incomplete. 17/// Detailed description of the error. This listing is likely incomplete.
19enum class ErrorDescription : u32 { 18enum class ErrorDescription : u32 {
20 Success = 0, 19 Success = 0,
20 OS_InvalidBufferDescriptor = 48,
21 WrongAddress = 53, 21 WrongAddress = 53,
22 FS_NotFound = 120, 22 FS_NotFound = 120,
23 FS_AlreadyExists = 190, 23 FS_AlreadyExists = 190,
diff --git a/src/core/hle/service/ac_u.cpp b/src/core/hle/service/ac_u.cpp
index d67325506..5241dd3e7 100644
--- a/src/core/hle/service/ac_u.cpp
+++ b/src/core/hle/service/ac_u.cpp
@@ -3,6 +3,8 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/logging/log.h" 5#include "common/logging/log.h"
6
7#include "core/hle/kernel/event.h"
6#include "core/hle/service/ac_u.h" 8#include "core/hle/service/ac_u.h"
7 9
8//////////////////////////////////////////////////////////////////////////////////////////////////// 10////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -11,6 +13,28 @@
11namespace AC_U { 13namespace AC_U {
12 14
13/** 15/**
16 * AC_U::CloseAsync service function
17 * Inputs:
18 * 1 : Always 0x20
19 * 3 : Always 0
20 * 4 : Event handle, should be signaled when AC connection is closed
21 * Outputs:
22 * 1 : Result of function, 0 on success, otherwise error code
23 */
24static void CloseAsync(Service::Interface* self) {
25 u32* cmd_buff = Kernel::GetCommandBuffer();
26
27 auto evt = Kernel::g_handle_table.Get<Kernel::Event>(cmd_buff[4]);
28
29 if (evt) {
30 evt->name = "AC_U:close_event";
31 evt->Signal();
32 }
33 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
34
35 LOG_WARNING(Service_AC, "(STUBBED) called");
36}
37/**
14 * AC_U::GetWifiStatus service function 38 * AC_U::GetWifiStatus service function
15 * Outputs: 39 * Outputs:
16 * 1 : Result of function, 0 on success, otherwise error code 40 * 1 : Result of function, 0 on success, otherwise error code
@@ -47,7 +71,7 @@ const Interface::FunctionInfo FunctionTable[] = {
47 {0x00010000, nullptr, "CreateDefaultConfig"}, 71 {0x00010000, nullptr, "CreateDefaultConfig"},
48 {0x00040006, nullptr, "ConnectAsync"}, 72 {0x00040006, nullptr, "ConnectAsync"},
49 {0x00050002, nullptr, "GetConnectResult"}, 73 {0x00050002, nullptr, "GetConnectResult"},
50 {0x00080004, nullptr, "CloseAsync"}, 74 {0x00080004, CloseAsync, "CloseAsync"},
51 {0x00090002, nullptr, "GetCloseResult"}, 75 {0x00090002, nullptr, "GetCloseResult"},
52 {0x000A0000, nullptr, "GetLastErrorCode"}, 76 {0x000A0000, nullptr, "GetLastErrorCode"},
53 {0x000D0000, GetWifiStatus, "GetWifiStatus"}, 77 {0x000D0000, GetWifiStatus, "GetWifiStatus"},
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index 9591522e5..3f71e7f2b 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -43,7 +43,7 @@ void FindContentInfos(Service::Interface* self) {
43 am_content_count[media_type] = cmd_buff[4]; 43 am_content_count[media_type] = cmd_buff[4];
44 44
45 cmd_buff[1] = RESULT_SUCCESS.raw; 45 cmd_buff[1] = RESULT_SUCCESS.raw;
46 LOG_WARNING(Service_AM, "(STUBBED) media_type=%u, title_id=0x%016lx, content_cound=%u, content_ids_pointer=0x%08x, content_info_pointer=0x%08x", 46 LOG_WARNING(Service_AM, "(STUBBED) media_type=%u, title_id=0x%016llx, content_cound=%u, content_ids_pointer=0x%08x, content_info_pointer=0x%08x",
47 media_type, title_id, am_content_count[media_type], content_ids_pointer, content_info_pointer); 47 media_type, title_id, am_content_count[media_type], content_ids_pointer, content_info_pointer);
48} 48}
49 49
diff --git a/src/core/hle/service/apt/apt.h b/src/core/hle/service/apt/apt.h
index 668b4a66f..1a1034fcc 100644
--- a/src/core/hle/service/apt/apt.h
+++ b/src/core/hle/service/apt/apt.h
@@ -5,6 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include "common/common_types.h" 7#include "common/common_types.h"
8#include "common/swap.h"
8 9
9#include "core/hle/kernel/kernel.h" 10#include "core/hle/kernel/kernel.h"
10 11
@@ -31,6 +32,20 @@ struct AppletStartupParameter {
31 u8* data = nullptr; 32 u8* data = nullptr;
32}; 33};
33 34
35/// Used by the application to pass information about the current framebuffer to applets.
36struct CaptureBufferInfo {
37 u32_le size;
38 u8 is_3d;
39 INSERT_PADDING_BYTES(0x3); // Padding for alignment
40 u32_le top_screen_left_offset;
41 u32_le top_screen_right_offset;
42 u32_le top_screen_format;
43 u32_le bottom_screen_left_offset;
44 u32_le bottom_screen_right_offset;
45 u32_le bottom_screen_format;
46};
47static_assert(sizeof(CaptureBufferInfo) == 0x20, "CaptureBufferInfo struct has incorrect size");
48
34/// Signals used by APT functions 49/// Signals used by APT functions
35enum class SignalType : u32 { 50enum class SignalType : u32 {
36 None = 0x0, 51 None = 0x0,
diff --git a/src/core/hle/service/cfg/cfg.cpp b/src/core/hle/service/cfg/cfg.cpp
index 525432957..b9322c55d 100644
--- a/src/core/hle/service/cfg/cfg.cpp
+++ b/src/core/hle/service/cfg/cfg.cpp
@@ -389,6 +389,10 @@ ResultCode FormatConfig() {
389 res = CreateConfigInfoBlk(0x000F0004, sizeof(CONSOLE_MODEL), 0xC, &CONSOLE_MODEL); 389 res = CreateConfigInfoBlk(0x000F0004, sizeof(CONSOLE_MODEL), 0xC, &CONSOLE_MODEL);
390 if (!res.IsSuccess()) return res; 390 if (!res.IsSuccess()) return res;
391 391
392 // 0x00170000 - Unknown
393 res = CreateConfigInfoBlk(0x00170000, 0x4, 0xE, zero_buffer);
394 if (!res.IsSuccess()) return res;
395
392 // Save the buffer to the file 396 // Save the buffer to the file
393 res = UpdateConfigNANDSavegame(); 397 res = UpdateConfigNANDSavegame();
394 if (!res.IsSuccess()) 398 if (!res.IsSuccess())
diff --git a/src/core/hle/service/cfg/cfg.h b/src/core/hle/service/cfg/cfg.h
index 606ab99cf..c01806836 100644
--- a/src/core/hle/service/cfg/cfg.h
+++ b/src/core/hle/service/cfg/cfg.h
@@ -98,19 +98,6 @@ void GetCountryCodeString(Service::Interface* self);
98void GetCountryCodeID(Service::Interface* self); 98void GetCountryCodeID(Service::Interface* self);
99 99
100/** 100/**
101 * CFG::GetConfigInfoBlk2 service function
102 * Inputs:
103 * 0 : 0x00010082
104 * 1 : Size
105 * 2 : Block ID
106 * 3 : Descriptor for the output buffer
107 * 4 : Output buffer pointer
108 * Outputs:
109 * 1 : Result of function, 0 on success, otherwise error code
110 */
111void GetConfigInfoBlk2(Service::Interface* self);
112
113/**
114 * CFG::SecureInfoGetRegion service function 101 * CFG::SecureInfoGetRegion service function
115 * Inputs: 102 * Inputs:
116 * 1 : None 103 * 1 : None
diff --git a/src/core/hle/service/dsp_dsp.cpp b/src/core/hle/service/dsp_dsp.cpp
index 08e437125..274fc751a 100644
--- a/src/core/hle/service/dsp_dsp.cpp
+++ b/src/core/hle/service/dsp_dsp.cpp
@@ -2,6 +2,7 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
5#include <cinttypes> 6#include <cinttypes>
6 7
7#include "audio_core/hle/pipe.h" 8#include "audio_core/hle/pipe.h"
@@ -12,37 +13,80 @@
12#include "core/hle/kernel/event.h" 13#include "core/hle/kernel/event.h"
13#include "core/hle/service/dsp_dsp.h" 14#include "core/hle/service/dsp_dsp.h"
14 15
16using DspPipe = DSP::HLE::DspPipe;
17
15//////////////////////////////////////////////////////////////////////////////////////////////////// 18////////////////////////////////////////////////////////////////////////////////////////////////////
16// Namespace DSP_DSP 19// Namespace DSP_DSP
17 20
18namespace DSP_DSP { 21namespace DSP_DSP {
19 22
20static u32 read_pipe_count;
21static Kernel::SharedPtr<Kernel::Event> semaphore_event; 23static Kernel::SharedPtr<Kernel::Event> semaphore_event;
22 24
23struct PairHash { 25/// There are three types of interrupts
24 template <typename T, typename U> 26enum class InterruptType {
25 std::size_t operator()(const std::pair<T, U> &x) const { 27 Zero, One, Pipe
26 // TODO(yuriks): Replace with better hash combining function. 28};
27 return std::hash<T>()(x.first) ^ std::hash<U>()(x.second); 29constexpr size_t NUM_INTERRUPT_TYPE = 3;
30
31class InterruptEvents final {
32public:
33 void Signal(InterruptType type, DspPipe pipe) {
34 Kernel::SharedPtr<Kernel::Event>& event = Get(type, pipe);
35 if (event) {
36 event->Signal();
37 }
28 } 38 }
39
40 Kernel::SharedPtr<Kernel::Event>& Get(InterruptType type, DspPipe dsp_pipe) {
41 switch (type) {
42 case InterruptType::Zero:
43 return zero;
44 case InterruptType::One:
45 return one;
46 case InterruptType::Pipe: {
47 const size_t pipe_index = static_cast<size_t>(dsp_pipe);
48 ASSERT(pipe_index < DSP::HLE::NUM_DSP_PIPE);
49 return pipe[pipe_index];
50 }
51 }
52
53 UNREACHABLE_MSG("Invalid interrupt type = %zu", static_cast<size_t>(type));
54 }
55
56 bool HasTooManyEventsRegistered() const {
57 // Actual service implementation only has 6 'slots' for interrupts.
58 constexpr size_t max_number_of_interrupt_events = 6;
59
60 size_t number = std::count_if(pipe.begin(), pipe.end(), [](const auto& evt) {
61 return evt != nullptr;
62 });
63
64 if (zero != nullptr)
65 number++;
66 if (one != nullptr)
67 number++;
68
69 return number >= max_number_of_interrupt_events;
70 }
71
72private:
73 /// Currently unknown purpose
74 Kernel::SharedPtr<Kernel::Event> zero = nullptr;
75 /// Currently unknown purpose
76 Kernel::SharedPtr<Kernel::Event> one = nullptr;
77 /// Each DSP pipe has an associated interrupt
78 std::array<Kernel::SharedPtr<Kernel::Event>, DSP::HLE::NUM_DSP_PIPE> pipe = {{}};
29}; 79};
30 80
31/// Map of (audio interrupt number, channel number) to Kernel::Events. See: RegisterInterruptEvents 81static InterruptEvents interrupt_events;
32static std::unordered_map<std::pair<u32, u32>, Kernel::SharedPtr<Kernel::Event>, PairHash> interrupt_events;
33 82
34// DSP Interrupts: 83// DSP Interrupts:
35// Interrupt #2 occurs every frame tick. Userland programs normally have a thread that's waiting 84// The audio-pipe interrupt occurs every frame tick. Userland programs normally have a thread
36// for an interrupt event. Immediately after this interrupt event, userland normally updates the 85// that's waiting for an interrupt event. Immediately after this interrupt event, userland
37// state in the next region and increments the relevant frame counter by two. 86// normally updates the state in the next region and increments the relevant frame counter by
38void SignalAllInterrupts() { 87// two.
39 // HACK: The other interrupts have currently unknown purpose, we trigger them each tick in any case. 88void SignalPipeInterrupt(DspPipe pipe) {
40 for (auto& interrupt_event : interrupt_events) 89 interrupt_events.Signal(InterruptType::Pipe, pipe);
41 interrupt_event.second->Signal();
42}
43
44void SignalInterrupt(u32 interrupt, u32 channel) {
45 interrupt_events[std::make_pair(interrupt, channel)]->Signal();
46} 90}
47 91
48/** 92/**
@@ -58,7 +102,10 @@ static void ConvertProcessAddressFromDspDram(Service::Interface* self) {
58 102
59 u32 addr = cmd_buff[1]; 103 u32 addr = cmd_buff[1];
60 104
105 cmd_buff[0] = IPC::MakeHeader(0xC, 2, 0);
61 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 106 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
107
108 // TODO(merry): There is a per-region offset missing in this calculation (that seems to be always zero).
62 cmd_buff[2] = (addr << 1) + (Memory::DSP_RAM_VADDR + 0x40000); 109 cmd_buff[2] = (addr << 1) + (Memory::DSP_RAM_VADDR + 0x40000);
63 110
64 LOG_DEBUG(Service_DSP, "addr=0x%08X", addr); 111 LOG_DEBUG(Service_DSP, "addr=0x%08X", addr);
@@ -113,7 +160,9 @@ static void LoadComponent(Service::Interface* self) {
113static void GetSemaphoreEventHandle(Service::Interface* self) { 160static void GetSemaphoreEventHandle(Service::Interface* self) {
114 u32* cmd_buff = Kernel::GetCommandBuffer(); 161 u32* cmd_buff = Kernel::GetCommandBuffer();
115 162
163 cmd_buff[0] = IPC::MakeHeader(0x16, 1, 2);
116 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 164 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
165 // cmd_buff[2] not set
117 cmd_buff[3] = Kernel::g_handle_table.Create(semaphore_event).MoveFrom(); // Event handle 166 cmd_buff[3] = Kernel::g_handle_table.Create(semaphore_event).MoveFrom(); // Event handle
118 167
119 LOG_WARNING(Service_DSP, "(STUBBED) called"); 168 LOG_WARNING(Service_DSP, "(STUBBED) called");
@@ -138,8 +187,7 @@ static void FlushDataCache(Service::Interface* self) {
138 u32 size = cmd_buff[2]; 187 u32 size = cmd_buff[2];
139 u32 process = cmd_buff[4]; 188 u32 process = cmd_buff[4];
140 189
141 // TODO(purpasmart96): Verify return header on HW 190 cmd_buff[0] = IPC::MakeHeader(0x13, 1, 0);
142
143 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 191 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
144 192
145 LOG_TRACE(Service_DSP, "called address=0x%08X, size=0x%X, process=0x%08X", address, size, process); 193 LOG_TRACE(Service_DSP, "called address=0x%08X, size=0x%X, process=0x%08X", address, size, process);
@@ -148,8 +196,8 @@ static void FlushDataCache(Service::Interface* self) {
148/** 196/**
149 * DSP_DSP::RegisterInterruptEvents service function 197 * DSP_DSP::RegisterInterruptEvents service function
150 * Inputs: 198 * Inputs:
151 * 1 : Interrupt Number 199 * 1 : Interrupt Type
152 * 2 : Channel Number 200 * 2 : Pipe Number
153 * 4 : Interrupt event handle 201 * 4 : Interrupt event handle
154 * Outputs: 202 * Outputs:
155 * 1 : Result of function, 0 on success, otherwise error code 203 * 1 : Result of function, 0 on success, otherwise error code
@@ -157,23 +205,40 @@ static void FlushDataCache(Service::Interface* self) {
157static void RegisterInterruptEvents(Service::Interface* self) { 205static void RegisterInterruptEvents(Service::Interface* self) {
158 u32* cmd_buff = Kernel::GetCommandBuffer(); 206 u32* cmd_buff = Kernel::GetCommandBuffer();
159 207
160 u32 interrupt = cmd_buff[1]; 208 u32 type_index = cmd_buff[1];
161 u32 channel = cmd_buff[2]; 209 u32 pipe_index = cmd_buff[2];
162 u32 event_handle = cmd_buff[4]; 210 u32 event_handle = cmd_buff[4];
163 211
212 ASSERT_MSG(type_index < NUM_INTERRUPT_TYPE && pipe_index < DSP::HLE::NUM_DSP_PIPE,
213 "Invalid type or pipe: type = %u, pipe = %u", type_index, pipe_index);
214
215 InterruptType type = static_cast<InterruptType>(cmd_buff[1]);
216 DspPipe pipe = static_cast<DspPipe>(cmd_buff[2]);
217
218 cmd_buff[0] = IPC::MakeHeader(0x15, 1, 0);
219
164 if (event_handle) { 220 if (event_handle) {
165 auto evt = Kernel::g_handle_table.Get<Kernel::Event>(cmd_buff[4]); 221 auto evt = Kernel::g_handle_table.Get<Kernel::Event>(cmd_buff[4]);
166 if (evt) { 222
167 interrupt_events[std::make_pair(interrupt, channel)] = evt; 223 if (!evt) {
168 cmd_buff[1] = RESULT_SUCCESS.raw; 224 LOG_INFO(Service_DSP, "Invalid event handle! type=%u, pipe=%u, event_handle=0x%08X", type_index, pipe_index, event_handle);
169 LOG_INFO(Service_DSP, "Registered interrupt=%u, channel=%u, event_handle=0x%08X", interrupt, channel, event_handle); 225 ASSERT(false); // TODO: This should really be handled at an IPC translation layer.
170 } else { 226 }
171 LOG_CRITICAL(Service_DSP, "Invalid event handle! interrupt=%u, channel=%u, event_handle=0x%08X", interrupt, channel, event_handle); 227
172 ASSERT(false); // This should really be handled at a IPC translation layer. 228 if (interrupt_events.HasTooManyEventsRegistered()) {
229 LOG_INFO(Service_DSP, "Ran out of space to register interrupts (Attempted to register type=%u, pipe=%u, event_handle=0x%08X)",
230 type_index, pipe_index, event_handle);
231 cmd_buff[1] = ResultCode(ErrorDescription::InvalidResultValue, ErrorModule::DSP, ErrorSummary::OutOfResource, ErrorLevel::Status).raw;
232 return;
173 } 233 }
234
235 interrupt_events.Get(type, pipe) = evt;
236 LOG_INFO(Service_DSP, "Registered type=%u, pipe=%u, event_handle=0x%08X", type_index, pipe_index, event_handle);
237 cmd_buff[1] = RESULT_SUCCESS.raw;
174 } else { 238 } else {
175 interrupt_events.erase(std::make_pair(interrupt, channel)); 239 interrupt_events.Get(type, pipe) = nullptr;
176 LOG_INFO(Service_DSP, "Unregistered interrupt=%u, channel=%u, event_handle=0x%08X", interrupt, channel, event_handle); 240 LOG_INFO(Service_DSP, "Unregistered interrupt=%u, channel=%u, event_handle=0x%08X", type_index, pipe_index, event_handle);
241 cmd_buff[1] = RESULT_SUCCESS.raw;
177 } 242 }
178} 243}
179 244
@@ -187,6 +252,7 @@ static void RegisterInterruptEvents(Service::Interface* self) {
187static void SetSemaphore(Service::Interface* self) { 252static void SetSemaphore(Service::Interface* self) {
188 u32* cmd_buff = Kernel::GetCommandBuffer(); 253 u32* cmd_buff = Kernel::GetCommandBuffer();
189 254
255 cmd_buff[0] = IPC::MakeHeader(0x7, 1, 0);
190 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 256 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
191 257
192 LOG_WARNING(Service_DSP, "(STUBBED) called"); 258 LOG_WARNING(Service_DSP, "(STUBBED) called");
@@ -195,7 +261,7 @@ static void SetSemaphore(Service::Interface* self) {
195/** 261/**
196 * DSP_DSP::WriteProcessPipe service function 262 * DSP_DSP::WriteProcessPipe service function
197 * Inputs: 263 * Inputs:
198 * 1 : Channel 264 * 1 : Pipe Number
199 * 2 : Size 265 * 2 : Size
200 * 3 : (size << 14) | 0x402 266 * 3 : (size << 14) | 0x402
201 * 4 : Buffer 267 * 4 : Buffer
@@ -206,24 +272,32 @@ static void SetSemaphore(Service::Interface* self) {
206static void WriteProcessPipe(Service::Interface* self) { 272static void WriteProcessPipe(Service::Interface* self) {
207 u32* cmd_buff = Kernel::GetCommandBuffer(); 273 u32* cmd_buff = Kernel::GetCommandBuffer();
208 274
209 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(cmd_buff[1]); 275 u32 pipe_index = cmd_buff[1];
210 u32 size = cmd_buff[2]; 276 u32 size = cmd_buff[2];
211 u32 buffer = cmd_buff[4]; 277 u32 buffer = cmd_buff[4];
212 278
213 ASSERT_MSG(IPC::StaticBufferDesc(size, 1) == cmd_buff[3], "IPC static buffer descriptor failed validation (0x%X). pipe=%u, size=0x%X, buffer=0x%08X", cmd_buff[3], pipe, size, buffer); 279 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(pipe_index);
214 ASSERT_MSG(Memory::GetPointer(buffer) != nullptr, "Invalid Buffer: pipe=%u, size=0x%X, buffer=0x%08X", pipe, size, buffer);
215 280
216 std::vector<u8> message(size); 281 if (IPC::StaticBufferDesc(size, 1) != cmd_buff[3]) {
282 LOG_ERROR(Service_DSP, "IPC static buffer descriptor failed validation (0x%X). pipe=%u, size=0x%X, buffer=0x%08X", cmd_buff[3], pipe_index, size, buffer);
283 cmd_buff[0] = IPC::MakeHeader(0, 1, 0);
284 cmd_buff[1] = ResultCode(ErrorDescription::OS_InvalidBufferDescriptor, ErrorModule::OS, ErrorSummary::WrongArgument, ErrorLevel::Permanent).raw;
285 return;
286 }
287
288 ASSERT_MSG(Memory::GetPointer(buffer) != nullptr, "Invalid Buffer: pipe=%u, size=0x%X, buffer=0x%08X", pipe_index, size, buffer);
217 289
218 for (size_t i = 0; i < size; i++) { 290 std::vector<u8> message(size);
291 for (u32 i = 0; i < size; i++) {
219 message[i] = Memory::Read8(buffer + i); 292 message[i] = Memory::Read8(buffer + i);
220 } 293 }
221 294
222 DSP::HLE::PipeWrite(pipe, message); 295 DSP::HLE::PipeWrite(pipe, message);
223 296
297 cmd_buff[0] = IPC::MakeHeader(0xD, 1, 0);
224 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 298 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
225 299
226 LOG_DEBUG(Service_DSP, "pipe=%u, size=0x%X, buffer=0x%08X", pipe, size, buffer); 300 LOG_DEBUG(Service_DSP, "pipe=%u, size=0x%X, buffer=0x%08X", pipe_index, size, buffer);
227} 301}
228 302
229/** 303/**
@@ -243,13 +317,16 @@ static void WriteProcessPipe(Service::Interface* self) {
243static void ReadPipeIfPossible(Service::Interface* self) { 317static void ReadPipeIfPossible(Service::Interface* self) {
244 u32* cmd_buff = Kernel::GetCommandBuffer(); 318 u32* cmd_buff = Kernel::GetCommandBuffer();
245 319
246 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(cmd_buff[1]); 320 u32 pipe_index = cmd_buff[1];
247 u32 unknown = cmd_buff[2]; 321 u32 unknown = cmd_buff[2];
248 u32 size = cmd_buff[3] & 0xFFFF; // Lower 16 bits are size 322 u32 size = cmd_buff[3] & 0xFFFF; // Lower 16 bits are size
249 VAddr addr = cmd_buff[0x41]; 323 VAddr addr = cmd_buff[0x41];
250 324
251 ASSERT_MSG(Memory::GetPointer(addr) != nullptr, "Invalid addr: pipe=0x%08X, unknown=0x%08X, size=0x%X, buffer=0x%08X", pipe, unknown, size, addr); 325 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(pipe_index);
326
327 ASSERT_MSG(Memory::GetPointer(addr) != nullptr, "Invalid addr: pipe=%u, unknown=0x%08X, size=0x%X, buffer=0x%08X", pipe_index, unknown, size, addr);
252 328
329 cmd_buff[0] = IPC::MakeHeader(0x10, 1, 2);
253 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 330 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
254 if (DSP::HLE::GetPipeReadableSize(pipe) >= size) { 331 if (DSP::HLE::GetPipeReadableSize(pipe) >= size) {
255 std::vector<u8> response = DSP::HLE::PipeRead(pipe, size); 332 std::vector<u8> response = DSP::HLE::PipeRead(pipe, size);
@@ -260,8 +337,10 @@ static void ReadPipeIfPossible(Service::Interface* self) {
260 } else { 337 } else {
261 cmd_buff[2] = 0; // Return no data 338 cmd_buff[2] = 0; // Return no data
262 } 339 }
340 cmd_buff[3] = IPC::StaticBufferDesc(size, 0);
341 cmd_buff[4] = addr;
263 342
264 LOG_DEBUG(Service_DSP, "pipe=0x%08X, unknown=0x%08X, size=0x%X, buffer=0x%08X, return cmd_buff[2]=0x%08X", pipe, unknown, size, addr, cmd_buff[2]); 343 LOG_DEBUG(Service_DSP, "pipe=%u, unknown=0x%08X, size=0x%X, buffer=0x%08X, return cmd_buff[2]=0x%08X", pipe_index, unknown, size, addr, cmd_buff[2]);
265} 344}
266 345
267/** 346/**
@@ -278,26 +357,31 @@ static void ReadPipeIfPossible(Service::Interface* self) {
278static void ReadPipe(Service::Interface* self) { 357static void ReadPipe(Service::Interface* self) {
279 u32* cmd_buff = Kernel::GetCommandBuffer(); 358 u32* cmd_buff = Kernel::GetCommandBuffer();
280 359
281 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(cmd_buff[1]); 360 u32 pipe_index = cmd_buff[1];
282 u32 unknown = cmd_buff[2]; 361 u32 unknown = cmd_buff[2];
283 u32 size = cmd_buff[3] & 0xFFFF; // Lower 16 bits are size 362 u32 size = cmd_buff[3] & 0xFFFF; // Lower 16 bits are size
284 VAddr addr = cmd_buff[0x41]; 363 VAddr addr = cmd_buff[0x41];
285 364
286 ASSERT_MSG(Memory::GetPointer(addr) != nullptr, "Invalid addr: pipe=0x%08X, unknown=0x%08X, size=0x%X, buffer=0x%08X", pipe, unknown, size, addr); 365 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(pipe_index);
366
367 ASSERT_MSG(Memory::GetPointer(addr) != nullptr, "Invalid addr: pipe=%u, unknown=0x%08X, size=0x%X, buffer=0x%08X", pipe_index, unknown, size, addr);
287 368
288 if (DSP::HLE::GetPipeReadableSize(pipe) >= size) { 369 if (DSP::HLE::GetPipeReadableSize(pipe) >= size) {
289 std::vector<u8> response = DSP::HLE::PipeRead(pipe, size); 370 std::vector<u8> response = DSP::HLE::PipeRead(pipe, size);
290 371
291 Memory::WriteBlock(addr, response.data(), response.size()); 372 Memory::WriteBlock(addr, response.data(), response.size());
292 373
374 cmd_buff[0] = IPC::MakeHeader(0xE, 2, 2);
293 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 375 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
294 cmd_buff[2] = static_cast<u32>(response.size()); 376 cmd_buff[2] = static_cast<u32>(response.size());
377 cmd_buff[3] = IPC::StaticBufferDesc(size, 0);
378 cmd_buff[4] = addr;
295 } else { 379 } else {
296 // No more data is in pipe. Hardware hangs in this case; this should never happen. 380 // No more data is in pipe. Hardware hangs in this case; this should never happen.
297 UNREACHABLE(); 381 UNREACHABLE();
298 } 382 }
299 383
300 LOG_DEBUG(Service_DSP, "pipe=0x%08X, unknown=0x%08X, size=0x%X, buffer=0x%08X, return cmd_buff[2]=0x%08X", pipe, unknown, size, addr, cmd_buff[2]); 384 LOG_DEBUG(Service_DSP, "pipe=%u, unknown=0x%08X, size=0x%X, buffer=0x%08X, return cmd_buff[2]=0x%08X", pipe_index, unknown, size, addr, cmd_buff[2]);
301} 385}
302 386
303/** 387/**
@@ -312,13 +396,16 @@ static void ReadPipe(Service::Interface* self) {
312static void GetPipeReadableSize(Service::Interface* self) { 396static void GetPipeReadableSize(Service::Interface* self) {
313 u32* cmd_buff = Kernel::GetCommandBuffer(); 397 u32* cmd_buff = Kernel::GetCommandBuffer();
314 398
315 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(cmd_buff[1]); 399 u32 pipe_index = cmd_buff[1];
316 u32 unknown = cmd_buff[2]; 400 u32 unknown = cmd_buff[2];
317 401
402 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(pipe_index);
403
404 cmd_buff[0] = IPC::MakeHeader(0xF, 2, 0);
318 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 405 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
319 cmd_buff[2] = DSP::HLE::GetPipeReadableSize(pipe); 406 cmd_buff[2] = static_cast<u32>(DSP::HLE::GetPipeReadableSize(pipe));
320 407
321 LOG_DEBUG(Service_DSP, "pipe=0x%08X, unknown=0x%08X, return cmd_buff[2]=0x%08X", pipe, unknown, cmd_buff[2]); 408 LOG_DEBUG(Service_DSP, "pipe=%u, unknown=0x%08X, return cmd_buff[2]=0x%08X", pipe_index, unknown, cmd_buff[2]);
322} 409}
323 410
324/** 411/**
@@ -333,6 +420,7 @@ static void SetSemaphoreMask(Service::Interface* self) {
333 420
334 u32 mask = cmd_buff[1]; 421 u32 mask = cmd_buff[1];
335 422
423 cmd_buff[0] = IPC::MakeHeader(0x17, 1, 0);
336 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 424 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
337 425
338 LOG_WARNING(Service_DSP, "(STUBBED) called mask=0x%08X", mask); 426 LOG_WARNING(Service_DSP, "(STUBBED) called mask=0x%08X", mask);
@@ -350,6 +438,7 @@ static void SetSemaphoreMask(Service::Interface* self) {
350static void GetHeadphoneStatus(Service::Interface* self) { 438static void GetHeadphoneStatus(Service::Interface* self) {
351 u32* cmd_buff = Kernel::GetCommandBuffer(); 439 u32* cmd_buff = Kernel::GetCommandBuffer();
352 440
441 cmd_buff[0] = IPC::MakeHeader(0x1F, 2, 0);
353 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 442 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
354 cmd_buff[2] = 0; // Not using headphones? 443 cmd_buff[2] = 0; // Not using headphones?
355 444
@@ -376,6 +465,7 @@ static void RecvData(Service::Interface* self) {
376 465
377 // Application reads this after requesting DSP shutdown, to verify the DSP has indeed shutdown or slept. 466 // Application reads this after requesting DSP shutdown, to verify the DSP has indeed shutdown or slept.
378 467
468 cmd_buff[0] = IPC::MakeHeader(0x1, 2, 0);
379 cmd_buff[1] = RESULT_SUCCESS.raw; 469 cmd_buff[1] = RESULT_SUCCESS.raw;
380 switch (DSP::HLE::GetDspState()) { 470 switch (DSP::HLE::GetDspState()) {
381 case DSP::HLE::DspState::On: 471 case DSP::HLE::DspState::On:
@@ -411,6 +501,7 @@ static void RecvDataIsReady(Service::Interface* self) {
411 501
412 ASSERT_MSG(register_number == 0, "Unknown register_number %u", register_number); 502 ASSERT_MSG(register_number == 0, "Unknown register_number %u", register_number);
413 503
504 cmd_buff[0] = IPC::MakeHeader(0x2, 2, 0);
414 cmd_buff[1] = RESULT_SUCCESS.raw; 505 cmd_buff[1] = RESULT_SUCCESS.raw;
415 cmd_buff[2] = 1; // Ready to read 506 cmd_buff[2] = 1; // Ready to read
416 507
@@ -458,14 +549,14 @@ const Interface::FunctionInfo FunctionTable[] = {
458 549
459Interface::Interface() { 550Interface::Interface() {
460 semaphore_event = Kernel::Event::Create(Kernel::ResetType::OneShot, "DSP_DSP::semaphore_event"); 551 semaphore_event = Kernel::Event::Create(Kernel::ResetType::OneShot, "DSP_DSP::semaphore_event");
461 read_pipe_count = 0; 552 interrupt_events = {};
462 553
463 Register(FunctionTable); 554 Register(FunctionTable);
464} 555}
465 556
466Interface::~Interface() { 557Interface::~Interface() {
467 semaphore_event = nullptr; 558 semaphore_event = nullptr;
468 interrupt_events.clear(); 559 interrupt_events = {};
469} 560}
470 561
471} // namespace 562} // namespace
diff --git a/src/core/hle/service/dsp_dsp.h b/src/core/hle/service/dsp_dsp.h
index 32b89e9bb..22f6687cc 100644
--- a/src/core/hle/service/dsp_dsp.h
+++ b/src/core/hle/service/dsp_dsp.h
@@ -8,6 +8,12 @@
8 8
9#include "core/hle/service/service.h" 9#include "core/hle/service/service.h"
10 10
11namespace DSP {
12namespace HLE {
13enum class DspPipe;
14}
15}
16
11//////////////////////////////////////////////////////////////////////////////////////////////////// 17////////////////////////////////////////////////////////////////////////////////////////////////////
12// Namespace DSP_DSP 18// Namespace DSP_DSP
13 19
@@ -23,15 +29,10 @@ public:
23 } 29 }
24}; 30};
25 31
26/// Signal all audio related interrupts.
27void SignalAllInterrupts();
28
29/** 32/**
30 * Signal a specific audio related interrupt based on interrupt id and channel id. 33 * Signal a specific DSP related interrupt of type == InterruptType::Pipe, pipe == pipe.
31 * @param interrupt_id The interrupt id 34 * @param pipe The DSP pipe for which to signal an interrupt for.
32 * @param channel_id The channel id
33 * The significance of various values of interrupt_id and channel_id is not yet known.
34 */ 35 */
35void SignalInterrupt(u32 interrupt_id, u32 channel_id); 36void SignalPipeInterrupt(DSP::HLE::DspPipe pipe);
36 37
37} // namespace 38} // namespace DSP_DSP
diff --git a/src/core/hle/service/fs/archive.cpp b/src/core/hle/service/fs/archive.cpp
index e9588cb72..cc51ede0c 100644
--- a/src/core/hle/service/fs/archive.cpp
+++ b/src/core/hle/service/fs/archive.cpp
@@ -114,6 +114,7 @@ ResultVal<bool> File::SyncRequest() {
114 return read.Code(); 114 return read.Code();
115 } 115 }
116 cmd_buff[2] = static_cast<u32>(*read); 116 cmd_buff[2] = static_cast<u32>(*read);
117 Memory::RasterizerFlushAndInvalidateRegion(Memory::VirtualToPhysicalAddress(address), length);
117 break; 118 break;
118 } 119 }
119 120
diff --git a/src/core/hle/service/fs/fs_user.cpp b/src/core/hle/service/fs/fs_user.cpp
index 3ec7ceb30..7df7da5a4 100644
--- a/src/core/hle/service/fs/fs_user.cpp
+++ b/src/core/hle/service/fs/fs_user.cpp
@@ -250,7 +250,7 @@ static void CreateFile(Service::Interface* self) {
250 250
251 FileSys::Path file_path(filename_type, filename_size, filename_ptr); 251 FileSys::Path file_path(filename_type, filename_size, filename_ptr);
252 252
253 LOG_DEBUG(Service_FS, "type=%d size=%llu data=%s", filename_type, filename_size, file_path.DebugStr().c_str()); 253 LOG_DEBUG(Service_FS, "type=%d size=%llu data=%s", filename_type, file_size, file_path.DebugStr().c_str());
254 254
255 cmd_buff[1] = CreateFileInArchive(archive_handle, file_path, file_size).raw; 255 cmd_buff[1] = CreateFileInArchive(archive_handle, file_path, file_size).raw;
256} 256}
diff --git a/src/core/hle/service/gsp_gpu.cpp b/src/core/hle/service/gsp_gpu.cpp
index 0c655395e..b4c146e08 100644
--- a/src/core/hle/service/gsp_gpu.cpp
+++ b/src/core/hle/service/gsp_gpu.cpp
@@ -15,8 +15,6 @@
15 15
16#include "video_core/gpu_debugger.h" 16#include "video_core/gpu_debugger.h"
17#include "video_core/debug_utils/debug_utils.h" 17#include "video_core/debug_utils/debug_utils.h"
18#include "video_core/renderer_base.h"
19#include "video_core/video_core.h"
20 18
21#include "gsp_gpu.h" 19#include "gsp_gpu.h"
22 20
@@ -45,6 +43,8 @@ Kernel::SharedPtr<Kernel::SharedMemory> g_shared_memory;
45/// Thread index into interrupt relay queue 43/// Thread index into interrupt relay queue
46u32 g_thread_id = 0; 44u32 g_thread_id = 0;
47 45
46static bool gpu_right_acquired = false;
47
48/// Gets a pointer to a thread command buffer in GSP shared memory 48/// Gets a pointer to a thread command buffer in GSP shared memory
49static inline u8* GetCommandBuffer(u32 thread_id) { 49static inline u8* GetCommandBuffer(u32 thread_id) {
50 return g_shared_memory->GetPointer(0x800 + (thread_id * sizeof(CommandBuffer))); 50 return g_shared_memory->GetPointer(0x800 + (thread_id * sizeof(CommandBuffer)));
@@ -291,8 +291,6 @@ static void FlushDataCache(Service::Interface* self) {
291 u32 size = cmd_buff[2]; 291 u32 size = cmd_buff[2];
292 u32 process = cmd_buff[4]; 292 u32 process = cmd_buff[4];
293 293
294 VideoCore::g_renderer->Rasterizer()->InvalidateRegion(Memory::VirtualToPhysicalAddress(address), size);
295
296 // TODO(purpasmart96): Verify return header on HW 294 // TODO(purpasmart96): Verify return header on HW
297 295
298 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 296 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
@@ -374,6 +372,9 @@ static void UnregisterInterruptRelayQueue(Service::Interface* self) {
374 * @todo This probably does not belong in the GSP module, instead move to video_core 372 * @todo This probably does not belong in the GSP module, instead move to video_core
375 */ 373 */
376void SignalInterrupt(InterruptId interrupt_id) { 374void SignalInterrupt(InterruptId interrupt_id) {
375 if (!gpu_right_acquired) {
376 return;
377 }
377 if (nullptr == g_interrupt_event) { 378 if (nullptr == g_interrupt_event) {
378 LOG_WARNING(Service_GSP, "cannot synchronize until GSP event has been created!"); 379 LOG_WARNING(Service_GSP, "cannot synchronize until GSP event has been created!");
379 return; 380 return;
@@ -408,6 +409,8 @@ void SignalInterrupt(InterruptId interrupt_id) {
408 g_interrupt_event->Signal(); 409 g_interrupt_event->Signal();
409} 410}
410 411
412MICROPROFILE_DEFINE(GPU_GSP_DMA, "GPU", "GSP DMA", MP_RGB(100, 0, 255));
413
411/// Executes the next GSP command 414/// Executes the next GSP command
412static void ExecuteCommand(const Command& command, u32 thread_id) { 415static void ExecuteCommand(const Command& command, u32 thread_id) {
413 // Utility function to convert register ID to address 416 // Utility function to convert register ID to address
@@ -419,18 +422,21 @@ static void ExecuteCommand(const Command& command, u32 thread_id) {
419 422
420 // GX request DMA - typically used for copying memory from GSP heap to VRAM 423 // GX request DMA - typically used for copying memory from GSP heap to VRAM
421 case CommandId::REQUEST_DMA: 424 case CommandId::REQUEST_DMA:
422 VideoCore::g_renderer->Rasterizer()->FlushRegion(Memory::VirtualToPhysicalAddress(command.dma_request.source_address), 425 {
423 command.dma_request.size); 426 MICROPROFILE_SCOPE(GPU_GSP_DMA);
427
428 // TODO: Consider attempting rasterizer-accelerated surface blit if that usage is ever possible/likely
429 Memory::RasterizerFlushRegion(Memory::VirtualToPhysicalAddress(command.dma_request.source_address),
430 command.dma_request.size);
431 Memory::RasterizerFlushAndInvalidateRegion(Memory::VirtualToPhysicalAddress(command.dma_request.dest_address),
432 command.dma_request.size);
424 433
425 memcpy(Memory::GetPointer(command.dma_request.dest_address), 434 memcpy(Memory::GetPointer(command.dma_request.dest_address),
426 Memory::GetPointer(command.dma_request.source_address), 435 Memory::GetPointer(command.dma_request.source_address),
427 command.dma_request.size); 436 command.dma_request.size);
428 SignalInterrupt(InterruptId::DMA); 437 SignalInterrupt(InterruptId::DMA);
429
430 VideoCore::g_renderer->Rasterizer()->InvalidateRegion(Memory::VirtualToPhysicalAddress(command.dma_request.dest_address),
431 command.dma_request.size);
432 break; 438 break;
433 439 }
434 // TODO: This will need some rework in the future. (why?) 440 // TODO: This will need some rework in the future. (why?)
435 case CommandId::SUBMIT_GPU_CMDLIST: 441 case CommandId::SUBMIT_GPU_CMDLIST:
436 { 442 {
@@ -517,13 +523,8 @@ static void ExecuteCommand(const Command& command, u32 thread_id) {
517 523
518 case CommandId::CACHE_FLUSH: 524 case CommandId::CACHE_FLUSH:
519 { 525 {
520 for (auto& region : command.cache_flush.regions) { 526 // NOTE: Rasterizer flushing handled elsewhere in CPU read/write and other GPU handlers
521 if (region.size == 0) 527 // Use command.cache_flush.regions to implement this handler
522 break;
523
524 VideoCore::g_renderer->Rasterizer()->InvalidateRegion(
525 Memory::VirtualToPhysicalAddress(region.address), region.size);
526 }
527 break; 528 break;
528 } 529 }
529 530
@@ -628,6 +629,35 @@ static void ImportDisplayCaptureInfo(Service::Interface* self) {
628 LOG_WARNING(Service_GSP, "called"); 629 LOG_WARNING(Service_GSP, "called");
629} 630}
630 631
632/**
633 * GSP_GPU::AcquireRight service function
634 * Outputs:
635 * 1: Result code
636 */
637static void AcquireRight(Service::Interface* self) {
638 u32* cmd_buff = Kernel::GetCommandBuffer();
639
640 gpu_right_acquired = true;
641
642 cmd_buff[1] = RESULT_SUCCESS.raw;
643
644 LOG_WARNING(Service_GSP, "called");
645}
646
647/**
648 * GSP_GPU::ReleaseRight service function
649 * Outputs:
650 * 1: Result code
651 */
652static void ReleaseRight(Service::Interface* self) {
653 u32* cmd_buff = Kernel::GetCommandBuffer();
654
655 gpu_right_acquired = false;
656
657 cmd_buff[1] = RESULT_SUCCESS.raw;
658
659 LOG_WARNING(Service_GSP, "called");
660}
631 661
632const Interface::FunctionInfo FunctionTable[] = { 662const Interface::FunctionInfo FunctionTable[] = {
633 {0x00010082, WriteHWRegs, "WriteHWRegs"}, 663 {0x00010082, WriteHWRegs, "WriteHWRegs"},
@@ -651,8 +681,8 @@ const Interface::FunctionInfo FunctionTable[] = {
651 {0x00130042, RegisterInterruptRelayQueue, "RegisterInterruptRelayQueue"}, 681 {0x00130042, RegisterInterruptRelayQueue, "RegisterInterruptRelayQueue"},
652 {0x00140000, UnregisterInterruptRelayQueue, "UnregisterInterruptRelayQueue"}, 682 {0x00140000, UnregisterInterruptRelayQueue, "UnregisterInterruptRelayQueue"},
653 {0x00150002, nullptr, "TryAcquireRight"}, 683 {0x00150002, nullptr, "TryAcquireRight"},
654 {0x00160042, nullptr, "AcquireRight"}, 684 {0x00160042, AcquireRight, "AcquireRight"},
655 {0x00170000, nullptr, "ReleaseRight"}, 685 {0x00170000, ReleaseRight, "ReleaseRight"},
656 {0x00180000, ImportDisplayCaptureInfo, "ImportDisplayCaptureInfo"}, 686 {0x00180000, ImportDisplayCaptureInfo, "ImportDisplayCaptureInfo"},
657 {0x00190000, nullptr, "SaveVramSysArea"}, 687 {0x00190000, nullptr, "SaveVramSysArea"},
658 {0x001A0000, nullptr, "RestoreVramSysArea"}, 688 {0x001A0000, nullptr, "RestoreVramSysArea"},
@@ -673,11 +703,13 @@ Interface::Interface() {
673 g_shared_memory = nullptr; 703 g_shared_memory = nullptr;
674 704
675 g_thread_id = 0; 705 g_thread_id = 0;
706 gpu_right_acquired = false;
676} 707}
677 708
678Interface::~Interface() { 709Interface::~Interface() {
679 g_interrupt_event = nullptr; 710 g_interrupt_event = nullptr;
680 g_shared_memory = nullptr; 711 g_shared_memory = nullptr;
712 gpu_right_acquired = false;
681} 713}
682 714
683} // namespace 715} // namespace
diff --git a/src/core/hle/service/gsp_gpu.h b/src/core/hle/service/gsp_gpu.h
index 55a993bb8..3b4b678a3 100644
--- a/src/core/hle/service/gsp_gpu.h
+++ b/src/core/hle/service/gsp_gpu.h
@@ -10,6 +10,7 @@
10#include "common/bit_field.h" 10#include "common/bit_field.h"
11#include "common/common_types.h" 11#include "common/common_types.h"
12 12
13#include "core/hle/result.h"
13#include "core/hle/service/service.h" 14#include "core/hle/service/service.h"
14 15
15//////////////////////////////////////////////////////////////////////////////////////////////////// 16////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/core/hle/service/ndm/ndm.cpp b/src/core/hle/service/ndm/ndm.cpp
index 47076a7b8..bc9c3413d 100644
--- a/src/core/hle/service/ndm/ndm.cpp
+++ b/src/core/hle/service/ndm/ndm.cpp
@@ -11,28 +11,217 @@
11namespace Service { 11namespace Service {
12namespace NDM { 12namespace NDM {
13 13
14void SuspendDaemons(Service::Interface* self) { 14enum : u32 {
15 DEFAULT_RETRY_INTERVAL = 10,
16 DEFAULT_SCAN_INTERVAL = 30
17};
18
19static DaemonMask daemon_bit_mask = DaemonMask::Default;
20static DaemonMask default_daemon_bit_mask = DaemonMask::Default;
21static std::array<DaemonStatus, 4> daemon_status = { DaemonStatus::Idle, DaemonStatus::Idle, DaemonStatus::Idle, DaemonStatus::Idle };
22static ExclusiveState exclusive_state = ExclusiveState::None;
23static u32 scan_interval = DEFAULT_SCAN_INTERVAL;
24static u32 retry_interval = DEFAULT_RETRY_INTERVAL;
25static bool daemon_lock_enabled = false;
26
27void EnterExclusiveState(Service::Interface* self) {
28 u32* cmd_buff = Kernel::GetCommandBuffer();
29 exclusive_state = static_cast<ExclusiveState>(cmd_buff[1]);
30
31 cmd_buff[0] = IPC::MakeHeader(0x1, 1, 0);
32 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
33 LOG_WARNING(Service_NDM, "(STUBBED) exclusive_state=0x%08X ", exclusive_state);
34}
35
36void LeaveExclusiveState(Service::Interface* self) {
37 u32* cmd_buff = Kernel::GetCommandBuffer();
38 exclusive_state = ExclusiveState::None;
39
40 cmd_buff[0] = IPC::MakeHeader(0x2, 1, 0);
41 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
42 LOG_WARNING(Service_NDM, "(STUBBED) exclusive_state=0x%08X ", exclusive_state);
43}
44
45void QueryExclusiveMode(Service::Interface* self) {
15 u32* cmd_buff = Kernel::GetCommandBuffer(); 46 u32* cmd_buff = Kernel::GetCommandBuffer();
16 47
17 LOG_WARNING(Service_NDM, "(STUBBED) bit_mask=0x%08X ", cmd_buff[1]); 48 cmd_buff[0] = IPC::MakeHeader(0x3, 2, 0);
49 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
50 cmd_buff[2] = static_cast<u32>(exclusive_state);
51 LOG_WARNING(Service_NDM, "(STUBBED) exclusive_state=0x%08X ", exclusive_state);
52}
53
54void LockState(Service::Interface* self) {
55 u32* cmd_buff = Kernel::GetCommandBuffer();
56 daemon_lock_enabled = true;
57
58 cmd_buff[0] = IPC::MakeHeader(0x4, 1, 0);
59 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
60 LOG_WARNING(Service_NDM, "(STUBBED) daemon_lock_enabled=0x%08X ", daemon_lock_enabled);
61}
62
63void UnlockState(Service::Interface* self) {
64 u32* cmd_buff = Kernel::GetCommandBuffer();
65 daemon_lock_enabled = false;
18 66
67 cmd_buff[0] = IPC::MakeHeader(0x5, 1, 0);
19 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 68 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
69 LOG_WARNING(Service_NDM, "(STUBBED) daemon_lock_enabled=0x%08X ", daemon_lock_enabled);
70}
71
72void SuspendDaemons(Service::Interface* self) {
73 u32* cmd_buff = Kernel::GetCommandBuffer();
74 u32 bit_mask = cmd_buff[1] & 0xF;
75 daemon_bit_mask = static_cast<DaemonMask>(static_cast<u32>(default_daemon_bit_mask) & ~bit_mask);
76 for (size_t index = 0; index < daemon_status.size(); ++index) {
77 if (bit_mask & (1 << index)) {
78 daemon_status[index] = DaemonStatus::Suspended;
79 }
80 }
81
82 cmd_buff[0] = IPC::MakeHeader(0x6, 1, 0);
83 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
84 LOG_WARNING(Service_NDM, "(STUBBED) daemon_bit_mask=0x%08X ", daemon_bit_mask);
20} 85}
21 86
22void ResumeDaemons(Service::Interface* self) { 87void ResumeDaemons(Service::Interface* self) {
23 u32* cmd_buff = Kernel::GetCommandBuffer(); 88 u32* cmd_buff = Kernel::GetCommandBuffer();
89 u32 bit_mask = cmd_buff[1] & 0xF;
90 daemon_bit_mask = static_cast<DaemonMask>(static_cast<u32>(daemon_bit_mask) | bit_mask);
91 for (size_t index = 0; index < daemon_status.size(); ++index) {
92 if (bit_mask & (1 << index)) {
93 daemon_status[index] = DaemonStatus::Idle;
94 }
95 }
96
97 cmd_buff[0] = IPC::MakeHeader(0x7, 1, 0);
98 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
99 LOG_WARNING(Service_NDM, "(STUBBED) daemon_bit_mask=0x%08X ", daemon_bit_mask);
100}
101
102void SuspendScheduler(Service::Interface* self) {
103 u32* cmd_buff = Kernel::GetCommandBuffer();
104
105 cmd_buff[0] = IPC::MakeHeader(0x8, 1, 0);
106 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
107 LOG_WARNING(Service_NDM, "(STUBBED) called");
108}
109
110void ResumeScheduler(Service::Interface* self) {
111 u32* cmd_buff = Kernel::GetCommandBuffer();
112
113 cmd_buff[0] = IPC::MakeHeader(0x9, 1, 0);
114 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
115 LOG_WARNING(Service_NDM, "(STUBBED) called");
116}
117
118void QueryStatus(Service::Interface* self) {
119 u32* cmd_buff = Kernel::GetCommandBuffer();
120 u32 daemon = cmd_buff[1] & 0xF;
24 121
25 LOG_WARNING(Service_NDM, "(STUBBED) bit_mask=0x%08X ", cmd_buff[1]); 122 cmd_buff[0] = IPC::MakeHeader(0xD, 2, 0);
123 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
124 cmd_buff[2] = static_cast<u32>(daemon_status.at(daemon));
125 LOG_WARNING(Service_NDM, "(STUBBED) daemon=0x%08X, daemon_status=0x%08X", daemon, cmd_buff[2]);
126}
127
128void GetDaemonDisableCount(Service::Interface* self) {
129 u32* cmd_buff = Kernel::GetCommandBuffer();
130 u32 daemon = cmd_buff[1] & 0xF;
131
132 cmd_buff[0] = IPC::MakeHeader(0xE, 3, 0);
133 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
134 cmd_buff[2] = 0;
135 cmd_buff[3] = 0;
136 LOG_WARNING(Service_NDM, "(STUBBED) daemon=0x%08X", daemon);
137}
138
139void GetSchedulerDisableCount(Service::Interface* self) {
140 u32* cmd_buff = Kernel::GetCommandBuffer();
141
142 cmd_buff[0] = IPC::MakeHeader(0xF, 3, 0);
143 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
144 cmd_buff[2] = 0;
145 cmd_buff[3] = 0;
146 LOG_WARNING(Service_NDM, "(STUBBED) called");
147}
148
149void SetScanInterval(Service::Interface* self) {
150 u32* cmd_buff = Kernel::GetCommandBuffer();
151 scan_interval = cmd_buff[1];
26 152
153 cmd_buff[0] = IPC::MakeHeader(0x10, 1, 0);
27 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 154 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
155 LOG_WARNING(Service_NDM, "(STUBBED) scan_interval=0x%08X ", scan_interval);
156}
157
158void GetScanInterval(Service::Interface* self) {
159 u32* cmd_buff = Kernel::GetCommandBuffer();
160
161 cmd_buff[0] = IPC::MakeHeader(0x11, 2, 0);
162 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
163 cmd_buff[2] = scan_interval;
164 LOG_WARNING(Service_NDM, "(STUBBED) scan_interval=0x%08X ", scan_interval);
165}
166
167void SetRetryInterval(Service::Interface* self) {
168 u32* cmd_buff = Kernel::GetCommandBuffer();
169 retry_interval = cmd_buff[1];
170
171 cmd_buff[0] = IPC::MakeHeader(0x12, 1, 0);
172 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
173 LOG_WARNING(Service_NDM, "(STUBBED) retry_interval=0x%08X ", retry_interval);
174}
175
176void GetRetryInterval(Service::Interface* self) {
177 u32* cmd_buff = Kernel::GetCommandBuffer();
178
179 cmd_buff[0] = IPC::MakeHeader(0x13, 2, 0);
180 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
181 cmd_buff[2] = retry_interval;
182 LOG_WARNING(Service_NDM, "(STUBBED) retry_interval=0x%08X ", retry_interval);
28} 183}
29 184
30void OverrideDefaultDaemons(Service::Interface* self) { 185void OverrideDefaultDaemons(Service::Interface* self) {
31 u32* cmd_buff = Kernel::GetCommandBuffer(); 186 u32* cmd_buff = Kernel::GetCommandBuffer();
187 u32 bit_mask = cmd_buff[1] & 0xF;
188 default_daemon_bit_mask = static_cast<DaemonMask>(bit_mask);
189 daemon_bit_mask = default_daemon_bit_mask;
190 for (size_t index = 0; index < daemon_status.size(); ++index) {
191 if (bit_mask & (1 << index)) {
192 daemon_status[index] = DaemonStatus::Idle;
193 }
194 }
32 195
33 LOG_WARNING(Service_NDM, "(STUBBED) bit_mask=0x%08X ", cmd_buff[1]); 196 cmd_buff[0] = IPC::MakeHeader(0x14, 1, 0);
197 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
198 LOG_WARNING(Service_NDM, "(STUBBED) default_daemon_bit_mask=0x%08X ", default_daemon_bit_mask);
199}
200
201void ResetDefaultDaemons(Service::Interface* self) {
202 u32* cmd_buff = Kernel::GetCommandBuffer();
203 default_daemon_bit_mask = DaemonMask::Default;
204
205 cmd_buff[0] = IPC::MakeHeader(0x15, 1, 0);
206 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
207 LOG_WARNING(Service_NDM, "(STUBBED) default_daemon_bit_mask=0x%08X ", default_daemon_bit_mask);
208}
209
210void GetDefaultDaemons(Service::Interface* self) {
211 u32* cmd_buff = Kernel::GetCommandBuffer();
212
213 cmd_buff[0] = IPC::MakeHeader(0x16, 2, 0);
214 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
215 cmd_buff[2] = static_cast<u32>(default_daemon_bit_mask);
216 LOG_WARNING(Service_NDM, "(STUBBED) default_daemon_bit_mask=0x%08X ", default_daemon_bit_mask);
217}
218
219void ClearHalfAwakeMacFilter(Service::Interface* self) {
220 u32* cmd_buff = Kernel::GetCommandBuffer();
34 221
222 cmd_buff[0] = IPC::MakeHeader(0x17, 1, 0);
35 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 223 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
224 LOG_WARNING(Service_NDM, "(STUBBED) called");
36} 225}
37 226
38void Init() { 227void Init() {
diff --git a/src/core/hle/service/ndm/ndm.h b/src/core/hle/service/ndm/ndm.h
index 734730f8c..5c2b968dc 100644
--- a/src/core/hle/service/ndm/ndm.h
+++ b/src/core/hle/service/ndm/ndm.h
@@ -12,10 +12,91 @@ class Interface;
12 12
13namespace NDM { 13namespace NDM {
14 14
15enum class Daemon : u32 {
16 Cec = 0,
17 Boss = 1,
18 Nim = 2,
19 Friend = 3
20};
21
22enum class DaemonMask : u32 {
23 None = 0,
24 Cec = (1 << static_cast<u32>(Daemon::Cec)),
25 Boss = (1 << static_cast<u32>(Daemon::Boss)),
26 Nim = (1 << static_cast<u32>(Daemon::Nim)),
27 Friend = (1 << static_cast<u32>(Daemon::Friend)),
28 Default = Cec | Friend,
29 All = Cec | Boss | Nim | Friend
30};
31
32enum class DaemonStatus : u32 {
33 Busy = 0,
34 Idle = 1,
35 Suspending = 2,
36 Suspended = 3
37};
38
39enum class ExclusiveState : u32 {
40 None = 0,
41 Infrastructure = 1,
42 LocalCommunications = 2,
43 Streetpass = 3,
44 StreetpassData = 4,
45};
46
47/**
48 * NDM::EnterExclusiveState service function
49 * Inputs:
50 * 0 : Header code [0x00010042]
51 * 1 : Exclusive State
52 * 2 : 0x20
53 * Outputs:
54 * 1 : Result, 0 on success, otherwise error code
55 */
56void EnterExclusiveState(Service::Interface* self);
57
58/**
59 * NDM::LeaveExclusiveState service function
60 * Inputs:
61 * 0 : Header code [0x00020002]
62 * 1 : 0x20
63 * Outputs:
64 * 1 : Result, 0 on success, otherwise error code
65 */
66void LeaveExclusiveState(Service::Interface* self);
67
68/**
69 * NDM::QueryExclusiveMode service function
70 * Inputs:
71 * 0 : Header code [0x00030000]
72 * Outputs:
73 * 1 : Result, 0 on success, otherwise error code
74 * 2 : Current Exclusive State
75 */
76void QueryExclusiveMode(Service::Interface* self);
77
78/**
79 * NDM::LockState service function
80 * Inputs:
81 * 0 : Header code [0x00040002]
82 * Outputs:
83 * 1 : Result, 0 on success, otherwise error code
84 */
85void LockState(Service::Interface* self);
86
87/**
88 * NDM::UnlockState service function
89 * Inputs:
90 * 0 : Header code [0x00050002]
91 * Outputs:
92 * 1 : Result, 0 on success, otherwise error code
93 */
94void UnlockState(Service::Interface* self);
95
15/** 96/**
16 * SuspendDaemons 97 * NDM::SuspendDaemons service function
17 * Inputs: 98 * Inputs:
18 * 0 : Command header (0x00020082) 99 * 0 : Header code [0x00060040]
19 * 1 : Daemon bit mask 100 * 1 : Daemon bit mask
20 * Outputs: 101 * Outputs:
21 * 1 : Result, 0 on success, otherwise error code 102 * 1 : Result, 0 on success, otherwise error code
@@ -23,9 +104,9 @@ namespace NDM {
23void SuspendDaemons(Service::Interface* self); 104void SuspendDaemons(Service::Interface* self);
24 105
25/** 106/**
26 * ResumeDaemons 107 * NDM::ResumeDaemons service function
27 * Inputs: 108 * Inputs:
28 * 0 : Command header (0x00020082) 109 * 0 : Header code [0x00070040]
29 * 1 : Daemon bit mask 110 * 1 : Daemon bit mask
30 * Outputs: 111 * Outputs:
31 * 1 : Result, 0 on success, otherwise error code 112 * 1 : Result, 0 on success, otherwise error code
@@ -33,15 +114,138 @@ void SuspendDaemons(Service::Interface* self);
33void ResumeDaemons(Service::Interface* self); 114void ResumeDaemons(Service::Interface* self);
34 115
35/** 116/**
36 * OverrideDefaultDaemons 117 * NDM::SuspendScheduler service function
37 * Inputs: 118 * Inputs:
38 * 0 : Command header (0x00020082) 119 * 0 : Header code [0x00080040]
120 * Outputs:
121 * 1 : Result, 0 on success, otherwise error code
122 */
123void SuspendScheduler(Service::Interface* self);
124
125/**
126 * NDM::ResumeScheduler service function
127 * Inputs:
128 * 0 : Header code [0x00090000]
129 * Outputs:
130 * 1 : Result, 0 on success, otherwise error code
131 */
132void ResumeScheduler(Service::Interface* self);
133
134/**
135 * NDM::QueryStatus service function
136 * Inputs:
137 * 0 : Header code [0x000D0040]
138 * 1 : Daemon
139 * Outputs:
140 * 1 : Result, 0 on success, otherwise error code
141 * 2 : Daemon status
142 */
143void QueryStatus(Service::Interface* self);
144
145/**
146 * NDM::GetDaemonDisableCount service function
147 * Inputs:
148 * 0 : Header code [0x000E0040]
149 * 1 : Daemon
150 * Outputs:
151 * 1 : Result, 0 on success, otherwise error code
152 * 2 : Current process disable count
153 * 3 : Total disable count
154 */
155void GetDaemonDisableCount(Service::Interface* self);
156
157/**
158 * NDM::GetSchedulerDisableCount service function
159 * Inputs:
160 * 0 : Header code [0x000F0000]
161 * Outputs:
162 * 1 : Result, 0 on success, otherwise error code
163 * 2 : Current process disable count
164 * 3 : Total disable count
165 */
166void GetSchedulerDisableCount(Service::Interface* self);
167
168/**
169 * NDM::SetScanInterval service function
170 * Inputs:
171 * 0 : Header code [0x00100040]
172 * 1 : Interval (default = 30)
173 * Outputs:
174 * 1 : Result, 0 on success, otherwise error code
175 */
176void SetScanInterval(Service::Interface* self);
177
178/**
179 * NDM::GetScanInterval service function
180 * Inputs:
181 * 0 : Header code [0x00110000]
182 * Outputs:
183 * 1 : Result, 0 on success, otherwise error code
184 * 2 : Interval (default = 30)
185 */
186void GetScanInterval(Service::Interface* self);
187
188/**
189 * NDM::SetRetryInterval service function
190 * Inputs:
191 * 0 : Header code [0x00120040]
192 * 1 : Interval (default = 10)
193 * Outputs:
194 * 1 : Result, 0 on success, otherwise error code
195 */
196void SetRetryInterval(Service::Interface* self);
197
198/**
199 * NDM::GetRetryInterval service function
200 * Inputs:
201 * 0 : Header code [0x00130000]
202 * Outputs:
203 * 1 : Result, 0 on success, otherwise error code
204 * 2 : Interval (default = 10)
205 */
206void GetRetryInterval(Service::Interface* self);
207
208
209/**
210 * NDM::OverrideDefaultDaemons service function
211 * Inputs:
212 * 0 : Header code [0x00140040]
39 * 1 : Daemon bit mask 213 * 1 : Daemon bit mask
40 * Outputs: 214 * Outputs:
41 * 1 : Result, 0 on success, otherwise error code 215 * 1 : Result, 0 on success, otherwise error code
42 */ 216 */
43void OverrideDefaultDaemons(Service::Interface* self); 217void OverrideDefaultDaemons(Service::Interface* self);
44 218
219/**
220 * NDM::ResetDefaultDaemons service function
221 * Inputs:
222 * 0 : Header code [0x00150000]
223 * Outputs:
224 * 1 : Result, 0 on success, otherwise error code
225 */
226void ResetDefaultDaemons(Service::Interface* self);
227
228/**
229 * NDM::GetDefaultDaemons service function
230 * Inputs:
231 * 0 : Header code [0x00160000]
232 * Outputs:
233 * 1 : Result, 0 on success, otherwise error code
234 * 2 : Daemon bit mask
235 * Note:
236 * Gets the current default daemon bit mask. The default value is (DaemonMask::Cec | DaemonMask::Friend)
237 */
238void GetDefaultDaemons(Service::Interface* self);
239
240/**
241 * NDM::ClearHalfAwakeMacFilter service function
242 * Inputs:
243 * 0 : Header code [0x00170000]
244 * Outputs:
245 * 1 : Result, 0 on success, otherwise error code
246 */
247void ClearHalfAwakeMacFilter(Service::Interface* self);
248
45/// Initialize NDM service 249/// Initialize NDM service
46void Init(); 250void Init();
47 251
diff --git a/src/core/hle/service/ndm/ndm_u.cpp b/src/core/hle/service/ndm/ndm_u.cpp
index bf95cc7aa..3ff0744ee 100644
--- a/src/core/hle/service/ndm/ndm_u.cpp
+++ b/src/core/hle/service/ndm/ndm_u.cpp
@@ -9,29 +9,29 @@ namespace Service {
9namespace NDM { 9namespace NDM {
10 10
11const Interface::FunctionInfo FunctionTable[] = { 11const Interface::FunctionInfo FunctionTable[] = {
12 {0x00010042, nullptr, "EnterExclusiveState"}, 12 {0x00010042, EnterExclusiveState, "EnterExclusiveState"},
13 {0x00020002, nullptr, "LeaveExclusiveState"}, 13 {0x00020002, LeaveExclusiveState, "LeaveExclusiveState"},
14 {0x00030000, nullptr, "QueryExclusiveMode"}, 14 {0x00030000, QueryExclusiveMode, "QueryExclusiveMode"},
15 {0x00040002, nullptr, "LockState"}, 15 {0x00040002, LockState, "LockState"},
16 {0x00050002, nullptr, "UnlockState"}, 16 {0x00050002, UnlockState, "UnlockState"},
17 {0x00060040, SuspendDaemons, "SuspendDaemons"}, 17 {0x00060040, SuspendDaemons, "SuspendDaemons"},
18 {0x00070040, ResumeDaemons, "ResumeDaemons"}, 18 {0x00070040, ResumeDaemons, "ResumeDaemons"},
19 {0x00080040, nullptr, "DisableWifiUsage"}, 19 {0x00080040, SuspendScheduler, "SuspendScheduler"},
20 {0x00090000, nullptr, "EnableWifiUsage"}, 20 {0x00090000, ResumeScheduler, "ResumeScheduler"},
21 {0x000A0000, nullptr, "GetCurrentState"}, 21 {0x000A0000, nullptr, "GetCurrentState"},
22 {0x000B0000, nullptr, "GetTargetState"}, 22 {0x000B0000, nullptr, "GetTargetState"},
23 {0x000C0000, nullptr, "<Stubbed>"}, 23 {0x000C0000, nullptr, "<Stubbed>"},
24 {0x000D0040, nullptr, "QueryStatus"}, 24 {0x000D0040, QueryStatus, "QueryStatus"},
25 {0x000E0040, nullptr, "GetDaemonDisableCount"}, 25 {0x000E0040, GetDaemonDisableCount, "GetDaemonDisableCount"},
26 {0x000F0000, nullptr, "GetSchedulerDisableCount"}, 26 {0x000F0000, GetSchedulerDisableCount,"GetSchedulerDisableCount"},
27 {0x00100040, nullptr, "SetScanInterval"}, 27 {0x00100040, SetScanInterval, "SetScanInterval"},
28 {0x00110000, nullptr, "GetScanInterval"}, 28 {0x00110000, GetScanInterval, "GetScanInterval"},
29 {0x00120040, nullptr, "SetRetryInterval"}, 29 {0x00120040, SetRetryInterval, "SetRetryInterval"},
30 {0x00130000, nullptr, "GetRetryInterval"}, 30 {0x00130000, GetRetryInterval, "GetRetryInterval"},
31 {0x00140040, OverrideDefaultDaemons, "OverrideDefaultDaemons"}, 31 {0x00140040, OverrideDefaultDaemons, "OverrideDefaultDaemons"},
32 {0x00150000, nullptr, "ResetDefaultDaemons"}, 32 {0x00150000, ResetDefaultDaemons, "ResetDefaultDaemons"},
33 {0x00160000, nullptr, "GetDefaultDaemons"}, 33 {0x00160000, GetDefaultDaemons, "GetDefaultDaemons"},
34 {0x00170000, nullptr, "ClearHalfAwakeMacFilter"}, 34 {0x00170000, ClearHalfAwakeMacFilter, "ClearHalfAwakeMacFilter"},
35}; 35};
36 36
37NDM_U_Interface::NDM_U_Interface() { 37NDM_U_Interface::NDM_U_Interface() {
diff --git a/src/core/hle/service/soc_u.cpp b/src/core/hle/service/soc_u.cpp
index ff0af8f12..d3e5d4bca 100644
--- a/src/core/hle/service/soc_u.cpp
+++ b/src/core/hle/service/soc_u.cpp
@@ -151,6 +151,34 @@ static int TranslateError(int error) {
151 return error; 151 return error;
152} 152}
153 153
154/// Holds the translation from 3DS network socket options to system (host) network socket options
155/// Note: -1 = No effect/unavailable
156static const std::unordered_map<int, int> sockopt_map = { {
157 { 0x0004, SO_REUSEADDR },
158 { 0x0080, -1 },
159 { 0x0100, -1 },
160 { 0x1001, SO_SNDBUF },
161 { 0x1002, SO_RCVBUF },
162 { 0x1003, -1 },
163#ifdef _WIN32
164 /// Unsupported in WinSock2
165 { 0x1004, -1 },
166#else
167 { 0x1004, SO_RCVLOWAT },
168#endif
169 { 0x1008, SO_TYPE },
170 { 0x1009, SO_ERROR },
171}};
172
173/// Converts a socket option from 3ds-specific to platform-specific
174static int TranslateSockOpt(int console_opt_name) {
175 auto found = sockopt_map.find(console_opt_name);
176 if (found != sockopt_map.end()) {
177 return found->second;
178 }
179 return console_opt_name;
180}
181
154/// Holds information about a particular socket 182/// Holds information about a particular socket
155struct SocketHolder { 183struct SocketHolder {
156 u32 socket_fd; ///< The socket descriptor 184 u32 socket_fd; ///< The socket descriptor
@@ -568,7 +596,7 @@ static void RecvFrom(Service::Interface* self) {
568 socklen_t src_addr_len = sizeof(src_addr); 596 socklen_t src_addr_len = sizeof(src_addr);
569 int ret = ::recvfrom(socket_handle, (char*)output_buff, len, flags, &src_addr, &src_addr_len); 597 int ret = ::recvfrom(socket_handle, (char*)output_buff, len, flags, &src_addr, &src_addr_len);
570 598
571 if (buffer_parameters.output_src_address_buffer != 0) { 599 if (ret >= 0 && buffer_parameters.output_src_address_buffer != 0 && src_addr_len > 0) {
572 CTRSockAddr* ctr_src_addr = reinterpret_cast<CTRSockAddr*>(Memory::GetPointer(buffer_parameters.output_src_address_buffer)); 600 CTRSockAddr* ctr_src_addr = reinterpret_cast<CTRSockAddr*>(Memory::GetPointer(buffer_parameters.output_src_address_buffer));
573 *ctr_src_addr = CTRSockAddr::FromPlatform(src_addr); 601 *ctr_src_addr = CTRSockAddr::FromPlatform(src_addr);
574 } 602 }
@@ -724,6 +752,72 @@ static void ShutdownSockets(Service::Interface* self) {
724 cmd_buffer[1] = 0; 752 cmd_buffer[1] = 0;
725} 753}
726 754
755static void GetSockOpt(Service::Interface* self) {
756 u32* cmd_buffer = Kernel::GetCommandBuffer();
757 u32 socket_handle = cmd_buffer[1];
758 u32 level = cmd_buffer[2];
759 int optname = TranslateSockOpt(cmd_buffer[3]);
760 socklen_t optlen = (socklen_t)cmd_buffer[4];
761
762 int ret = -1;
763 int err = 0;
764
765 if(optname < 0) {
766#ifdef _WIN32
767 err = WSAEINVAL;
768#else
769 err = EINVAL;
770#endif
771 } else {
772 // 0x100 = static buffer offset (bytes)
773 // + 0x4 = 2nd pointer (u32) position
774 // >> 2 = convert to u32 offset instead of byte offset (cmd_buffer = u32*)
775 char* optval = reinterpret_cast<char *>(Memory::GetPointer(cmd_buffer[0x104 >> 2]));
776
777 ret = ::getsockopt(socket_handle, level, optname, optval, &optlen);
778 err = 0;
779 if (ret == SOCKET_ERROR_VALUE) {
780 err = TranslateError(GET_ERRNO);
781 }
782 }
783
784 cmd_buffer[0] = IPC::MakeHeader(0x11, 4, 2);
785 cmd_buffer[1] = ret;
786 cmd_buffer[2] = err;
787 cmd_buffer[3] = optlen;
788}
789
790static void SetSockOpt(Service::Interface* self) {
791 u32* cmd_buffer = Kernel::GetCommandBuffer();
792 u32 socket_handle = cmd_buffer[1];
793 u32 level = cmd_buffer[2];
794 int optname = TranslateSockOpt(cmd_buffer[3]);
795
796 int ret = -1;
797 int err = 0;
798
799 if(optname < 0) {
800#ifdef _WIN32
801 err = WSAEINVAL;
802#else
803 err = EINVAL;
804#endif
805 } else {
806 socklen_t optlen = static_cast<socklen_t>(cmd_buffer[4]);
807 const char* optval = reinterpret_cast<const char *>(Memory::GetPointer(cmd_buffer[8]));
808
809 ret = static_cast<u32>(::setsockopt(socket_handle, level, optname, optval, optlen));
810 err = 0;
811 if (ret == SOCKET_ERROR_VALUE) {
812 err = TranslateError(GET_ERRNO);
813 }
814 }
815
816 cmd_buffer[0] = IPC::MakeHeader(0x12, 4, 4);
817 cmd_buffer[1] = ret;
818 cmd_buffer[2] = err;
819}
820
727const Interface::FunctionInfo FunctionTable[] = { 821const Interface::FunctionInfo FunctionTable[] = {
728 {0x00010044, InitializeSockets, "InitializeSockets"}, 822 {0x00010044, InitializeSockets, "InitializeSockets"},
729 {0x000200C2, Socket, "Socket"}, 823 {0x000200C2, Socket, "Socket"},
@@ -741,8 +835,8 @@ const Interface::FunctionInfo FunctionTable[] = {
741 {0x000E00C2, nullptr, "GetHostByAddr"}, 835 {0x000E00C2, nullptr, "GetHostByAddr"},
742 {0x000F0106, nullptr, "GetAddrInfo"}, 836 {0x000F0106, nullptr, "GetAddrInfo"},
743 {0x00100102, nullptr, "GetNameInfo"}, 837 {0x00100102, nullptr, "GetNameInfo"},
744 {0x00110102, nullptr, "GetSockOpt"}, 838 {0x00110102, GetSockOpt, "GetSockOpt"},
745 {0x00120104, nullptr, "SetSockOpt"}, 839 {0x00120104, SetSockOpt, "SetSockOpt"},
746 {0x001300C2, Fcntl, "Fcntl"}, 840 {0x001300C2, Fcntl, "Fcntl"},
747 {0x00140084, Poll, "Poll"}, 841 {0x00140084, Poll, "Poll"},
748 {0x00150042, nullptr, "SockAtMark"}, 842 {0x00150042, nullptr, "SockAtMark"},
diff --git a/src/core/hle/service/y2r_u.cpp b/src/core/hle/service/y2r_u.cpp
index 22f373adf..d16578f87 100644
--- a/src/core/hle/service/y2r_u.cpp
+++ b/src/core/hle/service/y2r_u.cpp
@@ -4,6 +4,7 @@
4 4
5#include <cstring> 5#include <cstring>
6 6
7#include "common/common_funcs.h"
7#include "common/common_types.h" 8#include "common/common_types.h"
8#include "common/logging/log.h" 9#include "common/logging/log.h"
9 10
@@ -12,9 +13,6 @@
12#include "core/hle/service/y2r_u.h" 13#include "core/hle/service/y2r_u.h"
13#include "core/hw/y2r.h" 14#include "core/hw/y2r.h"
14 15
15#include "video_core/renderer_base.h"
16#include "video_core/video_core.h"
17
18//////////////////////////////////////////////////////////////////////////////////////////////////// 16////////////////////////////////////////////////////////////////////////////////////////////////////
19// Namespace Y2R_U 17// Namespace Y2R_U
20 18
@@ -28,13 +26,17 @@ struct ConversionParameters {
28 u16 input_line_width; 26 u16 input_line_width;
29 u16 input_lines; 27 u16 input_lines;
30 StandardCoefficient standard_coefficient; 28 StandardCoefficient standard_coefficient;
31 u8 reserved; 29 u8 padding;
32 u16 alpha; 30 u16 alpha;
33}; 31};
34static_assert(sizeof(ConversionParameters) == 12, "ConversionParameters struct has incorrect size"); 32static_assert(sizeof(ConversionParameters) == 12, "ConversionParameters struct has incorrect size");
35 33
36static Kernel::SharedPtr<Kernel::Event> completion_event; 34static Kernel::SharedPtr<Kernel::Event> completion_event;
37static ConversionConfiguration conversion; 35static ConversionConfiguration conversion;
36static DitheringWeightParams dithering_weight_params;
37static u32 temporal_dithering_enabled = 0;
38static u32 transfer_end_interrupt_enabled = 0;
39static u32 spacial_dithering_enabled = 0;
38 40
39static const CoefficientSet standard_coefficients[4] = { 41static const CoefficientSet standard_coefficients[4] = {
40 {{ 0x100, 0x166, 0xB6, 0x58, 0x1C5, -0x166F, 0x10EE, -0x1C5B }}, // ITU_Rec601 42 {{ 0x100, 0x166, 0xB6, 0x58, 0x1C5, -0x166F, 0x10EE, -0x1C5B }}, // ITU_Rec601
@@ -73,7 +75,7 @@ ResultCode ConversionConfiguration::SetInputLines(u16 lines) {
73 75
74ResultCode ConversionConfiguration::SetStandardCoefficient(StandardCoefficient standard_coefficient) { 76ResultCode ConversionConfiguration::SetStandardCoefficient(StandardCoefficient standard_coefficient) {
75 size_t index = static_cast<size_t>(standard_coefficient); 77 size_t index = static_cast<size_t>(standard_coefficient);
76 if (index >= 4) { 78 if (index >= ARRAY_SIZE(standard_coefficients)) {
77 return ResultCode(ErrorDescription::InvalidEnumValue, ErrorModule::CAM, 79 return ResultCode(ErrorDescription::InvalidEnumValue, ErrorModule::CAM,
78 ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E053ED 80 ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E053ED
79 } 81 }
@@ -86,44 +88,183 @@ static void SetInputFormat(Service::Interface* self) {
86 u32* cmd_buff = Kernel::GetCommandBuffer(); 88 u32* cmd_buff = Kernel::GetCommandBuffer();
87 89
88 conversion.input_format = static_cast<InputFormat>(cmd_buff[1]); 90 conversion.input_format = static_cast<InputFormat>(cmd_buff[1]);
91
92 cmd_buff[0] = IPC::MakeHeader(0x1, 1, 0);
93 cmd_buff[1] = RESULT_SUCCESS.raw;
94
89 LOG_DEBUG(Service_Y2R, "called input_format=%hhu", conversion.input_format); 95 LOG_DEBUG(Service_Y2R, "called input_format=%hhu", conversion.input_format);
96}
97
98static void GetInputFormat(Service::Interface* self) {
99 u32* cmd_buff = Kernel::GetCommandBuffer();
90 100
101 cmd_buff[0] = IPC::MakeHeader(0x2, 2, 0);
91 cmd_buff[1] = RESULT_SUCCESS.raw; 102 cmd_buff[1] = RESULT_SUCCESS.raw;
103 cmd_buff[2] = static_cast<u32>(conversion.input_format);
104
105 LOG_DEBUG(Service_Y2R, "called input_format=%hhu", conversion.input_format);
92} 106}
93 107
94static void SetOutputFormat(Service::Interface* self) { 108static void SetOutputFormat(Service::Interface* self) {
95 u32* cmd_buff = Kernel::GetCommandBuffer(); 109 u32* cmd_buff = Kernel::GetCommandBuffer();
96 110
97 conversion.output_format = static_cast<OutputFormat>(cmd_buff[1]); 111 conversion.output_format = static_cast<OutputFormat>(cmd_buff[1]);
112
113 cmd_buff[0] = IPC::MakeHeader(0x3, 1, 0);
114 cmd_buff[1] = RESULT_SUCCESS.raw;
115
98 LOG_DEBUG(Service_Y2R, "called output_format=%hhu", conversion.output_format); 116 LOG_DEBUG(Service_Y2R, "called output_format=%hhu", conversion.output_format);
117}
118
119static void GetOutputFormat(Service::Interface* self) {
120 u32* cmd_buff = Kernel::GetCommandBuffer();
99 121
122 cmd_buff[0] = IPC::MakeHeader(0x4, 2, 0);
100 cmd_buff[1] = RESULT_SUCCESS.raw; 123 cmd_buff[1] = RESULT_SUCCESS.raw;
124 cmd_buff[2] = static_cast<u32>(conversion.output_format);
125
126 LOG_DEBUG(Service_Y2R, "called output_format=%hhu", conversion.output_format);
101} 127}
102 128
103static void SetRotation(Service::Interface* self) { 129static void SetRotation(Service::Interface* self) {
104 u32* cmd_buff = Kernel::GetCommandBuffer(); 130 u32* cmd_buff = Kernel::GetCommandBuffer();
105 131
106 conversion.rotation = static_cast<Rotation>(cmd_buff[1]); 132 conversion.rotation = static_cast<Rotation>(cmd_buff[1]);
133
134 cmd_buff[0] = IPC::MakeHeader(0x5, 1, 0);
135 cmd_buff[1] = RESULT_SUCCESS.raw;
136
107 LOG_DEBUG(Service_Y2R, "called rotation=%hhu", conversion.rotation); 137 LOG_DEBUG(Service_Y2R, "called rotation=%hhu", conversion.rotation);
138}
139
140static void GetRotation(Service::Interface* self) {
141 u32* cmd_buff = Kernel::GetCommandBuffer();
108 142
143 cmd_buff[0] = IPC::MakeHeader(0x6, 2, 0);
109 cmd_buff[1] = RESULT_SUCCESS.raw; 144 cmd_buff[1] = RESULT_SUCCESS.raw;
145 cmd_buff[2] = static_cast<u32>(conversion.rotation);
146
147 LOG_DEBUG(Service_Y2R, "called rotation=%hhu", conversion.rotation);
110} 148}
111 149
112static void SetBlockAlignment(Service::Interface* self) { 150static void SetBlockAlignment(Service::Interface* self) {
113 u32* cmd_buff = Kernel::GetCommandBuffer(); 151 u32* cmd_buff = Kernel::GetCommandBuffer();
114 152
115 conversion.block_alignment = static_cast<BlockAlignment>(cmd_buff[1]); 153 conversion.block_alignment = static_cast<BlockAlignment>(cmd_buff[1]);
116 LOG_DEBUG(Service_Y2R, "called alignment=%hhu", conversion.block_alignment);
117 154
155 cmd_buff[0] = IPC::MakeHeader(0x7, 1, 0);
156 cmd_buff[1] = RESULT_SUCCESS.raw;
157
158 LOG_DEBUG(Service_Y2R, "called block_alignment=%hhu", conversion.block_alignment);
159}
160
161static void GetBlockAlignment(Service::Interface* self) {
162 u32* cmd_buff = Kernel::GetCommandBuffer();
163
164 cmd_buff[0] = IPC::MakeHeader(0x8, 2, 0);
165 cmd_buff[1] = RESULT_SUCCESS.raw;
166 cmd_buff[2] = static_cast<u32>(conversion.block_alignment);
167
168 LOG_DEBUG(Service_Y2R, "called block_alignment=%hhu", conversion.block_alignment);
169}
170
171/**
172 * Y2R_U::SetSpacialDithering service function
173 * Inputs:
174 * 1 : u8, 0 = Disabled, 1 = Enabled
175 * Outputs:
176 * 1 : Result of function, 0 on success, otherwise error code
177 */
178static void SetSpacialDithering(Service::Interface* self) {
179 u32* cmd_buff = Kernel::GetCommandBuffer();
180 spacial_dithering_enabled = cmd_buff[1] & 0xF;
181
182 cmd_buff[0] = IPC::MakeHeader(0x9, 1, 0);
183 cmd_buff[1] = RESULT_SUCCESS.raw;
184
185 LOG_WARNING(Service_Y2R, "(STUBBED) called");
186}
187
188/**
189 * Y2R_U::GetSpacialDithering service function
190 * Outputs:
191 * 1 : Result of function, 0 on success, otherwise error code
192 * 2 : u8, 0 = Disabled, 1 = Enabled
193 */
194static void GetSpacialDithering(Service::Interface* self) {
195 u32* cmd_buff = Kernel::GetCommandBuffer();
196
197 cmd_buff[0] = IPC::MakeHeader(0xA, 2, 0);
198 cmd_buff[1] = RESULT_SUCCESS.raw;
199 cmd_buff[2] = spacial_dithering_enabled;
200
201 LOG_WARNING(Service_Y2R, "(STUBBED) called");
202}
203
204/**
205 * Y2R_U::SetTemporalDithering service function
206 * Inputs:
207 * 1 : u8, 0 = Disabled, 1 = Enabled
208 * Outputs:
209 * 1 : Result of function, 0 on success, otherwise error code
210 */
211static void SetTemporalDithering(Service::Interface* self) {
212 u32* cmd_buff = Kernel::GetCommandBuffer();
213 temporal_dithering_enabled = cmd_buff[1] & 0xF;
214
215 cmd_buff[0] = IPC::MakeHeader(0xB, 1, 0);
118 cmd_buff[1] = RESULT_SUCCESS.raw; 216 cmd_buff[1] = RESULT_SUCCESS.raw;
217
218 LOG_WARNING(Service_Y2R, "(STUBBED) called");
119} 219}
120 220
221/**
222 * Y2R_U::GetTemporalDithering service function
223 * Outputs:
224 * 1 : Result of function, 0 on success, otherwise error code
225 * 2 : u8, 0 = Disabled, 1 = Enabled
226 */
227static void GetTemporalDithering(Service::Interface* self) {
228 u32* cmd_buff = Kernel::GetCommandBuffer();
229
230 cmd_buff[0] = IPC::MakeHeader(0xC, 2, 0);
231 cmd_buff[1] = RESULT_SUCCESS.raw;
232 cmd_buff[2] = temporal_dithering_enabled;
233
234 LOG_WARNING(Service_Y2R, "(STUBBED) called");
235}
236
237/**
238 * Y2R_U::SetTransferEndInterrupt service function
239 * Inputs:
240 * 1 : u8, 0 = Disabled, 1 = Enabled
241 * Outputs:
242 * 1 : Result of function, 0 on success, otherwise error code
243 */
121static void SetTransferEndInterrupt(Service::Interface* self) { 244static void SetTransferEndInterrupt(Service::Interface* self) {
122 u32* cmd_buff = Kernel::GetCommandBuffer(); 245 u32* cmd_buff = Kernel::GetCommandBuffer();
246 transfer_end_interrupt_enabled = cmd_buff[1] & 0xf;
123 247
124 cmd_buff[0] = IPC::MakeHeader(0xD, 1, 0); 248 cmd_buff[0] = IPC::MakeHeader(0xD, 1, 0);
125 cmd_buff[1] = RESULT_SUCCESS.raw; 249 cmd_buff[1] = RESULT_SUCCESS.raw;
126 LOG_DEBUG(Service_Y2R, "(STUBBED) called"); 250
251 LOG_WARNING(Service_Y2R, "(STUBBED) called");
252}
253
254/**
255 * Y2R_U::GetTransferEndInterrupt service function
256 * Outputs:
257 * 1 : Result of function, 0 on success, otherwise error code
258 * 2 : u8, 0 = Disabled, 1 = Enabled
259 */
260static void GetTransferEndInterrupt(Service::Interface* self) {
261 u32* cmd_buff = Kernel::GetCommandBuffer();
262
263 cmd_buff[0] = IPC::MakeHeader(0xE, 2, 0);
264 cmd_buff[1] = RESULT_SUCCESS.raw;
265 cmd_buff[2] = transfer_end_interrupt_enabled;
266
267 LOG_WARNING(Service_Y2R, "(STUBBED) called");
127} 268}
128 269
129/** 270/**
@@ -135,8 +276,10 @@ static void SetTransferEndInterrupt(Service::Interface* self) {
135static void GetTransferEndEvent(Service::Interface* self) { 276static void GetTransferEndEvent(Service::Interface* self) {
136 u32* cmd_buff = Kernel::GetCommandBuffer(); 277 u32* cmd_buff = Kernel::GetCommandBuffer();
137 278
279 cmd_buff[0] = IPC::MakeHeader(0xF, 2, 0);
138 cmd_buff[1] = RESULT_SUCCESS.raw; 280 cmd_buff[1] = RESULT_SUCCESS.raw;
139 cmd_buff[3] = Kernel::g_handle_table.Create(completion_event).MoveFrom(); 281 cmd_buff[3] = Kernel::g_handle_table.Create(completion_event).MoveFrom();
282
140 LOG_DEBUG(Service_Y2R, "called"); 283 LOG_DEBUG(Service_Y2R, "called");
141} 284}
142 285
@@ -147,12 +290,12 @@ static void SetSendingY(Service::Interface* self) {
147 conversion.src_Y.image_size = cmd_buff[2]; 290 conversion.src_Y.image_size = cmd_buff[2];
148 conversion.src_Y.transfer_unit = cmd_buff[3]; 291 conversion.src_Y.transfer_unit = cmd_buff[3];
149 conversion.src_Y.gap = cmd_buff[4]; 292 conversion.src_Y.gap = cmd_buff[4];
150 u32 src_process_handle = cmd_buff[6];
151 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
152 "src_process_handle=0x%08X", conversion.src_Y.image_size,
153 conversion.src_Y.transfer_unit, conversion.src_Y.gap, src_process_handle);
154 293
294 cmd_buff[0] = IPC::MakeHeader(0x10, 1, 0);
155 cmd_buff[1] = RESULT_SUCCESS.raw; 295 cmd_buff[1] = RESULT_SUCCESS.raw;
296
297 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, src_process_handle=0x%08X",
298 conversion.src_Y.image_size, conversion.src_Y.transfer_unit, conversion.src_Y.gap, cmd_buff[6]);
156} 299}
157 300
158static void SetSendingU(Service::Interface* self) { 301static void SetSendingU(Service::Interface* self) {
@@ -162,12 +305,12 @@ static void SetSendingU(Service::Interface* self) {
162 conversion.src_U.image_size = cmd_buff[2]; 305 conversion.src_U.image_size = cmd_buff[2];
163 conversion.src_U.transfer_unit = cmd_buff[3]; 306 conversion.src_U.transfer_unit = cmd_buff[3];
164 conversion.src_U.gap = cmd_buff[4]; 307 conversion.src_U.gap = cmd_buff[4];
165 u32 src_process_handle = cmd_buff[6];
166 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
167 "src_process_handle=0x%08X", conversion.src_U.image_size,
168 conversion.src_U.transfer_unit, conversion.src_U.gap, src_process_handle);
169 308
309 cmd_buff[0] = IPC::MakeHeader(0x11, 1, 0);
170 cmd_buff[1] = RESULT_SUCCESS.raw; 310 cmd_buff[1] = RESULT_SUCCESS.raw;
311
312 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, src_process_handle=0x%08X",
313 conversion.src_U.image_size, conversion.src_U.transfer_unit, conversion.src_U.gap, cmd_buff[6]);
171} 314}
172 315
173static void SetSendingV(Service::Interface* self) { 316static void SetSendingV(Service::Interface* self) {
@@ -177,12 +320,12 @@ static void SetSendingV(Service::Interface* self) {
177 conversion.src_V.image_size = cmd_buff[2]; 320 conversion.src_V.image_size = cmd_buff[2];
178 conversion.src_V.transfer_unit = cmd_buff[3]; 321 conversion.src_V.transfer_unit = cmd_buff[3];
179 conversion.src_V.gap = cmd_buff[4]; 322 conversion.src_V.gap = cmd_buff[4];
180 u32 src_process_handle = cmd_buff[6];
181 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
182 "src_process_handle=0x%08X", conversion.src_V.image_size,
183 conversion.src_V.transfer_unit, conversion.src_V.gap, src_process_handle);
184 323
324 cmd_buff[0] = IPC::MakeHeader(0x12, 1, 0);
185 cmd_buff[1] = RESULT_SUCCESS.raw; 325 cmd_buff[1] = RESULT_SUCCESS.raw;
326
327 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, src_process_handle=0x%08X",
328 conversion.src_V.image_size, conversion.src_V.transfer_unit, conversion.src_V.gap, cmd_buff[6]);
186} 329}
187 330
188static void SetSendingYUYV(Service::Interface* self) { 331static void SetSendingYUYV(Service::Interface* self) {
@@ -192,12 +335,76 @@ static void SetSendingYUYV(Service::Interface* self) {
192 conversion.src_YUYV.image_size = cmd_buff[2]; 335 conversion.src_YUYV.image_size = cmd_buff[2];
193 conversion.src_YUYV.transfer_unit = cmd_buff[3]; 336 conversion.src_YUYV.transfer_unit = cmd_buff[3];
194 conversion.src_YUYV.gap = cmd_buff[4]; 337 conversion.src_YUYV.gap = cmd_buff[4];
195 u32 src_process_handle = cmd_buff[6];
196 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
197 "src_process_handle=0x%08X", conversion.src_YUYV.image_size,
198 conversion.src_YUYV.transfer_unit, conversion.src_YUYV.gap, src_process_handle);
199 338
339 cmd_buff[0] = IPC::MakeHeader(0x13, 1, 0);
340 cmd_buff[1] = RESULT_SUCCESS.raw;
341
342 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, src_process_handle=0x%08X",
343 conversion.src_YUYV.image_size, conversion.src_YUYV.transfer_unit, conversion.src_YUYV.gap, cmd_buff[6]);
344}
345
346/**
347 * Y2R::IsFinishedSendingYuv service function
348 * Output:
349 * 1 : Result of the function, 0 on success, otherwise error code
350 * 2 : u8, 0 = Not Finished, 1 = Finished
351 */
352static void IsFinishedSendingYuv(Service::Interface* self) {
353 u32* cmd_buff = Kernel::GetCommandBuffer();
354
355 cmd_buff[0] = IPC::MakeHeader(0x14, 2, 0);
356 cmd_buff[1] = RESULT_SUCCESS.raw;
357 cmd_buff[2] = 1;
358
359 LOG_WARNING(Service_Y2R, "(STUBBED) called");
360}
361
362/**
363 * Y2R::IsFinishedSendingY service function
364 * Output:
365 * 1 : Result of the function, 0 on success, otherwise error code
366 * 2 : u8, 0 = Not Finished, 1 = Finished
367 */
368static void IsFinishedSendingY(Service::Interface* self) {
369 u32* cmd_buff = Kernel::GetCommandBuffer();
370
371 cmd_buff[0] = IPC::MakeHeader(0x15, 2, 0);
200 cmd_buff[1] = RESULT_SUCCESS.raw; 372 cmd_buff[1] = RESULT_SUCCESS.raw;
373 cmd_buff[2] = 1;
374
375 LOG_WARNING(Service_Y2R, "(STUBBED) called");
376}
377
378/**
379 * Y2R::IsFinishedSendingU service function
380 * Output:
381 * 1 : Result of the function, 0 on success, otherwise error code
382 * 2 : u8, 0 = Not Finished, 1 = Finished
383 */
384static void IsFinishedSendingU(Service::Interface* self) {
385 u32* cmd_buff = Kernel::GetCommandBuffer();
386
387 cmd_buff[0] = IPC::MakeHeader(0x16, 2, 0);
388 cmd_buff[1] = RESULT_SUCCESS.raw;
389 cmd_buff[2] = 1;
390
391 LOG_WARNING(Service_Y2R, "(STUBBED) called");
392}
393
394/**
395 * Y2R::IsFinishedSendingV service function
396 * Output:
397 * 1 : Result of the function, 0 on success, otherwise error code
398 * 2 : u8, 0 = Not Finished, 1 = Finished
399 */
400static void IsFinishedSendingV(Service::Interface* self) {
401 u32* cmd_buff = Kernel::GetCommandBuffer();
402
403 cmd_buff[0] = IPC::MakeHeader(0x17, 2, 0);
404 cmd_buff[1] = RESULT_SUCCESS.raw;
405 cmd_buff[2] = 1;
406
407 LOG_WARNING(Service_Y2R, "(STUBBED) called");
201} 408}
202 409
203static void SetReceiving(Service::Interface* self) { 410static void SetReceiving(Service::Interface* self) {
@@ -207,27 +414,66 @@ static void SetReceiving(Service::Interface* self) {
207 conversion.dst.image_size = cmd_buff[2]; 414 conversion.dst.image_size = cmd_buff[2];
208 conversion.dst.transfer_unit = cmd_buff[3]; 415 conversion.dst.transfer_unit = cmd_buff[3];
209 conversion.dst.gap = cmd_buff[4]; 416 conversion.dst.gap = cmd_buff[4];
210 u32 dst_process_handle = cmd_buff[6];
211 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
212 "dst_process_handle=0x%08X", conversion.dst.image_size,
213 conversion.dst.transfer_unit, conversion.dst.gap,
214 dst_process_handle);
215 417
418 cmd_buff[0] = IPC::MakeHeader(0x18, 1, 0);
216 cmd_buff[1] = RESULT_SUCCESS.raw; 419 cmd_buff[1] = RESULT_SUCCESS.raw;
420
421 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, dst_process_handle=0x%08X",
422 conversion.dst.image_size, conversion.dst.transfer_unit, conversion.dst.gap, cmd_buff[6]);
423}
424
425/**
426 * Y2R::IsFinishedReceiving service function
427 * Output:
428 * 1 : Result of the function, 0 on success, otherwise error code
429 * 2 : u8, 0 = Not Finished, 1 = Finished
430 */
431static void IsFinishedReceiving(Service::Interface* self) {
432 u32* cmd_buff = Kernel::GetCommandBuffer();
433
434 cmd_buff[0] = IPC::MakeHeader(0x19, 2, 0);
435 cmd_buff[1] = RESULT_SUCCESS.raw;
436 cmd_buff[2] = 1;
437
438 LOG_WARNING(Service_Y2R, "(STUBBED) called");
217} 439}
218 440
219static void SetInputLineWidth(Service::Interface* self) { 441static void SetInputLineWidth(Service::Interface* self) {
220 u32* cmd_buff = Kernel::GetCommandBuffer(); 442 u32* cmd_buff = Kernel::GetCommandBuffer();
221 443
222 LOG_DEBUG(Service_Y2R, "called input_line_width=%u", cmd_buff[1]); 444 cmd_buff[0] = IPC::MakeHeader(0x1A, 1, 0);
223 cmd_buff[1] = conversion.SetInputLineWidth(cmd_buff[1]).raw; 445 cmd_buff[1] = conversion.SetInputLineWidth(cmd_buff[1]).raw;
446
447 LOG_DEBUG(Service_Y2R, "called input_line_width=%u", cmd_buff[1]);
448}
449
450static void GetInputLineWidth(Service::Interface* self) {
451 u32* cmd_buff = Kernel::GetCommandBuffer();
452
453 cmd_buff[0] = IPC::MakeHeader(0x1B, 2, 0);
454 cmd_buff[1] = RESULT_SUCCESS.raw;
455 cmd_buff[2] = conversion.input_line_width;
456
457 LOG_DEBUG(Service_Y2R, "called input_line_width=%u", conversion.input_line_width);
224} 458}
225 459
226static void SetInputLines(Service::Interface* self) { 460static void SetInputLines(Service::Interface* self) {
227 u32* cmd_buff = Kernel::GetCommandBuffer(); 461 u32* cmd_buff = Kernel::GetCommandBuffer();
228 462
229 LOG_DEBUG(Service_Y2R, "called input_line_number=%u", cmd_buff[1]); 463 cmd_buff[0] = IPC::MakeHeader(0x1C, 1, 0);
230 cmd_buff[1] = conversion.SetInputLines(cmd_buff[1]).raw; 464 cmd_buff[1] = conversion.SetInputLines(cmd_buff[1]).raw;
465
466 LOG_DEBUG(Service_Y2R, "called input_lines=%u", cmd_buff[1]);
467}
468
469static void GetInputLines(Service::Interface* self) {
470 u32* cmd_buff = Kernel::GetCommandBuffer();
471
472 cmd_buff[0] = IPC::MakeHeader(0x1D, 2, 0);
473 cmd_buff[1] = RESULT_SUCCESS.raw;
474 cmd_buff[2] = static_cast<u32>(conversion.input_lines);
475
476 LOG_DEBUG(Service_Y2R, "called input_lines=%u", conversion.input_lines);
231} 477}
232 478
233static void SetCoefficient(Service::Interface* self) { 479static void SetCoefficient(Service::Interface* self) {
@@ -235,45 +481,111 @@ static void SetCoefficient(Service::Interface* self) {
235 481
236 const u16* coefficients = reinterpret_cast<const u16*>(&cmd_buff[1]); 482 const u16* coefficients = reinterpret_cast<const u16*>(&cmd_buff[1]);
237 std::memcpy(conversion.coefficients.data(), coefficients, sizeof(CoefficientSet)); 483 std::memcpy(conversion.coefficients.data(), coefficients, sizeof(CoefficientSet));
484
485 cmd_buff[0] = IPC::MakeHeader(0x1E, 1, 0);
486 cmd_buff[1] = RESULT_SUCCESS.raw;
487
238 LOG_DEBUG(Service_Y2R, "called coefficients=[%hX, %hX, %hX, %hX, %hX, %hX, %hX, %hX]", 488 LOG_DEBUG(Service_Y2R, "called coefficients=[%hX, %hX, %hX, %hX, %hX, %hX, %hX, %hX]",
239 coefficients[0], coefficients[1], coefficients[2], coefficients[3], 489 coefficients[0], coefficients[1], coefficients[2], coefficients[3],
240 coefficients[4], coefficients[5], coefficients[6], coefficients[7]); 490 coefficients[4], coefficients[5], coefficients[6], coefficients[7]);
491}
241 492
493static void GetCoefficient(Service::Interface* self) {
494 u32* cmd_buff = Kernel::GetCommandBuffer();
495
496 cmd_buff[0] = IPC::MakeHeader(0x1F, 5, 0);
242 cmd_buff[1] = RESULT_SUCCESS.raw; 497 cmd_buff[1] = RESULT_SUCCESS.raw;
498 std::memcpy(&cmd_buff[2], conversion.coefficients.data(), sizeof(CoefficientSet));
499
500 LOG_DEBUG(Service_Y2R, "called");
243} 501}
244 502
245static void SetStandardCoefficient(Service::Interface* self) { 503static void SetStandardCoefficient(Service::Interface* self) {
246 u32* cmd_buff = Kernel::GetCommandBuffer(); 504 u32* cmd_buff = Kernel::GetCommandBuffer();
247 505
248 LOG_DEBUG(Service_Y2R, "called standard_coefficient=%u", cmd_buff[1]); 506 u32 index = cmd_buff[1];
507
508 cmd_buff[0] = IPC::MakeHeader(0x20, 1, 0);
509 cmd_buff[1] = conversion.SetStandardCoefficient((StandardCoefficient)index).raw;
510
511 LOG_DEBUG(Service_Y2R, "called standard_coefficient=%u", index);
512}
513
514static void GetStandardCoefficient(Service::Interface* self) {
515 u32* cmd_buff = Kernel::GetCommandBuffer();
516
517 u32 index = cmd_buff[1];
518
519 if (index < ARRAY_SIZE(standard_coefficients)) {
520 cmd_buff[0] = IPC::MakeHeader(0x21, 5, 0);
521 cmd_buff[1] = RESULT_SUCCESS.raw;
522 std::memcpy(&cmd_buff[2], &standard_coefficients[index], sizeof(CoefficientSet));
249 523
250 cmd_buff[1] = conversion.SetStandardCoefficient((StandardCoefficient)cmd_buff[1]).raw; 524 LOG_DEBUG(Service_Y2R, "called standard_coefficient=%u ", index);
525 } else {
526 cmd_buff[0] = IPC::MakeHeader(0x21, 1, 0);
527 cmd_buff[1] = -1; // TODO(bunnei): Identify the correct error code for this
528
529 LOG_ERROR(Service_Y2R, "called standard_coefficient=%u The argument is invalid!", index);
530 }
251} 531}
252 532
253static void SetAlpha(Service::Interface* self) { 533static void SetAlpha(Service::Interface* self) {
254 u32* cmd_buff = Kernel::GetCommandBuffer(); 534 u32* cmd_buff = Kernel::GetCommandBuffer();
255 535
256 conversion.alpha = cmd_buff[1]; 536 conversion.alpha = cmd_buff[1];
537
538 cmd_buff[0] = IPC::MakeHeader(0x22, 1, 0);
539 cmd_buff[1] = RESULT_SUCCESS.raw;
540
257 LOG_DEBUG(Service_Y2R, "called alpha=%hu", conversion.alpha); 541 LOG_DEBUG(Service_Y2R, "called alpha=%hu", conversion.alpha);
542}
543
544static void GetAlpha(Service::Interface* self) {
545 u32* cmd_buff = Kernel::GetCommandBuffer();
258 546
547 cmd_buff[0] = IPC::MakeHeader(0x23, 2, 0);
259 cmd_buff[1] = RESULT_SUCCESS.raw; 548 cmd_buff[1] = RESULT_SUCCESS.raw;
549 cmd_buff[2] = conversion.alpha;
550
551 LOG_DEBUG(Service_Y2R, "called alpha=%hu", conversion.alpha);
260} 552}
261 553
262static void StartConversion(Service::Interface* self) { 554static void SetDitheringWeightParams(Service::Interface* self) {
263 u32* cmd_buff = Kernel::GetCommandBuffer(); 555 u32* cmd_buff = Kernel::GetCommandBuffer();
556 std::memcpy(&dithering_weight_params, &cmd_buff[1], sizeof(DitheringWeightParams));
264 557
265 HW::Y2R::PerformConversion(conversion); 558 cmd_buff[0] = IPC::MakeHeader(0x24, 1, 0);
559 cmd_buff[1] = RESULT_SUCCESS.raw;
266 560
267 // dst_image_size would seem to be perfect for this, but it doesn't include the gap :( 561 LOG_DEBUG(Service_Y2R, "called");
268 u32 total_output_size = conversion.input_lines * 562}
269 (conversion.dst.transfer_unit + conversion.dst.gap); 563
270 VideoCore::g_renderer->Rasterizer()->InvalidateRegion( 564static void GetDitheringWeightParams(Service::Interface* self) {
271 Memory::VirtualToPhysicalAddress(conversion.dst.address), total_output_size); 565 u32* cmd_buff = Kernel::GetCommandBuffer();
566
567 cmd_buff[0] = IPC::MakeHeader(0x25, 9, 0);
568 cmd_buff[1] = RESULT_SUCCESS.raw;
569 std::memcpy(&cmd_buff[2], &dithering_weight_params, sizeof(DitheringWeightParams));
272 570
273 LOG_DEBUG(Service_Y2R, "called"); 571 LOG_DEBUG(Service_Y2R, "called");
572}
573
574static void StartConversion(Service::Interface* self) {
575 u32* cmd_buff = Kernel::GetCommandBuffer();
576
577 // dst_image_size would seem to be perfect for this, but it doesn't include the gap :(
578 u32 total_output_size = conversion.input_lines * (conversion.dst.transfer_unit + conversion.dst.gap);
579 Memory::RasterizerFlushAndInvalidateRegion(Memory::VirtualToPhysicalAddress(conversion.dst.address), total_output_size);
580
581 HW::Y2R::PerformConversion(conversion);
582
274 completion_event->Signal(); 583 completion_event->Signal();
275 584
585 cmd_buff[0] = IPC::MakeHeader(0x26, 1, 0);
276 cmd_buff[1] = RESULT_SUCCESS.raw; 586 cmd_buff[1] = RESULT_SUCCESS.raw;
587
588 LOG_DEBUG(Service_Y2R, "called");
277} 589}
278 590
279static void StopConversion(Service::Interface* self) { 591static void StopConversion(Service::Interface* self) {
@@ -281,6 +593,7 @@ static void StopConversion(Service::Interface* self) {
281 593
282 cmd_buff[0] = IPC::MakeHeader(0x27, 1, 0); 594 cmd_buff[0] = IPC::MakeHeader(0x27, 1, 0);
283 cmd_buff[1] = RESULT_SUCCESS.raw; 595 cmd_buff[1] = RESULT_SUCCESS.raw;
596
284 LOG_DEBUG(Service_Y2R, "called"); 597 LOG_DEBUG(Service_Y2R, "called");
285} 598}
286 599
@@ -293,50 +606,61 @@ static void StopConversion(Service::Interface* self) {
293static void IsBusyConversion(Service::Interface* self) { 606static void IsBusyConversion(Service::Interface* self) {
294 u32* cmd_buff = Kernel::GetCommandBuffer(); 607 u32* cmd_buff = Kernel::GetCommandBuffer();
295 608
609 cmd_buff[0] = IPC::MakeHeader(0x28, 2, 0);
296 cmd_buff[1] = RESULT_SUCCESS.raw; 610 cmd_buff[1] = RESULT_SUCCESS.raw;
297 cmd_buff[2] = 0; // StartConversion always finishes immediately 611 cmd_buff[2] = 0; // StartConversion always finishes immediately
612
298 LOG_DEBUG(Service_Y2R, "called"); 613 LOG_DEBUG(Service_Y2R, "called");
299} 614}
300 615
301/** 616/**
302 * Y2R_U::SetConversionParams service function 617 * Y2R_U::SetPackageParameter service function
303 */ 618 */
304static void SetConversionParams(Service::Interface* self) { 619static void SetPackageParameter(Service::Interface* self) {
305 u32* cmd_buff = Kernel::GetCommandBuffer(); 620 u32* cmd_buff = Kernel::GetCommandBuffer();
306 621
307 auto params = reinterpret_cast<const ConversionParameters*>(&cmd_buff[1]); 622 auto params = reinterpret_cast<const ConversionParameters*>(&cmd_buff[1]);
308 LOG_DEBUG(Service_Y2R,
309 "called input_format=%hhu output_format=%hhu rotation=%hhu block_alignment=%hhu "
310 "input_line_width=%hu input_lines=%hu standard_coefficient=%hhu "
311 "reserved=%hhu alpha=%hX",
312 params->input_format, params->output_format, params->rotation, params->block_alignment,
313 params->input_line_width, params->input_lines, params->standard_coefficient,
314 params->reserved, params->alpha);
315
316 ResultCode result = RESULT_SUCCESS;
317 623
318 conversion.input_format = params->input_format; 624 conversion.input_format = params->input_format;
319 conversion.output_format = params->output_format; 625 conversion.output_format = params->output_format;
320 conversion.rotation = params->rotation; 626 conversion.rotation = params->rotation;
321 conversion.block_alignment = params->block_alignment; 627 conversion.block_alignment = params->block_alignment;
322 result = conversion.SetInputLineWidth(params->input_line_width); 628
323 if (result.IsError()) goto cleanup; 629 ResultCode result = conversion.SetInputLineWidth(params->input_line_width);
630
631 if (result.IsError())
632 goto cleanup;
633
324 result = conversion.SetInputLines(params->input_lines); 634 result = conversion.SetInputLines(params->input_lines);
325 if (result.IsError()) goto cleanup; 635
636 if (result.IsError())
637 goto cleanup;
638
326 result = conversion.SetStandardCoefficient(params->standard_coefficient); 639 result = conversion.SetStandardCoefficient(params->standard_coefficient);
327 if (result.IsError()) goto cleanup; 640
641 if (result.IsError())
642 goto cleanup;
643
644 conversion.padding = params->padding;
328 conversion.alpha = params->alpha; 645 conversion.alpha = params->alpha;
329 646
330cleanup: 647cleanup:
331 cmd_buff[0] = IPC::MakeHeader(0x29, 1, 0); 648 cmd_buff[0] = IPC::MakeHeader(0x29, 1, 0);
332 cmd_buff[1] = result.raw; 649 cmd_buff[1] = result.raw;
650
651 LOG_DEBUG(Service_Y2R, "called input_format=%hhu output_format=%hhu rotation=%hhu block_alignment=%hhu "
652 "input_line_width=%hu input_lines=%hu standard_coefficient=%hhu reserved=%hhu alpha=%hX",
653 params->input_format, params->output_format, params->rotation, params->block_alignment,
654 params->input_line_width, params->input_lines, params->standard_coefficient, params->padding, params->alpha);
333} 655}
334 656
335static void PingProcess(Service::Interface* self) { 657static void PingProcess(Service::Interface* self) {
336 u32* cmd_buff = Kernel::GetCommandBuffer(); 658 u32* cmd_buff = Kernel::GetCommandBuffer();
337 659
660 cmd_buff[0] = IPC::MakeHeader(0x2A, 2, 0);
338 cmd_buff[1] = RESULT_SUCCESS.raw; 661 cmd_buff[1] = RESULT_SUCCESS.raw;
339 cmd_buff[2] = 0; 662 cmd_buff[2] = 0;
663
340 LOG_WARNING(Service_Y2R, "(STUBBED) called"); 664 LOG_WARNING(Service_Y2R, "(STUBBED) called");
341} 665}
342 666
@@ -362,6 +686,7 @@ static void DriverInitialize(Service::Interface* self) {
362 686
363 cmd_buff[0] = IPC::MakeHeader(0x2B, 1, 0); 687 cmd_buff[0] = IPC::MakeHeader(0x2B, 1, 0);
364 cmd_buff[1] = RESULT_SUCCESS.raw; 688 cmd_buff[1] = RESULT_SUCCESS.raw;
689
365 LOG_DEBUG(Service_Y2R, "called"); 690 LOG_DEBUG(Service_Y2R, "called");
366} 691}
367 692
@@ -370,54 +695,67 @@ static void DriverFinalize(Service::Interface* self) {
370 695
371 cmd_buff[0] = IPC::MakeHeader(0x2C, 1, 0); 696 cmd_buff[0] = IPC::MakeHeader(0x2C, 1, 0);
372 cmd_buff[1] = RESULT_SUCCESS.raw; 697 cmd_buff[1] = RESULT_SUCCESS.raw;
698
699 LOG_DEBUG(Service_Y2R, "called");
700}
701
702
703static void GetPackageParameter(Service::Interface* self) {
704 u32* cmd_buff = Kernel::GetCommandBuffer();
705
706 cmd_buff[0] = IPC::MakeHeader(0x2D, 4, 0);
707 cmd_buff[1] = RESULT_SUCCESS.raw;
708 std::memcpy(&cmd_buff[2], &conversion, sizeof(ConversionParameters));
709
373 LOG_DEBUG(Service_Y2R, "called"); 710 LOG_DEBUG(Service_Y2R, "called");
374} 711}
375 712
376const Interface::FunctionInfo FunctionTable[] = { 713const Interface::FunctionInfo FunctionTable[] = {
377 {0x00010040, SetInputFormat, "SetInputFormat"}, 714 {0x00010040, SetInputFormat, "SetInputFormat"},
378 {0x00020000, nullptr, "GetInputFormat"}, 715 {0x00020000, GetInputFormat, "GetInputFormat"},
379 {0x00030040, SetOutputFormat, "SetOutputFormat"}, 716 {0x00030040, SetOutputFormat, "SetOutputFormat"},
380 {0x00040000, nullptr, "GetOutputFormat"}, 717 {0x00040000, GetOutputFormat, "GetOutputFormat"},
381 {0x00050040, SetRotation, "SetRotation"}, 718 {0x00050040, SetRotation, "SetRotation"},
382 {0x00060000, nullptr, "GetRotation"}, 719 {0x00060000, GetRotation, "GetRotation"},
383 {0x00070040, SetBlockAlignment, "SetBlockAlignment"}, 720 {0x00070040, SetBlockAlignment, "SetBlockAlignment"},
384 {0x00080000, nullptr, "GetBlockAlignment"}, 721 {0x00080000, GetBlockAlignment, "GetBlockAlignment"},
385 {0x00090040, nullptr, "SetSpacialDithering"}, 722 {0x00090040, SetSpacialDithering, "SetSpacialDithering"},
386 {0x000A0000, nullptr, "GetSpacialDithering"}, 723 {0x000A0000, GetSpacialDithering, "GetSpacialDithering"},
387 {0x000B0040, nullptr, "SetTemporalDithering"}, 724 {0x000B0040, SetTemporalDithering, "SetTemporalDithering"},
388 {0x000C0000, nullptr, "GetTemporalDithering"}, 725 {0x000C0000, GetTemporalDithering, "GetTemporalDithering"},
389 {0x000D0040, SetTransferEndInterrupt, "SetTransferEndInterrupt"}, 726 {0x000D0040, SetTransferEndInterrupt, "SetTransferEndInterrupt"},
727 {0x000E0000, GetTransferEndInterrupt, "GetTransferEndInterrupt"},
390 {0x000F0000, GetTransferEndEvent, "GetTransferEndEvent"}, 728 {0x000F0000, GetTransferEndEvent, "GetTransferEndEvent"},
391 {0x00100102, SetSendingY, "SetSendingY"}, 729 {0x00100102, SetSendingY, "SetSendingY"},
392 {0x00110102, SetSendingU, "SetSendingU"}, 730 {0x00110102, SetSendingU, "SetSendingU"},
393 {0x00120102, SetSendingV, "SetSendingV"}, 731 {0x00120102, SetSendingV, "SetSendingV"},
394 {0x00130102, SetSendingYUYV, "SetSendingYUYV"}, 732 {0x00130102, SetSendingYUYV, "SetSendingYUYV"},
395 {0x00140000, nullptr, "IsFinishedSendingYuv"}, 733 {0x00140000, IsFinishedSendingYuv, "IsFinishedSendingYuv"},
396 {0x00150000, nullptr, "IsFinishedSendingY"}, 734 {0x00150000, IsFinishedSendingY, "IsFinishedSendingY"},
397 {0x00160000, nullptr, "IsFinishedSendingU"}, 735 {0x00160000, IsFinishedSendingU, "IsFinishedSendingU"},
398 {0x00170000, nullptr, "IsFinishedSendingV"}, 736 {0x00170000, IsFinishedSendingV, "IsFinishedSendingV"},
399 {0x00180102, SetReceiving, "SetReceiving"}, 737 {0x00180102, SetReceiving, "SetReceiving"},
400 {0x00190000, nullptr, "IsFinishedReceiving"}, 738 {0x00190000, IsFinishedReceiving, "IsFinishedReceiving"},
401 {0x001A0040, SetInputLineWidth, "SetInputLineWidth"}, 739 {0x001A0040, SetInputLineWidth, "SetInputLineWidth"},
402 {0x001B0000, nullptr, "GetInputLineWidth"}, 740 {0x001B0000, GetInputLineWidth, "GetInputLineWidth"},
403 {0x001C0040, SetInputLines, "SetInputLines"}, 741 {0x001C0040, SetInputLines, "SetInputLines"},
404 {0x001D0000, nullptr, "GetInputLines"}, 742 {0x001D0000, GetInputLines, "GetInputLines"},
405 {0x001E0100, SetCoefficient, "SetCoefficient"}, 743 {0x001E0100, SetCoefficient, "SetCoefficient"},
406 {0x001F0000, nullptr, "GetCoefficient"}, 744 {0x001F0000, GetCoefficient, "GetCoefficient"},
407 {0x00200040, SetStandardCoefficient, "SetStandardCoefficient"}, 745 {0x00200040, SetStandardCoefficient, "SetStandardCoefficient"},
408 {0x00210040, nullptr, "GetStandardCoefficientParams"}, 746 {0x00210040, GetStandardCoefficient, "GetStandardCoefficient"},
409 {0x00220040, SetAlpha, "SetAlpha"}, 747 {0x00220040, SetAlpha, "SetAlpha"},
410 {0x00230000, nullptr, "GetAlpha"}, 748 {0x00230000, GetAlpha, "GetAlpha"},
411 {0x00240200, nullptr, "SetDitheringWeightParams"}, 749 {0x00240200, SetDitheringWeightParams,"SetDitheringWeightParams"},
412 {0x00250000, nullptr, "GetDitheringWeightParams"}, 750 {0x00250000, GetDitheringWeightParams,"GetDitheringWeightParams"},
413 {0x00260000, StartConversion, "StartConversion"}, 751 {0x00260000, StartConversion, "StartConversion"},
414 {0x00270000, StopConversion, "StopConversion"}, 752 {0x00270000, StopConversion, "StopConversion"},
415 {0x00280000, IsBusyConversion, "IsBusyConversion"}, 753 {0x00280000, IsBusyConversion, "IsBusyConversion"},
416 {0x002901C0, SetConversionParams, "SetConversionParams"}, 754 {0x002901C0, SetPackageParameter, "SetPackageParameter"},
417 {0x002A0000, PingProcess, "PingProcess"}, 755 {0x002A0000, PingProcess, "PingProcess"},
418 {0x002B0000, DriverInitialize, "DriverInitialize"}, 756 {0x002B0000, DriverInitialize, "DriverInitialize"},
419 {0x002C0000, DriverFinalize, "DriverFinalize"}, 757 {0x002C0000, DriverFinalize, "DriverFinalize"},
420 {0x002D0000, nullptr, "GetPackageParameter"}, 758 {0x002D0000, GetPackageParameter, "GetPackageParameter"},
421}; 759};
422 760
423//////////////////////////////////////////////////////////////////////////////////////////////////// 761////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/core/hle/service/y2r_u.h b/src/core/hle/service/y2r_u.h
index 3965a5545..95fa2fdb7 100644
--- a/src/core/hle/service/y2r_u.h
+++ b/src/core/hle/service/y2r_u.h
@@ -97,6 +97,7 @@ struct ConversionConfiguration {
97 u16 input_line_width; 97 u16 input_line_width;
98 u16 input_lines; 98 u16 input_lines;
99 CoefficientSet coefficients; 99 CoefficientSet coefficients;
100 u8 padding;
100 u16 alpha; 101 u16 alpha;
101 102
102 /// Input parameters for the Y (luma) plane 103 /// Input parameters for the Y (luma) plane
@@ -109,6 +110,25 @@ struct ConversionConfiguration {
109 ResultCode SetStandardCoefficient(StandardCoefficient standard_coefficient); 110 ResultCode SetStandardCoefficient(StandardCoefficient standard_coefficient);
110}; 111};
111 112
113struct DitheringWeightParams {
114 u16 w0_xEven_yEven;
115 u16 w0_xOdd_yEven;
116 u16 w0_xEven_yOdd;
117 u16 w0_xOdd_yOdd;
118 u16 w1_xEven_yEven;
119 u16 w1_xOdd_yEven;
120 u16 w1_xEven_yOdd;
121 u16 w1_xOdd_yOdd;
122 u16 w2_xEven_yEven;
123 u16 w2_xOdd_yEven;
124 u16 w2_xEven_yOdd;
125 u16 w2_xOdd_yOdd;
126 u16 w3_xEven_yEven;
127 u16 w3_xOdd_yEven;
128 u16 w3_xEven_yOdd;
129 u16 w3_xOdd_yOdd;
130};
131
112class Interface : public Service::Interface { 132class Interface : public Service::Interface {
113public: 133public:
114 Interface(); 134 Interface();
diff --git a/src/core/hle/svc.cpp b/src/core/hle/svc.cpp
index ae54afb1c..60c8747f3 100644
--- a/src/core/hle/svc.cpp
+++ b/src/core/hle/svc.cpp
@@ -6,7 +6,6 @@
6 6
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8#include "common/microprofile.h" 8#include "common/microprofile.h"
9#include "common/profiler.h"
10#include "common/string_util.h" 9#include "common/string_util.h"
11#include "common/symbols.h" 10#include "common/symbols.h"
12 11
@@ -497,6 +496,11 @@ static ResultCode CreateThread(Handle* out_handle, s32 priority, u32 entry_point
497 break; 496 break;
498 } 497 }
499 498
499 if (processor_id == THREADPROCESSORID_1 || processor_id == THREADPROCESSORID_ALL ||
500 (processor_id == THREADPROCESSORID_DEFAULT && Kernel::g_current_process->ideal_processor == THREADPROCESSORID_1)) {
501 LOG_WARNING(Kernel_SVC, "Newly created thread is allowed to be run in the SysCore, unimplemented.");
502 }
503
500 CASCADE_RESULT(SharedPtr<Thread> thread, Kernel::Thread::Create( 504 CASCADE_RESULT(SharedPtr<Thread> thread, Kernel::Thread::Create(
501 name, entry_point, priority, arg, processor_id, stack_top)); 505 name, entry_point, priority, arg, processor_id, stack_top));
502 CASCADE_RESULT(*out_handle, Kernel::g_handle_table.Create(std::move(thread))); 506 CASCADE_RESULT(*out_handle, Kernel::g_handle_table.Create(std::move(thread)));
@@ -860,6 +864,10 @@ static ResultCode GetProcessInfo(s64* out, Handle process_handle, u32 type) {
860 // TODO(yuriks): Type 0 returns a slightly higher number than type 2, but I'm not sure 864 // TODO(yuriks): Type 0 returns a slightly higher number than type 2, but I'm not sure
861 // what's the difference between them. 865 // what's the difference between them.
862 *out = process->heap_used + process->linear_heap_used + process->misc_memory_used; 866 *out = process->heap_used + process->linear_heap_used + process->misc_memory_used;
867 if(*out % Memory::PAGE_SIZE != 0) {
868 LOG_ERROR(Kernel_SVC, "called, memory size not page-aligned");
869 return ERR_MISALIGNED_SIZE;
870 }
863 break; 871 break;
864 case 1: 872 case 1:
865 case 3: 873 case 3:
@@ -1031,8 +1039,6 @@ static const FunctionDef SVC_Table[] = {
1031 {0x7D, HLE::Wrap<QueryProcessMemory>, "QueryProcessMemory"}, 1039 {0x7D, HLE::Wrap<QueryProcessMemory>, "QueryProcessMemory"},
1032}; 1040};
1033 1041
1034Common::Profiling::TimingCategory profiler_svc("SVC Calls");
1035
1036static const FunctionDef* GetSVCInfo(u32 func_num) { 1042static const FunctionDef* GetSVCInfo(u32 func_num) {
1037 if (func_num >= ARRAY_SIZE(SVC_Table)) { 1043 if (func_num >= ARRAY_SIZE(SVC_Table)) {
1038 LOG_ERROR(Kernel_SVC, "unknown svc=0x%02X", func_num); 1044 LOG_ERROR(Kernel_SVC, "unknown svc=0x%02X", func_num);
@@ -1044,7 +1050,6 @@ static const FunctionDef* GetSVCInfo(u32 func_num) {
1044MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70)); 1050MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70));
1045 1051
1046void CallSVC(u32 immediate) { 1052void CallSVC(u32 immediate) {
1047 Common::Profiling::ScopeTimer timer_svc(profiler_svc);
1048 MICROPROFILE_SCOPE(Kernel_SVC); 1053 MICROPROFILE_SCOPE(Kernel_SVC);
1049 1054
1050 const FunctionDef* info = GetSVCInfo(immediate); 1055 const FunctionDef* info = GetSVCInfo(immediate);
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index 7e2f9cdfa..a4dfb7e43 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -115,21 +115,39 @@ inline void Write(u32 addr, const T data) {
115 u8* start = Memory::GetPhysicalPointer(config.GetStartAddress()); 115 u8* start = Memory::GetPhysicalPointer(config.GetStartAddress());
116 u8* end = Memory::GetPhysicalPointer(config.GetEndAddress()); 116 u8* end = Memory::GetPhysicalPointer(config.GetEndAddress());
117 117
118 if (config.fill_24bit) { 118 // TODO: Consider always accelerating and returning vector of
119 // fill with 24-bit values 119 // regions that the accelerated fill did not cover to
120 for (u8* ptr = start; ptr < end; ptr += 3) { 120 // reduce/eliminate the fill that the cpu has to do.
121 ptr[0] = config.value_24bit_r; 121 // This would also mean that the flush below is not needed.
122 ptr[1] = config.value_24bit_g; 122 // Fill should first flush all surfaces that touch but are
123 ptr[2] = config.value_24bit_b; 123 // not completely within the fill range.
124 // Then fill all completely covered surfaces, and return the
125 // regions that were between surfaces or within the touching
126 // ones for cpu to manually fill here.
127 if (!VideoCore::g_renderer->Rasterizer()->AccelerateFill(config)) {
128 Memory::RasterizerFlushAndInvalidateRegion(config.GetStartAddress(), config.GetEndAddress() - config.GetStartAddress());
129
130 if (config.fill_24bit) {
131 // fill with 24-bit values
132 for (u8* ptr = start; ptr < end; ptr += 3) {
133 ptr[0] = config.value_24bit_r;
134 ptr[1] = config.value_24bit_g;
135 ptr[2] = config.value_24bit_b;
136 }
137 } else if (config.fill_32bit) {
138 // fill with 32-bit values
139 if (end > start) {
140 u32 value = config.value_32bit;
141 size_t len = (end - start) / sizeof(u32);
142 for (size_t i = 0; i < len; ++i)
143 memcpy(&start[i * sizeof(u32)], &value, sizeof(u32));
144 }
145 } else {
146 // fill with 16-bit values
147 u16 value_16bit = config.value_16bit.Value();
148 for (u8* ptr = start; ptr < end; ptr += sizeof(u16))
149 memcpy(ptr, &value_16bit, sizeof(u16));
124 } 150 }
125 } else if (config.fill_32bit) {
126 // fill with 32-bit values
127 for (u32* ptr = (u32*)start; ptr < (u32*)end; ++ptr)
128 *ptr = config.value_32bit;
129 } else {
130 // fill with 16-bit values
131 for (u16* ptr = (u16*)start; ptr < (u16*)end; ++ptr)
132 *ptr = config.value_16bit;
133 } 151 }
134 152
135 LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), config.GetEndAddress()); 153 LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), config.GetEndAddress());
@@ -139,8 +157,6 @@ inline void Write(u32 addr, const T data) {
139 } else { 157 } else {
140 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC1); 158 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC1);
141 } 159 }
142
143 VideoCore::g_renderer->Rasterizer()->InvalidateRegion(config.GetStartAddress(), config.GetEndAddress() - config.GetStartAddress());
144 } 160 }
145 161
146 // Reset "trigger" flag and set the "finish" flag 162 // Reset "trigger" flag and set the "finish" flag
@@ -161,184 +177,185 @@ inline void Write(u32 addr, const T data) {
161 if (Pica::g_debug_context) 177 if (Pica::g_debug_context)
162 Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer, nullptr); 178 Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer, nullptr);
163 179
164 u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress()); 180 if (!VideoCore::g_renderer->Rasterizer()->AccelerateDisplayTransfer(config)) {
165 u8* dst_pointer = Memory::GetPhysicalPointer(config.GetPhysicalOutputAddress()); 181 u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress());
166 182 u8* dst_pointer = Memory::GetPhysicalPointer(config.GetPhysicalOutputAddress());
167 if (config.is_texture_copy) {
168 u32 input_width = config.texture_copy.input_width * 16;
169 u32 input_gap = config.texture_copy.input_gap * 16;
170 u32 output_width = config.texture_copy.output_width * 16;
171 u32 output_gap = config.texture_copy.output_gap * 16;
172
173 size_t contiguous_input_size = config.texture_copy.size / input_width * (input_width + input_gap);
174 VideoCore::g_renderer->Rasterizer()->FlushRegion(config.GetPhysicalInputAddress(), contiguous_input_size);
175
176 u32 remaining_size = config.texture_copy.size;
177 u32 remaining_input = input_width;
178 u32 remaining_output = output_width;
179 while (remaining_size > 0) {
180 u32 copy_size = std::min({ remaining_input, remaining_output, remaining_size });
181 183
182 std::memcpy(dst_pointer, src_pointer, copy_size); 184 if (config.is_texture_copy) {
183 src_pointer += copy_size; 185 u32 input_width = config.texture_copy.input_width * 16;
184 dst_pointer += copy_size; 186 u32 input_gap = config.texture_copy.input_gap * 16;
187 u32 output_width = config.texture_copy.output_width * 16;
188 u32 output_gap = config.texture_copy.output_gap * 16;
185 189
186 remaining_input -= copy_size; 190 size_t contiguous_input_size = config.texture_copy.size / input_width * (input_width + input_gap);
187 remaining_output -= copy_size; 191 Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), static_cast<u32>(contiguous_input_size));
188 remaining_size -= copy_size;
189 192
190 if (remaining_input == 0) { 193 size_t contiguous_output_size = config.texture_copy.size / output_width * (output_width + output_gap);
191 remaining_input = input_width; 194 Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), static_cast<u32>(contiguous_output_size));
192 src_pointer += input_gap;
193 }
194 if (remaining_output == 0) {
195 remaining_output = output_width;
196 dst_pointer += output_gap;
197 }
198 }
199 195
200 LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X", 196 u32 remaining_size = config.texture_copy.size;
201 config.texture_copy.size, 197 u32 remaining_input = input_width;
202 config.GetPhysicalInputAddress(), input_width, input_gap, 198 u32 remaining_output = output_width;
203 config.GetPhysicalOutputAddress(), output_width, output_gap, 199 while (remaining_size > 0) {
204 config.flags); 200 u32 copy_size = std::min({ remaining_input, remaining_output, remaining_size });
205 201
206 size_t contiguous_output_size = config.texture_copy.size / output_width * (output_width + output_gap); 202 std::memcpy(dst_pointer, src_pointer, copy_size);
207 VideoCore::g_renderer->Rasterizer()->InvalidateRegion(config.GetPhysicalOutputAddress(), contiguous_output_size); 203 src_pointer += copy_size;
204 dst_pointer += copy_size;
208 205
209 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); 206 remaining_input -= copy_size;
210 break; 207 remaining_output -= copy_size;
211 } 208 remaining_size -= copy_size;
212 209
213 if (config.scaling > config.ScaleXY) { 210 if (remaining_input == 0) {
214 LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", config.scaling.Value()); 211 remaining_input = input_width;
215 UNIMPLEMENTED(); 212 src_pointer += input_gap;
216 break; 213 }
217 } 214 if (remaining_output == 0) {
215 remaining_output = output_width;
216 dst_pointer += output_gap;
217 }
218 }
218 219
219 if (config.input_linear && config.scaling != config.NoScale) { 220 LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X",
220 LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input"); 221 config.texture_copy.size,
221 UNIMPLEMENTED(); 222 config.GetPhysicalInputAddress(), input_width, input_gap,
222 break; 223 config.GetPhysicalOutputAddress(), output_width, output_gap,
223 } 224 config.flags);
224 225
225 bool horizontal_scale = config.scaling != config.NoScale; 226 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF);
226 bool vertical_scale = config.scaling == config.ScaleXY; 227 break;
228 }
227 229
228 u32 output_width = config.output_width >> horizontal_scale; 230 if (config.scaling > config.ScaleXY) {
229 u32 output_height = config.output_height >> vertical_scale; 231 LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", config.scaling.Value());
232 UNIMPLEMENTED();
233 break;
234 }
230 235
231 u32 input_size = config.input_width * config.input_height * GPU::Regs::BytesPerPixel(config.input_format); 236 if (config.input_linear && config.scaling != config.NoScale) {
232 u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format); 237 LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input");
238 UNIMPLEMENTED();
239 break;
240 }
233 241
234 VideoCore::g_renderer->Rasterizer()->FlushRegion(config.GetPhysicalInputAddress(), input_size); 242 int horizontal_scale = config.scaling != config.NoScale ? 1 : 0;
243 int vertical_scale = config.scaling == config.ScaleXY ? 1 : 0;
235 244
236 for (u32 y = 0; y < output_height; ++y) { 245 u32 output_width = config.output_width >> horizontal_scale;
237 for (u32 x = 0; x < output_width; ++x) { 246 u32 output_height = config.output_height >> vertical_scale;
238 Math::Vec4<u8> src_color;
239 247
240 // Calculate the [x,y] position of the input image 248 u32 input_size = config.input_width * config.input_height * GPU::Regs::BytesPerPixel(config.input_format);
241 // based on the current output position and the scale 249 u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format);
242 u32 input_x = x << horizontal_scale;
243 u32 input_y = y << vertical_scale;
244 250
245 if (config.flip_vertically) { 251 Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), input_size);
246 // Flip the y value of the output data, 252 Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), output_size);
247 // we do this after calculating the [x,y] position of the input image
248 // to account for the scaling options.
249 y = output_height - y - 1;
250 }
251 253
252 u32 dst_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.output_format); 254 for (u32 y = 0; y < output_height; ++y) {
253 u32 src_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.input_format); 255 for (u32 x = 0; x < output_width; ++x) {
254 u32 src_offset; 256 Math::Vec4<u8> src_color;
255 u32 dst_offset;
256 257
257 if (config.input_linear) { 258 // Calculate the [x,y] position of the input image
258 if (!config.dont_swizzle) { 259 // based on the current output position and the scale
259 // Interpret the input as linear and the output as tiled 260 u32 input_x = x << horizontal_scale;
260 u32 coarse_y = y & ~7; 261 u32 input_y = y << vertical_scale;
261 u32 stride = output_width * dst_bytes_per_pixel;
262 262
263 src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel; 263 if (config.flip_vertically) {
264 dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + coarse_y * stride; 264 // Flip the y value of the output data,
265 } else { 265 // we do this after calculating the [x,y] position of the input image
266 // Both input and output are linear 266 // to account for the scaling options.
267 src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel; 267 y = output_height - y - 1;
268 dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
269 } 268 }
270 } else {
271 if (!config.dont_swizzle) {
272 // Interpret the input as tiled and the output as linear
273 u32 coarse_y = input_y & ~7;
274 u32 stride = config.input_width * src_bytes_per_pixel;
275 269
276 src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + coarse_y * stride; 270 u32 dst_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.output_format);
277 dst_offset = (x + y * output_width) * dst_bytes_per_pixel; 271 u32 src_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.input_format);
272 u32 src_offset;
273 u32 dst_offset;
274
275 if (config.input_linear) {
276 if (!config.dont_swizzle) {
277 // Interpret the input as linear and the output as tiled
278 u32 coarse_y = y & ~7;
279 u32 stride = output_width * dst_bytes_per_pixel;
280
281 src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel;
282 dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + coarse_y * stride;
283 } else {
284 // Both input and output are linear
285 src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel;
286 dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
287 }
278 } else { 288 } else {
279 // Both input and output are tiled 289 if (!config.dont_swizzle) {
280 u32 out_coarse_y = y & ~7; 290 // Interpret the input as tiled and the output as linear
281 u32 out_stride = output_width * dst_bytes_per_pixel; 291 u32 coarse_y = input_y & ~7;
282 292 u32 stride = config.input_width * src_bytes_per_pixel;
283 u32 in_coarse_y = input_y & ~7; 293
284 u32 in_stride = config.input_width * src_bytes_per_pixel; 294 src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + coarse_y * stride;
285 295 dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
286 src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + in_coarse_y * in_stride; 296 } else {
287 dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + out_coarse_y * out_stride; 297 // Both input and output are tiled
298 u32 out_coarse_y = y & ~7;
299 u32 out_stride = output_width * dst_bytes_per_pixel;
300
301 u32 in_coarse_y = input_y & ~7;
302 u32 in_stride = config.input_width * src_bytes_per_pixel;
303
304 src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + in_coarse_y * in_stride;
305 dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + out_coarse_y * out_stride;
306 }
288 } 307 }
289 }
290 308
291 const u8* src_pixel = src_pointer + src_offset; 309 const u8* src_pixel = src_pointer + src_offset;
292 src_color = DecodePixel(config.input_format, src_pixel); 310 src_color = DecodePixel(config.input_format, src_pixel);
293 if (config.scaling == config.ScaleX) { 311 if (config.scaling == config.ScaleX) {
294 Math::Vec4<u8> pixel = DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel); 312 Math::Vec4<u8> pixel = DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel);
295 src_color = ((src_color + pixel) / 2).Cast<u8>(); 313 src_color = ((src_color + pixel) / 2).Cast<u8>();
296 } else if (config.scaling == config.ScaleXY) { 314 } else if (config.scaling == config.ScaleXY) {
297 Math::Vec4<u8> pixel1 = DecodePixel(config.input_format, src_pixel + 1 * src_bytes_per_pixel); 315 Math::Vec4<u8> pixel1 = DecodePixel(config.input_format, src_pixel + 1 * src_bytes_per_pixel);
298 Math::Vec4<u8> pixel2 = DecodePixel(config.input_format, src_pixel + 2 * src_bytes_per_pixel); 316 Math::Vec4<u8> pixel2 = DecodePixel(config.input_format, src_pixel + 2 * src_bytes_per_pixel);
299 Math::Vec4<u8> pixel3 = DecodePixel(config.input_format, src_pixel + 3 * src_bytes_per_pixel); 317 Math::Vec4<u8> pixel3 = DecodePixel(config.input_format, src_pixel + 3 * src_bytes_per_pixel);
300 src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>(); 318 src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>();
301 } 319 }
302 320
303 u8* dst_pixel = dst_pointer + dst_offset; 321 u8* dst_pixel = dst_pointer + dst_offset;
304 switch (config.output_format) { 322 switch (config.output_format) {
305 case Regs::PixelFormat::RGBA8: 323 case Regs::PixelFormat::RGBA8:
306 Color::EncodeRGBA8(src_color, dst_pixel); 324 Color::EncodeRGBA8(src_color, dst_pixel);
307 break; 325 break;
308 326
309 case Regs::PixelFormat::RGB8: 327 case Regs::PixelFormat::RGB8:
310 Color::EncodeRGB8(src_color, dst_pixel); 328 Color::EncodeRGB8(src_color, dst_pixel);
311 break; 329 break;
312 330
313 case Regs::PixelFormat::RGB565: 331 case Regs::PixelFormat::RGB565:
314 Color::EncodeRGB565(src_color, dst_pixel); 332 Color::EncodeRGB565(src_color, dst_pixel);
315 break; 333 break;
316 334
317 case Regs::PixelFormat::RGB5A1: 335 case Regs::PixelFormat::RGB5A1:
318 Color::EncodeRGB5A1(src_color, dst_pixel); 336 Color::EncodeRGB5A1(src_color, dst_pixel);
319 break; 337 break;
320 338
321 case Regs::PixelFormat::RGBA4: 339 case Regs::PixelFormat::RGBA4:
322 Color::EncodeRGBA4(src_color, dst_pixel); 340 Color::EncodeRGBA4(src_color, dst_pixel);
323 break; 341 break;
324 342
325 default: 343 default:
326 LOG_ERROR(HW_GPU, "Unknown destination framebuffer format %x", config.output_format.Value()); 344 LOG_ERROR(HW_GPU, "Unknown destination framebuffer format %x", config.output_format.Value());
327 break; 345 break;
346 }
328 } 347 }
329 } 348 }
330 }
331 349
332 LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), dst format %x, flags 0x%08X", 350 LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), dst format %x, flags 0x%08X",
333 config.output_height * output_width * GPU::Regs::BytesPerPixel(config.output_format), 351 config.output_height * output_width * GPU::Regs::BytesPerPixel(config.output_format),
334 config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(), 352 config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(),
335 config.GetPhysicalOutputAddress(), output_width, output_height, 353 config.GetPhysicalOutputAddress(), output_width, output_height,
336 config.output_format.Value(), config.flags); 354 config.output_format.Value(), config.flags);
355 }
337 356
338 g_regs.display_transfer_config.trigger = 0; 357 g_regs.display_transfer_config.trigger = 0;
339 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); 358 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF);
340
341 VideoCore::g_renderer->Rasterizer()->InvalidateRegion(config.GetPhysicalOutputAddress(), output_size);
342 } 359 }
343 break; 360 break;
344 } 361 }
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h
index a00adbf53..da4c345b4 100644
--- a/src/core/hw/gpu.h
+++ b/src/core/hw/gpu.h
@@ -78,7 +78,7 @@ struct Regs {
78 78
79 INSERT_PADDING_WORDS(0x4); 79 INSERT_PADDING_WORDS(0x4);
80 80
81 struct { 81 struct MemoryFillConfig {
82 u32 address_start; 82 u32 address_start;
83 u32 address_end; 83 u32 address_end;
84 84
@@ -165,7 +165,7 @@ struct Regs {
165 165
166 INSERT_PADDING_WORDS(0x169); 166 INSERT_PADDING_WORDS(0x169);
167 167
168 struct { 168 struct DisplayTransferConfig {
169 u32 input_address; 169 u32 input_address;
170 u32 output_address; 170 u32 output_address;
171 171
diff --git a/src/core/hw/lcd.h b/src/core/hw/lcd.h
index 3dd877fbf..57029c5e8 100644
--- a/src/core/hw/lcd.h
+++ b/src/core/hw/lcd.h
@@ -52,8 +52,6 @@ struct Regs {
52 return content[index]; 52 return content[index];
53 } 53 }
54 54
55#undef ASSERT_MEMBER_SIZE
56
57}; 55};
58static_assert(std::is_standard_layout<Regs>::value, "Structure does not use standard layout"); 56static_assert(std::is_standard_layout<Regs>::value, "Structure does not use standard layout");
59 57
diff --git a/src/core/loader/3dsx.cpp b/src/core/loader/3dsx.cpp
index 5fb3b9e2b..98e7ab48f 100644
--- a/src/core/loader/3dsx.cpp
+++ b/src/core/loader/3dsx.cpp
@@ -178,11 +178,11 @@ static THREEDSX_Error Load3DSXFile(FileUtil::IOFile& file, u32 base_addr, Shared
178 for (unsigned current_inprogress = 0; current_inprogress < remaining && pos < end_pos; current_inprogress++) { 178 for (unsigned current_inprogress = 0; current_inprogress < remaining && pos < end_pos; current_inprogress++) {
179 const auto& table = reloc_table[current_inprogress]; 179 const auto& table = reloc_table[current_inprogress];
180 LOG_TRACE(Loader, "(t=%d,skip=%u,patch=%u)", current_segment_reloc_table, 180 LOG_TRACE(Loader, "(t=%d,skip=%u,patch=%u)", current_segment_reloc_table,
181 (u32)table.skip, (u32)table.patch); 181 static_cast<u32>(table.skip), static_cast<u32>(table.patch));
182 pos += table.skip; 182 pos += table.skip;
183 s32 num_patches = table.patch; 183 s32 num_patches = table.patch;
184 while (0 < num_patches && pos < end_pos) { 184 while (0 < num_patches && pos < end_pos) {
185 u32 in_addr = (u8*)pos - program_image.data(); 185 u32 in_addr = static_cast<u32>(reinterpret_cast<u8*>(pos) - program_image.data());
186 u32 addr = TranslateAddr(*pos, &loadinfo, offsets); 186 u32 addr = TranslateAddr(*pos, &loadinfo, offsets);
187 LOG_TRACE(Loader, "Patching %08X <-- rel(%08X,%d) (%08X)", 187 LOG_TRACE(Loader, "Patching %08X <-- rel(%08X,%d) (%08X)",
188 base_addr + in_addr, addr, current_segment_reloc_table, *pos); 188 base_addr + in_addr, addr, current_segment_reloc_table, *pos);
@@ -284,7 +284,7 @@ ResultStatus AppLoader_THREEDSX::ReadRomFS(std::shared_ptr<FileUtil::IOFile>& ro
284 // Check if the 3DSX has a RomFS... 284 // Check if the 3DSX has a RomFS...
285 if (hdr.fs_offset != 0) { 285 if (hdr.fs_offset != 0) {
286 u32 romfs_offset = hdr.fs_offset; 286 u32 romfs_offset = hdr.fs_offset;
287 u32 romfs_size = file.GetSize() - hdr.fs_offset; 287 u32 romfs_size = static_cast<u32>(file.GetSize()) - hdr.fs_offset;
288 288
289 LOG_DEBUG(Loader, "RomFS offset: 0x%08X", romfs_offset); 289 LOG_DEBUG(Loader, "RomFS offset: 0x%08X", romfs_offset);
290 LOG_DEBUG(Loader, "RomFS size: 0x%08X", romfs_size); 290 LOG_DEBUG(Loader, "RomFS size: 0x%08X", romfs_size);
@@ -303,4 +303,31 @@ ResultStatus AppLoader_THREEDSX::ReadRomFS(std::shared_ptr<FileUtil::IOFile>& ro
303 return ResultStatus::ErrorNotUsed; 303 return ResultStatus::ErrorNotUsed;
304} 304}
305 305
306ResultStatus AppLoader_THREEDSX::ReadIcon(std::vector<u8>& buffer) {
307 if (!file.IsOpen())
308 return ResultStatus::Error;
309
310 // Reset read pointer in case this file has been read before.
311 file.Seek(0, SEEK_SET);
312
313 THREEDSX_Header hdr;
314 if (file.ReadBytes(&hdr, sizeof(THREEDSX_Header)) != sizeof(THREEDSX_Header))
315 return ResultStatus::Error;
316
317 if (hdr.header_size != sizeof(THREEDSX_Header))
318 return ResultStatus::Error;
319
320 // Check if the 3DSX has a SMDH...
321 if (hdr.smdh_offset != 0) {
322 file.Seek(hdr.smdh_offset, SEEK_SET);
323 buffer.resize(hdr.smdh_size);
324
325 if (file.ReadBytes(&buffer[0], hdr.smdh_size) != hdr.smdh_size)
326 return ResultStatus::Error;
327
328 return ResultStatus::Success;
329 }
330 return ResultStatus::ErrorNotUsed;
331}
332
306} // namespace Loader 333} // namespace Loader
diff --git a/src/core/loader/3dsx.h b/src/core/loader/3dsx.h
index 365ddb7a5..3ee686703 100644
--- a/src/core/loader/3dsx.h
+++ b/src/core/loader/3dsx.h
@@ -17,7 +17,7 @@ namespace Loader {
17/// Loads an 3DSX file 17/// Loads an 3DSX file
18class AppLoader_THREEDSX final : public AppLoader { 18class AppLoader_THREEDSX final : public AppLoader {
19public: 19public:
20 AppLoader_THREEDSX(FileUtil::IOFile&& file, std::string filename, const std::string& filepath) 20 AppLoader_THREEDSX(FileUtil::IOFile&& file, const std::string& filename, const std::string& filepath)
21 : AppLoader(std::move(file)), filename(std::move(filename)), filepath(filepath) {} 21 : AppLoader(std::move(file)), filename(std::move(filename)), filepath(filepath) {}
22 22
23 /** 23 /**
@@ -34,6 +34,13 @@ public:
34 ResultStatus Load() override; 34 ResultStatus Load() override;
35 35
36 /** 36 /**
37 * Get the icon (typically icon section) of the application
38 * @param buffer Reference to buffer to store data
39 * @return ResultStatus result of function
40 */
41 ResultStatus ReadIcon(std::vector<u8>& buffer) override;
42
43 /**
37 * Get the RomFS of the application 44 * Get the RomFS of the application
38 * @param romfs_file Reference to buffer to store data 45 * @param romfs_file Reference to buffer to store data
39 * @param offset Offset in the file to the RomFS 46 * @param offset Offset in the file to the RomFS
diff --git a/src/core/loader/loader.cpp b/src/core/loader/loader.cpp
index 886501c41..af3f62248 100644
--- a/src/core/loader/loader.cpp
+++ b/src/core/loader/loader.cpp
@@ -90,6 +90,28 @@ const char* GetFileTypeString(FileType type) {
90 return "unknown"; 90 return "unknown";
91} 91}
92 92
93std::unique_ptr<AppLoader> GetLoader(FileUtil::IOFile&& file, FileType type,
94 const std::string& filename, const std::string& filepath) {
95 switch (type) {
96
97 // 3DSX file format.
98 case FileType::THREEDSX:
99 return std::make_unique<AppLoader_THREEDSX>(std::move(file), filename, filepath);
100
101 // Standard ELF file format.
102 case FileType::ELF:
103 return std::make_unique<AppLoader_ELF>(std::move(file), filename);
104
105 // NCCH/NCSD container formats.
106 case FileType::CXI:
107 case FileType::CCI:
108 return std::make_unique<AppLoader_NCCH>(std::move(file), filepath);
109
110 default:
111 return std::unique_ptr<AppLoader>();
112 }
113}
114
93ResultStatus LoadFile(const std::string& filename) { 115ResultStatus LoadFile(const std::string& filename) {
94 FileUtil::IOFile file(filename, "rb"); 116 FileUtil::IOFile file(filename, "rb");
95 if (!file.IsOpen()) { 117 if (!file.IsOpen()) {
@@ -111,38 +133,29 @@ ResultStatus LoadFile(const std::string& filename) {
111 133
112 LOG_INFO(Loader, "Loading file %s as %s...", filename.c_str(), GetFileTypeString(type)); 134 LOG_INFO(Loader, "Loading file %s as %s...", filename.c_str(), GetFileTypeString(type));
113 135
136 std::unique_ptr<AppLoader> app_loader = GetLoader(std::move(file), type, filename_filename, filename);
137
114 switch (type) { 138 switch (type) {
115 139
116 //3DSX file format... 140 // 3DSX file format...
141 // or NCCH/NCSD container formats...
117 case FileType::THREEDSX: 142 case FileType::THREEDSX:
118 {
119 AppLoader_THREEDSX app_loader(std::move(file), filename_filename, filename);
120 // Load application and RomFS
121 if (ResultStatus::Success == app_loader.Load()) {
122 Service::FS::RegisterArchiveType(std::make_unique<FileSys::ArchiveFactory_RomFS>(app_loader), Service::FS::ArchiveIdCode::RomFS);
123 return ResultStatus::Success;
124 }
125 break;
126 }
127
128 // Standard ELF file format...
129 case FileType::ELF:
130 return AppLoader_ELF(std::move(file), filename_filename).Load();
131
132 // NCCH/NCSD container formats...
133 case FileType::CXI: 143 case FileType::CXI:
134 case FileType::CCI: 144 case FileType::CCI:
135 { 145 {
136 AppLoader_NCCH app_loader(std::move(file), filename);
137
138 // Load application and RomFS 146 // Load application and RomFS
139 ResultStatus result = app_loader.Load(); 147 ResultStatus result = app_loader->Load();
140 if (ResultStatus::Success == result) { 148 if (ResultStatus::Success == result) {
141 Service::FS::RegisterArchiveType(std::make_unique<FileSys::ArchiveFactory_RomFS>(app_loader), Service::FS::ArchiveIdCode::RomFS); 149 Service::FS::RegisterArchiveType(std::make_unique<FileSys::ArchiveFactory_RomFS>(*app_loader), Service::FS::ArchiveIdCode::RomFS);
150 return ResultStatus::Success;
142 } 151 }
143 return result; 152 return result;
144 } 153 }
145 154
155 // Standard ELF file format...
156 case FileType::ELF:
157 return app_loader->Load();
158
146 // CIA file format... 159 // CIA file format...
147 case FileType::CIA: 160 case FileType::CIA:
148 return ResultStatus::ErrorNotImplemented; 161 return ResultStatus::ErrorNotImplemented;
diff --git a/src/core/loader/loader.h b/src/core/loader/loader.h
index 84a4ce5fc..9d3e9ed3b 100644
--- a/src/core/loader/loader.h
+++ b/src/core/loader/loader.h
@@ -10,8 +10,10 @@
10#include <string> 10#include <string>
11#include <vector> 11#include <vector>
12 12
13#include "common/common_funcs.h"
13#include "common/common_types.h" 14#include "common/common_types.h"
14#include "common/file_util.h" 15#include "common/file_util.h"
16#include "common/swap.h"
15 17
16namespace Kernel { 18namespace Kernel {
17struct AddressMapping; 19struct AddressMapping;
@@ -78,6 +80,51 @@ constexpr u32 MakeMagic(char a, char b, char c, char d) {
78 return a | b << 8 | c << 16 | d << 24; 80 return a | b << 8 | c << 16 | d << 24;
79} 81}
80 82
83/// SMDH data structure that contains titles, icons etc. See https://www.3dbrew.org/wiki/SMDH
84struct SMDH {
85 u32_le magic;
86 u16_le version;
87 INSERT_PADDING_BYTES(2);
88
89 struct Title {
90 std::array<u16, 0x40> short_title;
91 std::array<u16, 0x80> long_title;
92 std::array<u16, 0x40> publisher;
93 };
94 std::array<Title, 16> titles;
95
96 std::array<u8, 16> ratings;
97 u32_le region_lockout;
98 u32_le match_maker_id;
99 u64_le match_maker_bit_id;
100 u32_le flags;
101 u16_le eula_version;
102 INSERT_PADDING_BYTES(2);
103 float_le banner_animation_frame;
104 u32_le cec_id;
105 INSERT_PADDING_BYTES(8);
106
107 std::array<u8, 0x480> small_icon;
108 std::array<u8, 0x1200> large_icon;
109
110 /// indicates the language used for each title entry
111 enum class TitleLanguage {
112 Japanese = 0,
113 English = 1,
114 French = 2,
115 German = 3,
116 Italian = 4,
117 Spanish = 5,
118 SimplifiedChinese = 6,
119 Korean= 7,
120 Dutch = 8,
121 Portuguese = 9,
122 Russian = 10,
123 TraditionalChinese = 11
124 };
125};
126static_assert(sizeof(SMDH) == 0x36C0, "SMDH structure size is wrong");
127
81/// Interface for loading an application 128/// Interface for loading an application
82class AppLoader : NonCopyable { 129class AppLoader : NonCopyable {
83public: 130public:
@@ -150,6 +197,16 @@ protected:
150extern const std::initializer_list<Kernel::AddressMapping> default_address_mappings; 197extern const std::initializer_list<Kernel::AddressMapping> default_address_mappings;
151 198
152/** 199/**
200 * Get a loader for a file with a specific type
201 * @param file The file to load
202 * @param type The type of the file
203 * @param filename the file name (without path)
204 * @param filepath the file full path (with name)
205 * @return std::unique_ptr<AppLoader> a pointer to a loader object; nullptr for unsupported type
206 */
207std::unique_ptr<AppLoader> GetLoader(FileUtil::IOFile&& file, FileType type, const std::string& filename, const std::string& filepath);
208
209/**
153 * Identifies and loads a bootable file 210 * Identifies and loads a bootable file
154 * @param filename String filename of bootable file 211 * @param filename String filename of bootable file
155 * @return ResultStatus result of function 212 * @return ResultStatus result of function
diff --git a/src/core/loader/ncch.cpp b/src/core/loader/ncch.cpp
index a4b47ef8c..7391bdb26 100644
--- a/src/core/loader/ncch.cpp
+++ b/src/core/loader/ncch.cpp
@@ -156,6 +156,9 @@ ResultStatus AppLoader_NCCH::LoadExec() {
156 Kernel::g_current_process->resource_limit = Kernel::ResourceLimit::GetForCategory( 156 Kernel::g_current_process->resource_limit = Kernel::ResourceLimit::GetForCategory(
157 static_cast<Kernel::ResourceLimitCategory>(exheader_header.arm11_system_local_caps.resource_limit_category)); 157 static_cast<Kernel::ResourceLimitCategory>(exheader_header.arm11_system_local_caps.resource_limit_category));
158 158
159 // Set the default CPU core for this process
160 Kernel::g_current_process->ideal_processor = exheader_header.arm11_system_local_caps.ideal_processor;
161
159 // Copy data while converting endianess 162 // Copy data while converting endianess
160 std::array<u32, ARRAY_SIZE(exheader_header.arm11_kernel_caps.descriptors)> kernel_caps; 163 std::array<u32, ARRAY_SIZE(exheader_header.arm11_kernel_caps.descriptors)> kernel_caps;
161 std::copy_n(exheader_header.arm11_kernel_caps.descriptors, kernel_caps.size(), begin(kernel_caps)); 164 std::copy_n(exheader_header.arm11_kernel_caps.descriptors, kernel_caps.size(), begin(kernel_caps));
@@ -173,6 +176,10 @@ ResultStatus AppLoader_NCCH::LoadSectionExeFS(const char* name, std::vector<u8>&
173 if (!file.IsOpen()) 176 if (!file.IsOpen())
174 return ResultStatus::Error; 177 return ResultStatus::Error;
175 178
179 ResultStatus result = LoadExeFS();
180 if (result != ResultStatus::Success)
181 return result;
182
176 LOG_DEBUG(Loader, "%d sections:", kMaxSections); 183 LOG_DEBUG(Loader, "%d sections:", kMaxSections);
177 // Iterate through the ExeFs archive until we find a section with the specified name... 184 // Iterate through the ExeFs archive until we find a section with the specified name...
178 for (unsigned section_number = 0; section_number < kMaxSections; section_number++) { 185 for (unsigned section_number = 0; section_number < kMaxSections; section_number++) {
@@ -215,9 +222,9 @@ ResultStatus AppLoader_NCCH::LoadSectionExeFS(const char* name, std::vector<u8>&
215 return ResultStatus::ErrorNotUsed; 222 return ResultStatus::ErrorNotUsed;
216} 223}
217 224
218ResultStatus AppLoader_NCCH::Load() { 225ResultStatus AppLoader_NCCH::LoadExeFS() {
219 if (is_loaded) 226 if (is_exefs_loaded)
220 return ResultStatus::ErrorAlreadyLoaded; 227 return ResultStatus::Success;
221 228
222 if (!file.IsOpen()) 229 if (!file.IsOpen())
223 return ResultStatus::Error; 230 return ResultStatus::Error;
@@ -255,7 +262,7 @@ ResultStatus AppLoader_NCCH::Load() {
255 resource_limit_category = exheader_header.arm11_system_local_caps.resource_limit_category; 262 resource_limit_category = exheader_header.arm11_system_local_caps.resource_limit_category;
256 263
257 LOG_INFO(Loader, "Name: %s" , exheader_header.codeset_info.name); 264 LOG_INFO(Loader, "Name: %s" , exheader_header.codeset_info.name);
258 LOG_INFO(Loader, "Program ID: %016X" , ncch_header.program_id); 265 LOG_INFO(Loader, "Program ID: %016llX" , ncch_header.program_id);
259 LOG_DEBUG(Loader, "Code compressed: %s" , is_compressed ? "yes" : "no"); 266 LOG_DEBUG(Loader, "Code compressed: %s" , is_compressed ? "yes" : "no");
260 LOG_DEBUG(Loader, "Entry point: 0x%08X", entry_point); 267 LOG_DEBUG(Loader, "Entry point: 0x%08X", entry_point);
261 LOG_DEBUG(Loader, "Code size: 0x%08X", code_size); 268 LOG_DEBUG(Loader, "Code size: 0x%08X", code_size);
@@ -282,6 +289,18 @@ ResultStatus AppLoader_NCCH::Load() {
282 if (file.ReadBytes(&exefs_header, sizeof(ExeFs_Header)) != sizeof(ExeFs_Header)) 289 if (file.ReadBytes(&exefs_header, sizeof(ExeFs_Header)) != sizeof(ExeFs_Header))
283 return ResultStatus::Error; 290 return ResultStatus::Error;
284 291
292 is_exefs_loaded = true;
293 return ResultStatus::Success;
294}
295
296ResultStatus AppLoader_NCCH::Load() {
297 if (is_loaded)
298 return ResultStatus::ErrorAlreadyLoaded;
299
300 ResultStatus result = LoadExeFS();
301 if (result != ResultStatus::Success)
302 return result;
303
285 is_loaded = true; // Set state to loaded 304 is_loaded = true; // Set state to loaded
286 305
287 return LoadExec(); // Load the executable into memory for booting 306 return LoadExec(); // Load the executable into memory for booting
diff --git a/src/core/loader/ncch.h b/src/core/loader/ncch.h
index ca6772a78..fd852c3de 100644
--- a/src/core/loader/ncch.h
+++ b/src/core/loader/ncch.h
@@ -232,6 +232,13 @@ private:
232 */ 232 */
233 ResultStatus LoadExec(); 233 ResultStatus LoadExec();
234 234
235 /**
236 * Ensure ExeFS is loaded and ready for reading sections
237 * @return ResultStatus result of function
238 */
239 ResultStatus LoadExeFS();
240
241 bool is_exefs_loaded = false;
235 bool is_compressed = false; 242 bool is_compressed = false;
236 243
237 u32 entry_point = 0; 244 u32 entry_point = 0;
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 7de5bd15d..ee9b69f81 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -15,6 +15,9 @@
15#include "core/memory_setup.h" 15#include "core/memory_setup.h"
16#include "core/mmio.h" 16#include "core/mmio.h"
17 17
18#include "video_core/renderer_base.h"
19#include "video_core/video_core.h"
20
18namespace Memory { 21namespace Memory {
19 22
20enum class PageType { 23enum class PageType {
@@ -22,8 +25,12 @@ enum class PageType {
22 Unmapped, 25 Unmapped,
23 /// Page is mapped to regular memory. This is the only type you can get pointers to. 26 /// Page is mapped to regular memory. This is the only type you can get pointers to.
24 Memory, 27 Memory,
28 /// Page is mapped to regular memory, but also needs to check for rasterizer cache flushing and invalidation
29 RasterizerCachedMemory,
25 /// Page is mapped to a I/O region. Writing and reading to this page is handled by functions. 30 /// Page is mapped to a I/O region. Writing and reading to this page is handled by functions.
26 Special, 31 Special,
32 /// Page is mapped to a I/O region, but also needs to check for rasterizer cache flushing and invalidation
33 RasterizerCachedSpecial,
27}; 34};
28 35
29struct SpecialRegion { 36struct SpecialRegion {
@@ -57,6 +64,12 @@ struct PageTable {
57 * the corresponding entry in `pointers` MUST be set to null. 64 * the corresponding entry in `pointers` MUST be set to null.
58 */ 65 */
59 std::array<PageType, NUM_ENTRIES> attributes; 66 std::array<PageType, NUM_ENTRIES> attributes;
67
68 /**
69 * Indicates the number of externally cached resources touching a page that should be
70 * flushed before the memory is accessed
71 */
72 std::array<u8, NUM_ENTRIES> cached_res_count;
60}; 73};
61 74
62/// Singular page table used for the singleton process 75/// Singular page table used for the singleton process
@@ -72,8 +85,15 @@ static void MapPages(u32 base, u32 size, u8* memory, PageType type) {
72 while (base != end) { 85 while (base != end) {
73 ASSERT_MSG(base < PageTable::NUM_ENTRIES, "out of range mapping at %08X", base); 86 ASSERT_MSG(base < PageTable::NUM_ENTRIES, "out of range mapping at %08X", base);
74 87
88 // Since pages are unmapped on shutdown after video core is shutdown, the renderer may be null here
89 if (current_page_table->attributes[base] == PageType::RasterizerCachedMemory ||
90 current_page_table->attributes[base] == PageType::RasterizerCachedSpecial) {
91 RasterizerFlushAndInvalidateRegion(VirtualToPhysicalAddress(base << PAGE_BITS), PAGE_SIZE);
92 }
93
75 current_page_table->attributes[base] = type; 94 current_page_table->attributes[base] = type;
76 current_page_table->pointers[base] = memory; 95 current_page_table->pointers[base] = memory;
96 current_page_table->cached_res_count[base] = 0;
77 97
78 base += 1; 98 base += 1;
79 if (memory != nullptr) 99 if (memory != nullptr)
@@ -84,6 +104,7 @@ static void MapPages(u32 base, u32 size, u8* memory, PageType type) {
84void InitMemoryMap() { 104void InitMemoryMap() {
85 main_page_table.pointers.fill(nullptr); 105 main_page_table.pointers.fill(nullptr);
86 main_page_table.attributes.fill(PageType::Unmapped); 106 main_page_table.attributes.fill(PageType::Unmapped);
107 main_page_table.cached_res_count.fill(0);
87} 108}
88 109
89void MapMemoryRegion(VAddr base, u32 size, u8* target) { 110void MapMemoryRegion(VAddr base, u32 size, u8* target) {
@@ -107,6 +128,28 @@ void UnmapRegion(VAddr base, u32 size) {
107} 128}
108 129
109/** 130/**
131 * Gets a pointer to the exact memory at the virtual address (i.e. not page aligned)
132 * using a VMA from the current process
133 */
134static u8* GetPointerFromVMA(VAddr vaddr) {
135 u8* direct_pointer = nullptr;
136
137 auto& vma = Kernel::g_current_process->vm_manager.FindVMA(vaddr)->second;
138 switch (vma.type) {
139 case Kernel::VMAType::AllocatedMemoryBlock:
140 direct_pointer = vma.backing_block->data() + vma.offset;
141 break;
142 case Kernel::VMAType::BackingMemory:
143 direct_pointer = vma.backing_memory;
144 break;
145 default:
146 UNREACHABLE();
147 }
148
149 return direct_pointer + (vaddr - vma.base);
150}
151
152/**
110 * This function should only be called for virtual addreses with attribute `PageType::Special`. 153 * This function should only be called for virtual addreses with attribute `PageType::Special`.
111 */ 154 */
112static MMIORegionPointer GetMMIOHandler(VAddr vaddr) { 155static MMIORegionPointer GetMMIOHandler(VAddr vaddr) {
@@ -126,6 +169,7 @@ template <typename T>
126T Read(const VAddr vaddr) { 169T Read(const VAddr vaddr) {
127 const u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; 170 const u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS];
128 if (page_pointer) { 171 if (page_pointer) {
172 // NOTE: Avoid adding any extra logic to this fast-path block
129 T value; 173 T value;
130 std::memcpy(&value, &page_pointer[vaddr & PAGE_MASK], sizeof(T)); 174 std::memcpy(&value, &page_pointer[vaddr & PAGE_MASK], sizeof(T));
131 return value; 175 return value;
@@ -139,8 +183,22 @@ T Read(const VAddr vaddr) {
139 case PageType::Memory: 183 case PageType::Memory:
140 ASSERT_MSG(false, "Mapped memory page without a pointer @ %08X", vaddr); 184 ASSERT_MSG(false, "Mapped memory page without a pointer @ %08X", vaddr);
141 break; 185 break;
186 case PageType::RasterizerCachedMemory:
187 {
188 RasterizerFlushRegion(VirtualToPhysicalAddress(vaddr), sizeof(T));
189
190 T value;
191 std::memcpy(&value, GetPointerFromVMA(vaddr), sizeof(T));
192 return value;
193 }
142 case PageType::Special: 194 case PageType::Special:
143 return ReadMMIO<T>(GetMMIOHandler(vaddr), vaddr); 195 return ReadMMIO<T>(GetMMIOHandler(vaddr), vaddr);
196 case PageType::RasterizerCachedSpecial:
197 {
198 RasterizerFlushRegion(VirtualToPhysicalAddress(vaddr), sizeof(T));
199
200 return ReadMMIO<T>(GetMMIOHandler(vaddr), vaddr);
201 }
144 default: 202 default:
145 UNREACHABLE(); 203 UNREACHABLE();
146 } 204 }
@@ -153,6 +211,7 @@ template <typename T>
153void Write(const VAddr vaddr, const T data) { 211void Write(const VAddr vaddr, const T data) {
154 u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; 212 u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS];
155 if (page_pointer) { 213 if (page_pointer) {
214 // NOTE: Avoid adding any extra logic to this fast-path block
156 std::memcpy(&page_pointer[vaddr & PAGE_MASK], &data, sizeof(T)); 215 std::memcpy(&page_pointer[vaddr & PAGE_MASK], &data, sizeof(T));
157 return; 216 return;
158 } 217 }
@@ -165,9 +224,23 @@ void Write(const VAddr vaddr, const T data) {
165 case PageType::Memory: 224 case PageType::Memory:
166 ASSERT_MSG(false, "Mapped memory page without a pointer @ %08X", vaddr); 225 ASSERT_MSG(false, "Mapped memory page without a pointer @ %08X", vaddr);
167 break; 226 break;
227 case PageType::RasterizerCachedMemory:
228 {
229 RasterizerFlushAndInvalidateRegion(VirtualToPhysicalAddress(vaddr), sizeof(T));
230
231 std::memcpy(GetPointerFromVMA(vaddr), &data, sizeof(T));
232 break;
233 }
168 case PageType::Special: 234 case PageType::Special:
169 WriteMMIO<T>(GetMMIOHandler(vaddr), vaddr, data); 235 WriteMMIO<T>(GetMMIOHandler(vaddr), vaddr, data);
170 break; 236 break;
237 case PageType::RasterizerCachedSpecial:
238 {
239 RasterizerFlushAndInvalidateRegion(VirtualToPhysicalAddress(vaddr), sizeof(T));
240
241 WriteMMIO<T>(GetMMIOHandler(vaddr), vaddr, data);
242 break;
243 }
171 default: 244 default:
172 UNREACHABLE(); 245 UNREACHABLE();
173 } 246 }
@@ -179,6 +252,10 @@ u8* GetPointer(const VAddr vaddr) {
179 return page_pointer + (vaddr & PAGE_MASK); 252 return page_pointer + (vaddr & PAGE_MASK);
180 } 253 }
181 254
255 if (current_page_table->attributes[vaddr >> PAGE_BITS] == PageType::RasterizerCachedMemory) {
256 return GetPointerFromVMA(vaddr);
257 }
258
182 LOG_ERROR(HW_Memory, "unknown GetPointer @ 0x%08x", vaddr); 259 LOG_ERROR(HW_Memory, "unknown GetPointer @ 0x%08x", vaddr);
183 return nullptr; 260 return nullptr;
184} 261}
@@ -187,6 +264,69 @@ u8* GetPhysicalPointer(PAddr address) {
187 return GetPointer(PhysicalToVirtualAddress(address)); 264 return GetPointer(PhysicalToVirtualAddress(address));
188} 265}
189 266
267void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) {
268 if (start == 0) {
269 return;
270 }
271
272 u32 num_pages = ((start + size - 1) >> PAGE_BITS) - (start >> PAGE_BITS) + 1;
273 PAddr paddr = start;
274
275 for (unsigned i = 0; i < num_pages; ++i) {
276 VAddr vaddr = PhysicalToVirtualAddress(paddr);
277 u8& res_count = current_page_table->cached_res_count[vaddr >> PAGE_BITS];
278 ASSERT_MSG(count_delta <= UINT8_MAX - res_count, "Rasterizer resource cache counter overflow!");
279 ASSERT_MSG(count_delta >= -res_count, "Rasterizer resource cache counter underflow!");
280
281 // Switch page type to cached if now cached
282 if (res_count == 0) {
283 PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
284 switch (page_type) {
285 case PageType::Memory:
286 page_type = PageType::RasterizerCachedMemory;
287 current_page_table->pointers[vaddr >> PAGE_BITS] = nullptr;
288 break;
289 case PageType::Special:
290 page_type = PageType::RasterizerCachedSpecial;
291 break;
292 default:
293 UNREACHABLE();
294 }
295 }
296
297 res_count += count_delta;
298
299 // Switch page type to uncached if now uncached
300 if (res_count == 0) {
301 PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
302 switch (page_type) {
303 case PageType::RasterizerCachedMemory:
304 page_type = PageType::Memory;
305 current_page_table->pointers[vaddr >> PAGE_BITS] = GetPointerFromVMA(vaddr & ~PAGE_MASK);
306 break;
307 case PageType::RasterizerCachedSpecial:
308 page_type = PageType::Special;
309 break;
310 default:
311 UNREACHABLE();
312 }
313 }
314 paddr += PAGE_SIZE;
315 }
316}
317
318void RasterizerFlushRegion(PAddr start, u32 size) {
319 if (VideoCore::g_renderer != nullptr) {
320 VideoCore::g_renderer->Rasterizer()->FlushRegion(start, size);
321 }
322}
323
324void RasterizerFlushAndInvalidateRegion(PAddr start, u32 size) {
325 if (VideoCore::g_renderer != nullptr) {
326 VideoCore::g_renderer->Rasterizer()->FlushAndInvalidateRegion(start, size);
327 }
328}
329
190u8 Read8(const VAddr addr) { 330u8 Read8(const VAddr addr) {
191 return Read<u8>(addr); 331 return Read<u8>(addr);
192} 332}
diff --git a/src/core/memory.h b/src/core/memory.h
index 8002e0c41..126d60471 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -142,4 +142,20 @@ VAddr PhysicalToVirtualAddress(PAddr addr);
142 */ 142 */
143u8* GetPhysicalPointer(PAddr address); 143u8* GetPhysicalPointer(PAddr address);
144 144
145/**
146 * Adds the supplied value to the rasterizer resource cache counter of each
147 * page touching the region.
148 */
149void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta);
150
151/**
152 * Flushes any externally cached rasterizer resources touching the given region.
153 */
154void RasterizerFlushRegion(PAddr start, u32 size);
155
156/**
157 * Flushes and invalidates any externally cached rasterizer resources touching the given region.
158 */
159void RasterizerFlushAndInvalidateRegion(PAddr start, u32 size);
160
145} 161}
diff --git a/src/core/settings.cpp b/src/core/settings.cpp
index 1aa26fbd2..77261eafe 100644
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -4,6 +4,8 @@
4 4
5#include "settings.h" 5#include "settings.h"
6 6
7#include "audio_core/audio_core.h"
8
7#include "core/gdbstub/gdbstub.h" 9#include "core/gdbstub/gdbstub.h"
8 10
9#include "video_core/video_core.h" 11#include "video_core/video_core.h"
@@ -19,6 +21,9 @@ void Apply() {
19 21
20 VideoCore::g_hw_renderer_enabled = values.use_hw_renderer; 22 VideoCore::g_hw_renderer_enabled = values.use_hw_renderer;
21 VideoCore::g_shader_jit_enabled = values.use_shader_jit; 23 VideoCore::g_shader_jit_enabled = values.use_shader_jit;
24 VideoCore::g_scaled_resolution_enabled = values.use_scaled_resolution;
25
26 AudioCore::SelectSink(values.sink_id);
22 27
23} 28}
24 29
diff --git a/src/core/settings.h b/src/core/settings.h
index 4933a516d..ce2a31164 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -6,7 +6,8 @@
6 6
7#include <string> 7#include <string>
8#include <array> 8#include <array>
9#include <common/file_util.h> 9
10#include "common/common_types.h"
10 11
11namespace Settings { 12namespace Settings {
12 13
@@ -55,6 +56,7 @@ struct Values {
55 // Renderer 56 // Renderer
56 bool use_hw_renderer; 57 bool use_hw_renderer;
57 bool use_shader_jit; 58 bool use_shader_jit;
59 bool use_scaled_resolution;
58 60
59 float bg_red; 61 float bg_red;
60 float bg_green; 62 float bg_green;
@@ -62,6 +64,9 @@ struct Values {
62 64
63 std::string log_filter; 65 std::string log_filter;
64 66
67 // Audio
68 std::string sink_id;
69
65 // Debugging 70 // Debugging
66 bool use_gdbstub; 71 bool use_gdbstub;
67 u16 gdbstub_port; 72 u16 gdbstub_port;
diff --git a/src/core/tracer/recorder.cpp b/src/core/tracer/recorder.cpp
index c6dc35c83..7abaacf70 100644
--- a/src/core/tracer/recorder.cpp
+++ b/src/core/tracer/recorder.cpp
@@ -26,17 +26,17 @@ void Recorder::Finish(const std::string& filename) {
26 // Calculate file offsets 26 // Calculate file offsets
27 auto& initial = header.initial_state_offsets; 27 auto& initial = header.initial_state_offsets;
28 28
29 initial.gpu_registers_size = initial_state.gpu_registers.size(); 29 initial.gpu_registers_size = static_cast<u32>(initial_state.gpu_registers.size());
30 initial.lcd_registers_size = initial_state.lcd_registers.size(); 30 initial.lcd_registers_size = static_cast<u32>(initial_state.lcd_registers.size());
31 initial.pica_registers_size = initial_state.pica_registers.size(); 31 initial.pica_registers_size = static_cast<u32>(initial_state.pica_registers.size());
32 initial.default_attributes_size = initial_state.default_attributes.size(); 32 initial.default_attributes_size = static_cast<u32>(initial_state.default_attributes.size());
33 initial.vs_program_binary_size = initial_state.vs_program_binary.size(); 33 initial.vs_program_binary_size = static_cast<u32>(initial_state.vs_program_binary.size());
34 initial.vs_swizzle_data_size = initial_state.vs_swizzle_data.size(); 34 initial.vs_swizzle_data_size = static_cast<u32>(initial_state.vs_swizzle_data.size());
35 initial.vs_float_uniforms_size = initial_state.vs_float_uniforms.size(); 35 initial.vs_float_uniforms_size = static_cast<u32>(initial_state.vs_float_uniforms.size());
36 initial.gs_program_binary_size = initial_state.gs_program_binary.size(); 36 initial.gs_program_binary_size = static_cast<u32>(initial_state.gs_program_binary.size());
37 initial.gs_swizzle_data_size = initial_state.gs_swizzle_data.size(); 37 initial.gs_swizzle_data_size = static_cast<u32>(initial_state.gs_swizzle_data.size());
38 initial.gs_float_uniforms_size = initial_state.gs_float_uniforms.size(); 38 initial.gs_float_uniforms_size = static_cast<u32>(initial_state.gs_float_uniforms.size());
39 header.stream_size = stream.size(); 39 header.stream_size = static_cast<u32>(stream.size());
40 40
41 initial.gpu_registers = sizeof(header); 41 initial.gpu_registers = sizeof(header);
42 initial.lcd_registers = initial.gpu_registers + initial.gpu_registers_size * sizeof(u32); 42 initial.lcd_registers = initial.gpu_registers + initial.gpu_registers_size * sizeof(u32);
@@ -68,7 +68,7 @@ void Recorder::Finish(const std::string& filename) {
68 DEBUG_ASSERT(stream_element.extra_data.size() == 0); 68 DEBUG_ASSERT(stream_element.extra_data.size() == 0);
69 break; 69 break;
70 } 70 }
71 header.stream_offset += stream_element.extra_data.size(); 71 header.stream_offset += static_cast<u32>(stream_element.extra_data.size());
72 } 72 }
73 73
74 try { 74 try {
diff --git a/src/core/tracer/recorder.h b/src/core/tracer/recorder.h
index a42ccc45f..febf883c8 100644
--- a/src/core/tracer/recorder.h
+++ b/src/core/tracer/recorder.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <string>
7#include <unordered_map> 8#include <unordered_map>
8#include <vector> 9#include <vector>
9 10
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 76cfd4f7d..581a37897 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -15,7 +15,7 @@ set(SRCS
15 shader/shader.cpp 15 shader/shader.cpp
16 shader/shader_interpreter.cpp 16 shader/shader_interpreter.cpp
17 swrasterizer.cpp 17 swrasterizer.cpp
18 utils.cpp 18 vertex_loader.cpp
19 video_core.cpp 19 video_core.cpp
20 ) 20 )
21 21
@@ -43,6 +43,7 @@ set(HEADERS
43 shader/shader_interpreter.h 43 shader/shader_interpreter.h
44 swrasterizer.h 44 swrasterizer.h
45 utils.h 45 utils.h
46 vertex_loader.h
46 video_core.h 47 video_core.h
47 ) 48 )
48 49
diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp
index 3d503486e..db99ce666 100644
--- a/src/video_core/clipper.cpp
+++ b/src/video_core/clipper.cpp
@@ -2,13 +2,24 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
6#include <array>
7#include <cstddef>
8
5#include <boost/container/static_vector.hpp> 9#include <boost/container/static_vector.hpp>
10#include <boost/container/vector.hpp>
11
12#include "common/bit_field.h"
13#include "common/common_types.h"
14#include "common/logging/log.h"
15#include "common/vector_math.h"
6 16
7#include "video_core/clipper.h" 17#include "video_core/clipper.h"
8#include "video_core/pica.h" 18#include "video_core/pica.h"
9#include "video_core/pica_state.h" 19#include "video_core/pica_state.h"
20#include "video_core/pica_types.h"
10#include "video_core/rasterizer.h" 21#include "video_core/rasterizer.h"
11#include "video_core/shader/shader_interpreter.h" 22#include "video_core/shader/shader.h"
12 23
13namespace Pica { 24namespace Pica {
14 25
@@ -64,8 +75,6 @@ static void InitScreenCoordinates(OutputVertex& vtx)
64 viewport.halfsize_y = float24::FromRaw(regs.viewport_size_y); 75 viewport.halfsize_y = float24::FromRaw(regs.viewport_size_y);
65 viewport.offset_x = float24::FromFloat32(static_cast<float>(regs.viewport_corner.x)); 76 viewport.offset_x = float24::FromFloat32(static_cast<float>(regs.viewport_corner.x));
66 viewport.offset_y = float24::FromFloat32(static_cast<float>(regs.viewport_corner.y)); 77 viewport.offset_y = float24::FromFloat32(static_cast<float>(regs.viewport_corner.y));
67 viewport.zscale = float24::FromRaw(regs.viewport_depth_range);
68 viewport.offset_z = float24::FromRaw(regs.viewport_depth_far_plane);
69 78
70 float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w; 79 float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w;
71 vtx.color *= inv_w; 80 vtx.color *= inv_w;
@@ -78,7 +87,7 @@ static void InitScreenCoordinates(OutputVertex& vtx)
78 87
79 vtx.screenpos[0] = (vtx.pos.x * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x; 88 vtx.screenpos[0] = (vtx.pos.x * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x;
80 vtx.screenpos[1] = (vtx.pos.y * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y; 89 vtx.screenpos[1] = (vtx.pos.y * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y;
81 vtx.screenpos[2] = viewport.offset_z + vtx.pos.z * inv_w * viewport.zscale; 90 vtx.screenpos[2] = vtx.pos.z * inv_w;
82} 91}
83 92
84void ProcessTriangle(const OutputVertex &v0, const OutputVertex &v1, const OutputVertex &v2) { 93void ProcessTriangle(const OutputVertex &v0, const OutputVertex &v1, const OutputVertex &v2) {
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 3abe79c09..ad0da796e 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -2,26 +2,32 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <cmath> 5#include <array>
6#include <boost/range/algorithm/fill.hpp> 6#include <cstddef>
7#include <memory>
8#include <utility>
7 9
8#include "common/alignment.h" 10#include "common/assert.h"
11#include "common/logging/log.h"
9#include "common/microprofile.h" 12#include "common/microprofile.h"
10#include "common/profiler.h" 13#include "common/vector_math.h"
11 14
12#include "core/settings.h"
13#include "core/hle/service/gsp_gpu.h" 15#include "core/hle/service/gsp_gpu.h"
14#include "core/hw/gpu.h" 16#include "core/hw/gpu.h"
17#include "core/memory.h"
18#include "core/tracer/recorder.h"
15 19
16#include "video_core/clipper.h"
17#include "video_core/command_processor.h" 20#include "video_core/command_processor.h"
21#include "video_core/debug_utils/debug_utils.h"
18#include "video_core/pica.h" 22#include "video_core/pica.h"
19#include "video_core/pica_state.h" 23#include "video_core/pica_state.h"
24#include "video_core/pica_types.h"
20#include "video_core/primitive_assembly.h" 25#include "video_core/primitive_assembly.h"
26#include "video_core/rasterizer_interface.h"
21#include "video_core/renderer_base.h" 27#include "video_core/renderer_base.h"
28#include "video_core/shader/shader.h"
29#include "video_core/vertex_loader.h"
22#include "video_core/video_core.h" 30#include "video_core/video_core.h"
23#include "video_core/debug_utils/debug_utils.h"
24#include "video_core/shader/shader_interpreter.h"
25 31
26namespace Pica { 32namespace Pica {
27 33
@@ -35,8 +41,6 @@ static int default_attr_counter = 0;
35 41
36static u32 default_attr_write_buffer[3]; 42static u32 default_attr_write_buffer[3];
37 43
38Common::Profiling::TimingCategory category_drawing("Drawing");
39
40// Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF 44// Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF
41static const u32 expand_bits_to_bytes[] = { 45static const u32 expand_bits_to_bytes[] = {
42 0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff, 46 0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff,
@@ -124,7 +128,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
124 128
125 // TODO: Verify that this actually modifies the register! 129 // TODO: Verify that this actually modifies the register!
126 if (setup.index < 15) { 130 if (setup.index < 15) {
127 g_state.vs.default_attributes[setup.index] = attribute; 131 g_state.vs_default_attributes[setup.index] = attribute;
128 setup.index++; 132 setup.index++;
129 } else { 133 } else {
130 // Put each attribute into an immediate input buffer. 134 // Put each attribute into an immediate input buffer.
@@ -140,13 +144,12 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
140 immediate_attribute_id = 0; 144 immediate_attribute_id = 0;
141 145
142 Shader::UnitState<false> shader_unit; 146 Shader::UnitState<false> shader_unit;
143 Shader::Setup(); 147 g_state.vs.Setup();
144
145 if (g_debug_context)
146 g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, static_cast<void*>(&immediate_input));
147 148
148 // Send to vertex shader 149 // Send to vertex shader
149 Shader::OutputVertex output = Shader::Run(shader_unit, immediate_input, regs.vs.num_input_attributes+1); 150 if (g_debug_context)
151 g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, static_cast<void*>(&immediate_input));
152 Shader::OutputVertex output = g_state.vs.Run(shader_unit, immediate_input, regs.vs.num_input_attributes+1);
150 153
151 // Send to renderer 154 // Send to renderer
152 using Pica::Shader::OutputVertex; 155 using Pica::Shader::OutputVertex;
@@ -186,60 +189,19 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
186 case PICA_REG_INDEX(trigger_draw): 189 case PICA_REG_INDEX(trigger_draw):
187 case PICA_REG_INDEX(trigger_draw_indexed): 190 case PICA_REG_INDEX(trigger_draw_indexed):
188 { 191 {
189 Common::Profiling::ScopeTimer scope_timer(category_drawing);
190 MICROPROFILE_SCOPE(GPU_Drawing); 192 MICROPROFILE_SCOPE(GPU_Drawing);
191 193
192#if PICA_LOG_TEV 194#if PICA_LOG_TEV
193 DebugUtils::DumpTevStageConfig(regs.GetTevStages()); 195 DebugUtils::DumpTevStageConfig(regs.GetTevStages());
194#endif 196#endif
195
196 if (g_debug_context) 197 if (g_debug_context)
197 g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr); 198 g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr);
198 199
199 const auto& attribute_config = regs.vertex_attributes; 200 // Processes information about internal vertex attributes to figure out how a vertex is loaded.
200 const u32 base_address = attribute_config.GetPhysicalBaseAddress(); 201 // Later, these can be compiled and cached.
201 202 VertexLoader loader;
202 // Information about internal vertex attributes 203 const u32 base_address = regs.vertex_attributes.GetPhysicalBaseAddress();
203 u32 vertex_attribute_sources[16]; 204 loader.Setup(regs);
204 boost::fill(vertex_attribute_sources, 0xdeadbeef);
205 u32 vertex_attribute_strides[16] = {};
206 Regs::VertexAttributeFormat vertex_attribute_formats[16] = {};
207
208 u32 vertex_attribute_elements[16] = {};
209 u32 vertex_attribute_element_size[16] = {};
210
211 // Setup attribute data from loaders
212 for (int loader = 0; loader < 12; ++loader) {
213 const auto& loader_config = attribute_config.attribute_loaders[loader];
214
215 u32 offset = 0;
216
217 // TODO: What happens if a loader overwrites a previous one's data?
218 for (unsigned component = 0; component < loader_config.component_count; ++component) {
219 if (component >= 12) {
220 LOG_ERROR(HW_GPU, "Overflow in the vertex attribute loader %u trying to load component %u", loader, component);
221 continue;
222 }
223
224 u32 attribute_index = loader_config.GetComponent(component);
225 if (attribute_index < 12) {
226 int element_size = attribute_config.GetElementSizeInBytes(attribute_index);
227 offset = Common::AlignUp(offset, element_size);
228 vertex_attribute_sources[attribute_index] = base_address + loader_config.data_offset + offset;
229 vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count);
230 vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index);
231 vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index);
232 vertex_attribute_element_size[attribute_index] = element_size;
233 offset += attribute_config.GetStride(attribute_index);
234 } else if (attribute_index < 16) {
235 // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively
236 offset = Common::AlignUp(offset, 4);
237 offset += (attribute_index - 11) * 4;
238 } else {
239 UNREACHABLE(); // This is truly unreachable due to the number of bits for each component
240 }
241 }
242 }
243 205
244 // Load vertices 206 // Load vertices
245 bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed)); 207 bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed));
@@ -263,32 +225,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
263 } 225 }
264 } 226 }
265 227
266 class { 228 DebugUtils::MemoryAccessTracker memory_accesses;
267 /// Combine overlapping and close ranges
268 void SimplifyRanges() {
269 for (auto it = ranges.begin(); it != ranges.end(); ++it) {
270 // NOTE: We add 32 to the range end address to make sure "close" ranges are combined, too
271 auto it2 = std::next(it);
272 while (it2 != ranges.end() && it->first + it->second + 32 >= it2->first) {
273 it->second = std::max(it->second, it2->first + it2->second - it->first);
274 it2 = ranges.erase(it2);
275 }
276 }
277 }
278
279 public:
280 /// Record a particular memory access in the list
281 void AddAccess(u32 paddr, u32 size) {
282 // Create new range or extend existing one
283 ranges[paddr] = std::max(ranges[paddr], size);
284
285 // Simplify ranges...
286 SimplifyRanges();
287 }
288
289 /// Map of accessed ranges (mapping start address to range size)
290 std::map<u32, u32> ranges;
291 } memory_accesses;
292 229
293 // Simple circular-replacement vertex cache 230 // Simple circular-replacement vertex cache
294 // The size has been tuned for optimal balance between hit-rate and the cost of lookup 231 // The size has been tuned for optimal balance between hit-rate and the cost of lookup
@@ -300,7 +237,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
300 vertex_cache_ids.fill(-1); 237 vertex_cache_ids.fill(-1);
301 238
302 Shader::UnitState<false> shader_unit; 239 Shader::UnitState<false> shader_unit;
303 Shader::Setup(); 240 g_state.vs.Setup();
304 241
305 for (unsigned int index = 0; index < regs.num_vertices; ++index) 242 for (unsigned int index = 0; index < regs.num_vertices; ++index)
306 { 243 {
@@ -332,60 +269,12 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
332 if (!vertex_cache_hit) { 269 if (!vertex_cache_hit) {
333 // Initialize data for the current vertex 270 // Initialize data for the current vertex
334 Shader::InputVertex input; 271 Shader::InputVertex input;
335 272 loader.LoadVertex(base_address, index, vertex, input, memory_accesses);
336 for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) {
337 if (vertex_attribute_elements[i] != 0) {
338 // Default attribute values set if array elements have < 4 components. This
339 // is *not* carried over from the default attribute settings even if they're
340 // enabled for this attribute.
341 static const float24 zero = float24::FromFloat32(0.0f);
342 static const float24 one = float24::FromFloat32(1.0f);
343 input.attr[i] = Math::Vec4<float24>(zero, zero, zero, one);
344
345 // Load per-vertex data from the loader arrays
346 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
347 u32 source_addr = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i];
348 const u8* srcdata = Memory::GetPhysicalPointer(source_addr);
349
350 if (g_debug_context && Pica::g_debug_context->recorder) {
351 memory_accesses.AddAccess(source_addr,
352 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4
353 : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1);
354 }
355
356 const float srcval =
357 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE) ? *reinterpret_cast<const s8*>(srcdata) :
358 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *reinterpret_cast<const u8*>(srcdata) :
359 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? *reinterpret_cast<const s16*>(srcdata) :
360 *reinterpret_cast<const float*>(srcdata);
361
362 input.attr[i][comp] = float24::FromFloat32(srcval);
363 LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f",
364 comp, i, vertex, index,
365 attribute_config.GetPhysicalBaseAddress(),
366 vertex_attribute_sources[i] - base_address,
367 vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i],
368 input.attr[i][comp].ToFloat32());
369 }
370 } else if (attribute_config.IsDefaultAttribute(i)) {
371 // Load the default attribute if we're configured to do so
372 input.attr[i] = g_state.vs.default_attributes[i];
373 LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)",
374 i, vertex, index,
375 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),
376 input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
377 } else {
378 // TODO(yuriks): In this case, no data gets loaded and the vertex
379 // remains with the last value it had. This isn't currently maintained
380 // as global state, however, and so won't work in Citra yet.
381 }
382 }
383
384 if (g_debug_context)
385 g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input);
386 273
387 // Send to vertex shader 274 // Send to vertex shader
388 output = Shader::Run(shader_unit, input, attribute_config.GetNumTotalAttributes()); 275 if (g_debug_context)
276 g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, (void*)&input);
277 output = g_state.vs.Run(shader_unit, input, loader.GetNumTotalAttributes());
389 278
390 if (is_indexed) { 279 if (is_indexed) {
391 vertex_cache[vertex_cache_pos] = output; 280 vertex_cache[vertex_cache_pos] = output;
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index c3a9c9598..2f645b441 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -4,35 +4,41 @@
4 4
5#include <algorithm> 5#include <algorithm>
6#include <condition_variable> 6#include <condition_variable>
7#include <cstdint>
7#include <cstring> 8#include <cstring>
8#include <fstream> 9#include <fstream>
9#include <list>
10#include <map> 10#include <map>
11#include <mutex> 11#include <mutex>
12#include <stdexcept>
12#include <string> 13#include <string>
13 14
14#ifdef HAVE_PNG 15#ifdef HAVE_PNG
15#include <png.h> 16#include <png.h>
17#include <setjmp.h>
16#endif 18#endif
17 19
20#include <nihstro/bit_field.h>
18#include <nihstro/float24.h> 21#include <nihstro/float24.h>
19#include <nihstro/shader_binary.h> 22#include <nihstro/shader_binary.h>
20 23
21#include "common/assert.h" 24#include "common/assert.h"
25#include "common/bit_field.h"
22#include "common/color.h" 26#include "common/color.h"
23#include "common/common_types.h" 27#include "common/common_types.h"
24#include "common/file_util.h" 28#include "common/file_util.h"
29#include "common/logging/log.h"
25#include "common/math_util.h" 30#include "common/math_util.h"
26#include "common/vector_math.h" 31#include "common/vector_math.h"
27 32
28#include "core/settings.h" 33#include "video_core/debug_utils/debug_utils.h"
29
30#include "video_core/pica.h" 34#include "video_core/pica.h"
31#include "video_core/pica_state.h" 35#include "video_core/pica_state.h"
36#include "video_core/pica_types.h"
37#include "video_core/rasterizer_interface.h"
32#include "video_core/renderer_base.h" 38#include "video_core/renderer_base.h"
39#include "video_core/shader/shader.h"
33#include "video_core/utils.h" 40#include "video_core/utils.h"
34#include "video_core/video_core.h" 41#include "video_core/video_core.h"
35#include "video_core/debug_utils/debug_utils.h"
36 42
37using nihstro::DVLBHeader; 43using nihstro::DVLBHeader;
38using nihstro::DVLEHeader; 44using nihstro::DVLEHeader;
@@ -40,15 +46,12 @@ using nihstro::DVLPHeader;
40 46
41namespace Pica { 47namespace Pica {
42 48
43void DebugContext::OnEvent(Event event, void* data) { 49void DebugContext::DoOnEvent(Event event, void* data) {
44 if (!breakpoints[event].enabled)
45 return;
46
47 { 50 {
48 std::unique_lock<std::mutex> lock(breakpoint_mutex); 51 std::unique_lock<std::mutex> lock(breakpoint_mutex);
49 52
50 // Commit the hardware renderer's framebuffer so it will show on debug widgets 53 // Commit the rasterizer's caches so framebuffers, render targets, etc. will show on debug widgets
51 VideoCore::g_renderer->Rasterizer()->FlushFramebuffer(); 54 VideoCore::g_renderer->Rasterizer()->FlushAll();
52 55
53 // TODO: Should stop the CPU thread here once we multithread emulation. 56 // TODO: Should stop the CPU thread here once we multithread emulation.
54 57
@@ -205,11 +208,12 @@ void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, c
205 208
206 // TODO: Reduce the amount of binary code written to relevant portions 209 // TODO: Reduce the amount of binary code written to relevant portions
207 dvlp.binary_offset = write_offset - dvlp_offset; 210 dvlp.binary_offset = write_offset - dvlp_offset;
208 dvlp.binary_size_words = setup.program_code.size(); 211 dvlp.binary_size_words = static_cast<uint32_t>(setup.program_code.size());
209 QueueForWriting(reinterpret_cast<const u8*>(setup.program_code.data()), setup.program_code.size() * sizeof(u32)); 212 QueueForWriting(reinterpret_cast<const u8*>(setup.program_code.data()),
213 static_cast<u32>(setup.program_code.size()) * sizeof(u32));
210 214
211 dvlp.swizzle_info_offset = write_offset - dvlp_offset; 215 dvlp.swizzle_info_offset = write_offset - dvlp_offset;
212 dvlp.swizzle_info_num_entries = setup.swizzle_data.size(); 216 dvlp.swizzle_info_num_entries = static_cast<uint32_t>(setup.swizzle_data.size());
213 u32 dummy = 0; 217 u32 dummy = 0;
214 for (unsigned int i = 0; i < setup.swizzle_data.size(); ++i) { 218 for (unsigned int i = 0; i < setup.swizzle_data.size(); ++i) {
215 QueueForWriting(reinterpret_cast<const u8*>(&setup.swizzle_data[i]), sizeof(setup.swizzle_data[i])); 219 QueueForWriting(reinterpret_cast<const u8*>(&setup.swizzle_data[i]), sizeof(setup.swizzle_data[i]));
@@ -261,7 +265,7 @@ void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, c
261 constant_table.emplace_back(constant); 265 constant_table.emplace_back(constant);
262 } 266 }
263 dvle.constant_table_offset = write_offset - dvlb.dvle_offset; 267 dvle.constant_table_offset = write_offset - dvlb.dvle_offset;
264 dvle.constant_table_size = constant_table.size(); 268 dvle.constant_table_size = static_cast<uint32_t>(constant_table.size());
265 for (const auto& constant : constant_table) { 269 for (const auto& constant : constant_table) {
266 QueueForWriting(reinterpret_cast<const u8*>(&constant), sizeof(constant)); 270 QueueForWriting(reinterpret_cast<const u8*>(&constant), sizeof(constant));
267 } 271 }
diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h
index 7df941619..f628292a4 100644
--- a/src/video_core/debug_utils/debug_utils.h
+++ b/src/video_core/debug_utils/debug_utils.h
@@ -4,23 +4,33 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <algorithm>
7#include <array> 8#include <array>
8#include <condition_variable> 9#include <condition_variable>
10#include <iterator>
9#include <list> 11#include <list>
10#include <map> 12#include <map>
11#include <memory> 13#include <memory>
12#include <mutex> 14#include <mutex>
15#include <string>
16#include <utility>
13#include <vector> 17#include <vector>
14 18
19#include "common/common_types.h"
15#include "common/vector_math.h" 20#include "common/vector_math.h"
16 21
17#include "core/tracer/recorder.h"
18
19#include "video_core/pica.h" 22#include "video_core/pica.h"
20#include "video_core/shader/shader.h" 23
24namespace CiTrace {
25class Recorder;
26}
21 27
22namespace Pica { 28namespace Pica {
23 29
30namespace Shader {
31struct ShaderSetup;
32}
33
24class DebugContext { 34class DebugContext {
25public: 35public:
26 enum class Event { 36 enum class Event {
@@ -30,7 +40,7 @@ public:
30 PicaCommandProcessed, 40 PicaCommandProcessed,
31 IncomingPrimitiveBatch, 41 IncomingPrimitiveBatch,
32 FinishedPrimitiveBatch, 42 FinishedPrimitiveBatch,
33 VertexLoaded, 43 VertexShaderInvocation,
34 IncomingDisplayTransfer, 44 IncomingDisplayTransfer,
35 GSPCommandProcessed, 45 GSPCommandProcessed,
36 BufferSwapped, 46 BufferSwapped,
@@ -114,7 +124,15 @@ public:
114 * @param event Event which has happened 124 * @param event Event which has happened
115 * @param data Optional data pointer (pass nullptr if unused). Needs to remain valid until Resume() is called. 125 * @param data Optional data pointer (pass nullptr if unused). Needs to remain valid until Resume() is called.
116 */ 126 */
117 void OnEvent(Event event, void* data); 127 void OnEvent(Event event, void* data) {
128 // This check is left in the header to allow the compiler to inline it.
129 if (!breakpoints[(int)event].enabled)
130 return;
131 // For the rest of event handling, call a separate function.
132 DoOnEvent(event, data);
133 }
134
135 void DoOnEvent(Event event, void *data);
118 136
119 /** 137 /**
120 * Resume from the current breakpoint. 138 * Resume from the current breakpoint.
@@ -126,12 +144,14 @@ public:
126 * Delete all set breakpoints and resume emulation. 144 * Delete all set breakpoints and resume emulation.
127 */ 145 */
128 void ClearBreakpoints() { 146 void ClearBreakpoints() {
129 breakpoints.clear(); 147 for (auto &bp : breakpoints) {
148 bp.enabled = false;
149 }
130 Resume(); 150 Resume();
131 } 151 }
132 152
133 // TODO: Evaluate if access to these members should be hidden behind a public interface. 153 // TODO: Evaluate if access to these members should be hidden behind a public interface.
134 std::map<Event, BreakPoint> breakpoints; 154 std::array<BreakPoint, (int)Event::NumEvents> breakpoints;
135 Event active_breakpoint; 155 Event active_breakpoint;
136 bool at_breakpoint = false; 156 bool at_breakpoint = false;
137 157
@@ -206,6 +226,36 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data);
206 226
207void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages); 227void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages);
208 228
229/**
230 * Used in the vertex loader to merge access records. TODO: Investigate if actually useful.
231 */
232class MemoryAccessTracker {
233 /// Combine overlapping and close ranges
234 void SimplifyRanges() {
235 for (auto it = ranges.begin(); it != ranges.end(); ++it) {
236 // NOTE: We add 32 to the range end address to make sure "close" ranges are combined, too
237 auto it2 = std::next(it);
238 while (it2 != ranges.end() && it->first + it->second + 32 >= it2->first) {
239 it->second = std::max(it->second, it2->first + it2->second - it->first);
240 it2 = ranges.erase(it2);
241 }
242 }
243 }
244
245public:
246 /// Record a particular memory access in the list
247 void AddAccess(u32 paddr, u32 size) {
248 // Create new range or extend existing one
249 ranges[paddr] = std::max(ranges[paddr], size);
250
251 // Simplify ranges...
252 SimplifyRanges();
253 }
254
255 /// Map of accessed ranges (mapping start address to range size)
256 std::map<u32, u32> ranges;
257};
258
209} // namespace 259} // namespace
210 260
211} // namespace 261} // namespace
diff --git a/src/video_core/pica.cpp b/src/video_core/pica.cpp
index ccbaf071b..ec78f9593 100644
--- a/src/video_core/pica.cpp
+++ b/src/video_core/pica.cpp
@@ -3,10 +3,13 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <cstring> 5#include <cstring>
6#include <iterator>
6#include <unordered_map> 7#include <unordered_map>
8#include <utility>
7 9
8#include "video_core/pica.h" 10#include "video_core/pica.h"
9#include "video_core/pica_state.h" 11#include "video_core/pica_state.h"
12#include "video_core/primitive_assembly.h"
10#include "video_core/shader/shader.h" 13#include "video_core/shader/shader.h"
11 14
12namespace Pica { 15namespace Pica {
@@ -480,7 +483,7 @@ std::string Regs::GetCommandName(int index) {
480 static std::unordered_map<u32, const char*> map; 483 static std::unordered_map<u32, const char*> map;
481 484
482 if (map.empty()) { 485 if (map.empty()) {
483 map.insert(begin(register_names), end(register_names)); 486 map.insert(std::begin(register_names), std::end(register_names));
484 } 487 }
485 488
486 // Return empty string if no match is found 489 // Return empty string if no match is found
@@ -497,7 +500,7 @@ void Init() {
497} 500}
498 501
499void Shutdown() { 502void Shutdown() {
500 Shader::Shutdown(); 503 Shader::ClearCache();
501} 504}
502 505
503template <typename T> 506template <typename T>
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 4552ff81c..86c0a0096 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -5,10 +5,13 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <cmath>
9#include <cstddef> 8#include <cstddef>
10#include <string> 9#include <string>
11 10
11#ifndef _MSC_VER
12#include <type_traits> // for std::enable_if
13#endif
14
12#include "common/assert.h" 15#include "common/assert.h"
13#include "common/bit_field.h" 16#include "common/bit_field.h"
14#include "common/common_funcs.h" 17#include "common/common_funcs.h"
@@ -16,8 +19,6 @@
16#include "common/vector_math.h" 19#include "common/vector_math.h"
17#include "common/logging/log.h" 20#include "common/logging/log.h"
18 21
19#include "pica_types.h"
20
21namespace Pica { 22namespace Pica {
22 23
23// Returns index corresponding to the Regs member labeled by field_name 24// Returns index corresponding to the Regs member labeled by field_name
@@ -69,7 +70,7 @@ struct Regs {
69 INSERT_PADDING_WORDS(0x9); 70 INSERT_PADDING_WORDS(0x9);
70 71
71 BitField<0, 24, u32> viewport_depth_range; // float24 72 BitField<0, 24, u32> viewport_depth_range; // float24
72 BitField<0, 24, u32> viewport_depth_far_plane; // float24 73 BitField<0, 24, u32> viewport_depth_near_plane; // float24
73 74
74 BitField<0, 3, u32> vs_output_total; 75 BitField<0, 3, u32> vs_output_total;
75 76
@@ -121,9 +122,31 @@ struct Regs {
121 BitField<16, 10, s32> y; 122 BitField<16, 10, s32> y;
122 } viewport_corner; 123 } viewport_corner;
123 124
124 INSERT_PADDING_WORDS(0x17); 125 INSERT_PADDING_WORDS(0x1);
126
127 //TODO: early depth
128 INSERT_PADDING_WORDS(0x1);
129
130 INSERT_PADDING_WORDS(0x2);
131
132 enum DepthBuffering : u32 {
133 WBuffering = 0,
134 ZBuffering = 1,
135 };
136 BitField< 0, 1, DepthBuffering> depthmap_enable;
137
138 INSERT_PADDING_WORDS(0x12);
125 139
126 struct TextureConfig { 140 struct TextureConfig {
141 enum TextureType : u32 {
142 Texture2D = 0,
143 TextureCube = 1,
144 Shadow2D = 2,
145 Projection2D = 3,
146 ShadowCube = 4,
147 Disabled = 5,
148 };
149
127 enum WrapMode : u32 { 150 enum WrapMode : u32 {
128 ClampToEdge = 0, 151 ClampToEdge = 0,
129 ClampToBorder = 1, 152 ClampToBorder = 1,
@@ -154,6 +177,7 @@ struct Regs {
154 BitField< 2, 1, TextureFilter> min_filter; 177 BitField< 2, 1, TextureFilter> min_filter;
155 BitField< 8, 2, WrapMode> wrap_t; 178 BitField< 8, 2, WrapMode> wrap_t;
156 BitField<12, 2, WrapMode> wrap_s; 179 BitField<12, 2, WrapMode> wrap_s;
180 BitField<28, 2, TextureType> type; ///< @note Only valid for texture 0 according to 3DBrew.
157 }; 181 };
158 182
159 INSERT_PADDING_WORDS(0x1); 183 INSERT_PADDING_WORDS(0x1);
@@ -577,7 +601,7 @@ struct Regs {
577 } 601 }
578 } 602 }
579 603
580 struct { 604 struct FramebufferConfig {
581 INSERT_PADDING_WORDS(0x3); 605 INSERT_PADDING_WORDS(0x3);
582 606
583 union { 607 union {
@@ -747,8 +771,13 @@ struct Regs {
747 case LightingSampler::ReflectGreen: 771 case LightingSampler::ReflectGreen:
748 case LightingSampler::ReflectBlue: 772 case LightingSampler::ReflectBlue:
749 return (config == LightingConfig::Config4) || (config == LightingConfig::Config5) || (config == LightingConfig::Config7); 773 return (config == LightingConfig::Config4) || (config == LightingConfig::Config5) || (config == LightingConfig::Config7);
774 default:
775 UNREACHABLE_MSG("Regs::IsLightingSamplerSupported: Reached "
776 "unreachable section, sampler should be one "
777 "of Distribution0, Distribution1, Fresnel, "
778 "ReflectRed, ReflectGreen or ReflectBlue, instead "
779 "got %i", static_cast<int>(config));
750 } 780 }
751 return false;
752 } 781 }
753 782
754 struct { 783 struct {
@@ -1273,10 +1302,11 @@ ASSERT_REG_POSITION(cull_mode, 0x40);
1273ASSERT_REG_POSITION(viewport_size_x, 0x41); 1302ASSERT_REG_POSITION(viewport_size_x, 0x41);
1274ASSERT_REG_POSITION(viewport_size_y, 0x43); 1303ASSERT_REG_POSITION(viewport_size_y, 0x43);
1275ASSERT_REG_POSITION(viewport_depth_range, 0x4d); 1304ASSERT_REG_POSITION(viewport_depth_range, 0x4d);
1276ASSERT_REG_POSITION(viewport_depth_far_plane, 0x4e); 1305ASSERT_REG_POSITION(viewport_depth_near_plane, 0x4e);
1277ASSERT_REG_POSITION(vs_output_attributes[0], 0x50); 1306ASSERT_REG_POSITION(vs_output_attributes[0], 0x50);
1278ASSERT_REG_POSITION(vs_output_attributes[1], 0x51); 1307ASSERT_REG_POSITION(vs_output_attributes[1], 0x51);
1279ASSERT_REG_POSITION(viewport_corner, 0x68); 1308ASSERT_REG_POSITION(viewport_corner, 0x68);
1309ASSERT_REG_POSITION(depthmap_enable, 0x6D);
1280ASSERT_REG_POSITION(texture0_enable, 0x80); 1310ASSERT_REG_POSITION(texture0_enable, 0x80);
1281ASSERT_REG_POSITION(texture0, 0x81); 1311ASSERT_REG_POSITION(texture0, 0x81);
1282ASSERT_REG_POSITION(texture0_format, 0x8e); 1312ASSERT_REG_POSITION(texture0_format, 0x8e);
diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h
index 323290054..495174c25 100644
--- a/src/video_core/pica_state.h
+++ b/src/video_core/pica_state.h
@@ -4,6 +4,11 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
8
9#include "common/bit_field.h"
10#include "common/common_types.h"
11
7#include "video_core/pica.h" 12#include "video_core/pica.h"
8#include "video_core/primitive_assembly.h" 13#include "video_core/primitive_assembly.h"
9#include "video_core/shader/shader.h" 14#include "video_core/shader/shader.h"
@@ -20,6 +25,8 @@ struct State {
20 Shader::ShaderSetup vs; 25 Shader::ShaderSetup vs;
21 Shader::ShaderSetup gs; 26 Shader::ShaderSetup gs;
22 27
28 std::array<Math::Vec4<float24>, 16> vs_default_attributes;
29
23 struct { 30 struct {
24 union LutEntry { 31 union LutEntry {
25 // Used for raw access 32 // Used for raw access
@@ -51,7 +58,7 @@ struct State {
51 // Used to buffer partial vertices for immediate-mode rendering. 58 // Used to buffer partial vertices for immediate-mode rendering.
52 Shader::InputVertex input_vertex; 59 Shader::InputVertex input_vertex;
53 // Index of the next attribute to be loaded into `input_vertex`. 60 // Index of the next attribute to be loaded into `input_vertex`.
54 int current_attribute = 0; 61 u32 current_attribute = 0;
55 } immediate; 62 } immediate;
56 63
57 // This is constructed with a dummy triangle topology 64 // This is constructed with a dummy triangle topology
diff --git a/src/video_core/pica_types.h b/src/video_core/pica_types.h
index ecf45654b..3b7bfbdca 100644
--- a/src/video_core/pica_types.h
+++ b/src/video_core/pica_types.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <cmath>
7#include <cstring> 8#include <cstring>
8 9
9#include "common/common_types.h" 10#include "common/common_types.h"
diff --git a/src/video_core/primitive_assembly.cpp b/src/video_core/primitive_assembly.cpp
index ff3e2b862..68ea3c08a 100644
--- a/src/video_core/primitive_assembly.cpp
+++ b/src/video_core/primitive_assembly.cpp
@@ -6,8 +6,7 @@
6 6
7#include "video_core/pica.h" 7#include "video_core/pica.h"
8#include "video_core/primitive_assembly.h" 8#include "video_core/primitive_assembly.h"
9#include "video_core/debug_utils/debug_utils.h" 9#include "video_core/shader/shader.h"
10#include "video_core/shader/shader_interpreter.h"
11 10
12namespace Pica { 11namespace Pica {
13 12
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 5b9ed7c64..65168f05a 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -3,23 +3,28 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm> 5#include <algorithm>
6#include <array>
6#include <cmath> 7#include <cmath>
7 8
9#include "common/assert.h"
10#include "common/bit_field.h"
8#include "common/color.h" 11#include "common/color.h"
9#include "common/common_types.h" 12#include "common/common_types.h"
13#include "common/logging/log.h"
10#include "common/math_util.h" 14#include "common/math_util.h"
11#include "common/microprofile.h" 15#include "common/microprofile.h"
12#include "common/profiler.h" 16#include "common/vector_math.h"
13 17
14#include "core/memory.h" 18#include "core/memory.h"
15#include "core/hw/gpu.h" 19#include "core/hw/gpu.h"
16 20
21#include "video_core/debug_utils/debug_utils.h"
17#include "video_core/pica.h" 22#include "video_core/pica.h"
18#include "video_core/pica_state.h" 23#include "video_core/pica_state.h"
24#include "video_core/pica_types.h"
19#include "video_core/rasterizer.h" 25#include "video_core/rasterizer.h"
20#include "video_core/utils.h" 26#include "video_core/utils.h"
21#include "video_core/debug_utils/debug_utils.h" 27#include "video_core/shader/shader.h"
22#include "video_core/shader/shader_interpreter.h"
23 28
24namespace Pica { 29namespace Pica {
25 30
@@ -287,7 +292,6 @@ static int SignedArea (const Math::Vec2<Fix12P4>& vtx1,
287 return Math::Cross(vec1, vec2).z; 292 return Math::Cross(vec1, vec2).z;
288}; 293};
289 294
290static Common::Profiling::TimingCategory rasterization_category("Rasterization");
291MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 240)); 295MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 240));
292 296
293/** 297/**
@@ -300,7 +304,6 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
300 bool reversed = false) 304 bool reversed = false)
301{ 305{
302 const auto& regs = g_state.regs; 306 const auto& regs = g_state.regs;
303 Common::Profiling::ScopeTimer timer(rasterization_category);
304 MICROPROFILE_SCOPE(GPU_Rasterization); 307 MICROPROFILE_SCOPE(GPU_Rasterization);
305 308
306 // vertex positions in rasterizer coordinates 309 // vertex positions in rasterizer coordinates
@@ -439,8 +442,33 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
439 442
440 DEBUG_ASSERT(0 != texture.config.address); 443 DEBUG_ASSERT(0 != texture.config.address);
441 444
442 int s = (int)(uv[i].u() * float24::FromFloat32(static_cast<float>(texture.config.width))).ToFloat32(); 445 float24 u = uv[i].u();
443 int t = (int)(uv[i].v() * float24::FromFloat32(static_cast<float>(texture.config.height))).ToFloat32(); 446 float24 v = uv[i].v();
447
448 // Only unit 0 respects the texturing type (according to 3DBrew)
449 // TODO: Refactor so cubemaps and shadowmaps can be handled
450 if (i == 0) {
451 switch(texture.config.type) {
452 case Regs::TextureConfig::Texture2D:
453 break;
454 case Regs::TextureConfig::Projection2D: {
455 auto tc0_w = GetInterpolatedAttribute(v0.tc0_w, v1.tc0_w, v2.tc0_w);
456 u /= tc0_w;
457 v /= tc0_w;
458 break;
459 }
460 default:
461 // TODO: Change to LOG_ERROR when more types are handled.
462 LOG_DEBUG(HW_GPU, "Unhandled texture type %x", (int)texture.config.type);
463 UNIMPLEMENTED();
464 break;
465 }
466 }
467
468 int s = (int)(u * float24::FromFloat32(static_cast<float>(texture.config.width))).ToFloat32();
469 int t = (int)(v * float24::FromFloat32(static_cast<float>(texture.config.height))).ToFloat32();
470
471
444 static auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val, unsigned size) { 472 static auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val, unsigned size) {
445 switch (mode) { 473 switch (mode) {
446 case Regs::TextureConfig::ClampToEdge: 474 case Regs::TextureConfig::ClampToEdge:
@@ -859,10 +887,30 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
859 } 887 }
860 } 888 }
861 889
890 // interpolated_z = z / w
891 float interpolated_z_over_w = (v0.screenpos[2].ToFloat32() * w0 +
892 v1.screenpos[2].ToFloat32() * w1 +
893 v2.screenpos[2].ToFloat32() * w2) / wsum;
894
895 // Not fully accurate. About 3 bits in precision are missing.
896 // Z-Buffer (z / w * scale + offset)
897 float depth_scale = float24::FromRaw(regs.viewport_depth_range).ToFloat32();
898 float depth_offset = float24::FromRaw(regs.viewport_depth_near_plane).ToFloat32();
899 float depth = interpolated_z_over_w * depth_scale + depth_offset;
900
901 // Potentially switch to W-Buffer
902 if (regs.depthmap_enable == Pica::Regs::DepthBuffering::WBuffering) {
903
904 // W-Buffer (z * scale + w * offset = (z / w * scale + offset) * w)
905 depth *= interpolated_w_inverse.ToFloat32() * wsum;
906 }
907
908 // Clamp the result
909 depth = MathUtil::Clamp(depth, 0.0f, 1.0f);
910
911 // Convert float to integer
862 unsigned num_bits = Regs::DepthBitsPerPixel(regs.framebuffer.depth_format); 912 unsigned num_bits = Regs::DepthBitsPerPixel(regs.framebuffer.depth_format);
863 u32 z = (u32)((v0.screenpos[2].ToFloat32() * w0 + 913 u32 z = (u32)(depth * ((1 << num_bits) - 1));
864 v1.screenpos[2].ToFloat32() * w1 +
865 v2.screenpos[2].ToFloat32() * w2) * ((1 << num_bits) - 1) / wsum);
866 914
867 if (output_merger.depth_test_enable) { 915 if (output_merger.depth_test_enable) {
868 u32 ref_z = GetDepth(x >> 4, y >> 4); 916 u32 ref_z = GetDepth(x >> 4, y >> 4);
@@ -923,92 +971,72 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
923 if (output_merger.alphablend_enable) { 971 if (output_merger.alphablend_enable) {
924 auto params = output_merger.alpha_blending; 972 auto params = output_merger.alpha_blending;
925 973
926 auto LookupFactorRGB = [&](Regs::BlendFactor factor) -> Math::Vec3<u8> { 974 auto LookupFactor = [&](unsigned channel, Regs::BlendFactor factor) -> u8 {
975 DEBUG_ASSERT(channel < 4);
976
977 const Math::Vec4<u8> blend_const = {
978 static_cast<u8>(output_merger.blend_const.r),
979 static_cast<u8>(output_merger.blend_const.g),
980 static_cast<u8>(output_merger.blend_const.b),
981 static_cast<u8>(output_merger.blend_const.a)
982 };
983
927 switch (factor) { 984 switch (factor) {
928 case Regs::BlendFactor::Zero : 985 case Regs::BlendFactor::Zero:
929 return Math::Vec3<u8>(0, 0, 0); 986 return 0;
930 987
931 case Regs::BlendFactor::One : 988 case Regs::BlendFactor::One:
932 return Math::Vec3<u8>(255, 255, 255); 989 return 255;
933 990
934 case Regs::BlendFactor::SourceColor: 991 case Regs::BlendFactor::SourceColor:
935 return combiner_output.rgb(); 992 return combiner_output[channel];
936 993
937 case Regs::BlendFactor::OneMinusSourceColor: 994 case Regs::BlendFactor::OneMinusSourceColor:
938 return Math::Vec3<u8>(255 - combiner_output.r(), 255 - combiner_output.g(), 255 - combiner_output.b()); 995 return 255 - combiner_output[channel];
939 996
940 case Regs::BlendFactor::DestColor: 997 case Regs::BlendFactor::DestColor:
941 return dest.rgb(); 998 return dest[channel];
942 999
943 case Regs::BlendFactor::OneMinusDestColor: 1000 case Regs::BlendFactor::OneMinusDestColor:
944 return Math::Vec3<u8>(255 - dest.r(), 255 - dest.g(), 255 - dest.b()); 1001 return 255 - dest[channel];
945 1002
946 case Regs::BlendFactor::SourceAlpha: 1003 case Regs::BlendFactor::SourceAlpha:
947 return Math::Vec3<u8>(combiner_output.a(), combiner_output.a(), combiner_output.a()); 1004 return combiner_output.a();
948 1005
949 case Regs::BlendFactor::OneMinusSourceAlpha: 1006 case Regs::BlendFactor::OneMinusSourceAlpha:
950 return Math::Vec3<u8>(255 - combiner_output.a(), 255 - combiner_output.a(), 255 - combiner_output.a()); 1007 return 255 - combiner_output.a();
951 1008
952 case Regs::BlendFactor::DestAlpha: 1009 case Regs::BlendFactor::DestAlpha:
953 return Math::Vec3<u8>(dest.a(), dest.a(), dest.a()); 1010 return dest.a();
954 1011
955 case Regs::BlendFactor::OneMinusDestAlpha: 1012 case Regs::BlendFactor::OneMinusDestAlpha:
956 return Math::Vec3<u8>(255 - dest.a(), 255 - dest.a(), 255 - dest.a()); 1013 return 255 - dest.a();
957 1014
958 case Regs::BlendFactor::ConstantColor: 1015 case Regs::BlendFactor::ConstantColor:
959 return Math::Vec3<u8>(output_merger.blend_const.r, output_merger.blend_const.g, output_merger.blend_const.b); 1016 return blend_const[channel];
960 1017
961 case Regs::BlendFactor::OneMinusConstantColor: 1018 case Regs::BlendFactor::OneMinusConstantColor:
962 return Math::Vec3<u8>(255 - output_merger.blend_const.r, 255 - output_merger.blend_const.g, 255 - output_merger.blend_const.b); 1019 return 255 - blend_const[channel];
963 1020
964 case Regs::BlendFactor::ConstantAlpha: 1021 case Regs::BlendFactor::ConstantAlpha:
965 return Math::Vec3<u8>(output_merger.blend_const.a, output_merger.blend_const.a, output_merger.blend_const.a); 1022 return blend_const.a();
966 1023
967 case Regs::BlendFactor::OneMinusConstantAlpha: 1024 case Regs::BlendFactor::OneMinusConstantAlpha:
968 return Math::Vec3<u8>(255 - output_merger.blend_const.a, 255 - output_merger.blend_const.a, 255 - output_merger.blend_const.a); 1025 return 255 - blend_const.a();
969 1026
970 default: 1027 case Regs::BlendFactor::SourceAlphaSaturate:
971 LOG_CRITICAL(HW_GPU, "Unknown color blend factor %x", factor); 1028 // Returns 1.0 for the alpha channel
972 UNIMPLEMENTED(); 1029 if (channel == 3)
973 break; 1030 return 255;
974 } 1031 return std::min(combiner_output.a(), static_cast<u8>(255 - dest.a()));
975
976 return {};
977 };
978
979 auto LookupFactorA = [&](Regs::BlendFactor factor) -> u8 {
980 switch (factor) {
981 case Regs::BlendFactor::Zero:
982 return 0;
983
984 case Regs::BlendFactor::One:
985 return 255;
986
987 case Regs::BlendFactor::SourceAlpha:
988 return combiner_output.a();
989
990 case Regs::BlendFactor::OneMinusSourceAlpha:
991 return 255 - combiner_output.a();
992
993 case Regs::BlendFactor::DestAlpha:
994 return dest.a();
995
996 case Regs::BlendFactor::OneMinusDestAlpha:
997 return 255 - dest.a();
998
999 case Regs::BlendFactor::ConstantAlpha:
1000 return output_merger.blend_const.a;
1001
1002 case Regs::BlendFactor::OneMinusConstantAlpha:
1003 return 255 - output_merger.blend_const.a;
1004 1032
1005 default: 1033 default:
1006 LOG_CRITICAL(HW_GPU, "Unknown alpha blend factor %x", factor); 1034 LOG_CRITICAL(HW_GPU, "Unknown blend factor %x", factor);
1007 UNIMPLEMENTED(); 1035 UNIMPLEMENTED();
1008 break; 1036 break;
1009 } 1037 }
1010 1038
1011 return {}; 1039 return combiner_output[channel];
1012 }; 1040 };
1013 1041
1014 static auto EvaluateBlendEquation = [](const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor, 1042 static auto EvaluateBlendEquation = [](const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor,
@@ -1060,10 +1088,15 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
1060 MathUtil::Clamp(result.a(), 0, 255)); 1088 MathUtil::Clamp(result.a(), 0, 255));
1061 }; 1089 };
1062 1090
1063 auto srcfactor = Math::MakeVec(LookupFactorRGB(params.factor_source_rgb), 1091 auto srcfactor = Math::MakeVec(LookupFactor(0, params.factor_source_rgb),
1064 LookupFactorA(params.factor_source_a)); 1092 LookupFactor(1, params.factor_source_rgb),
1065 auto dstfactor = Math::MakeVec(LookupFactorRGB(params.factor_dest_rgb), 1093 LookupFactor(2, params.factor_source_rgb),
1066 LookupFactorA(params.factor_dest_a)); 1094 LookupFactor(3, params.factor_source_a));
1095
1096 auto dstfactor = Math::MakeVec(LookupFactor(0, params.factor_dest_rgb),
1097 LookupFactor(1, params.factor_dest_rgb),
1098 LookupFactor(2, params.factor_dest_rgb),
1099 LookupFactor(3, params.factor_dest_a));
1067 1100
1068 blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_rgb); 1101 blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_rgb);
1069 blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_a).a(); 1102 blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_a).a();
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 008c5827b..bf7101665 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -6,6 +6,10 @@
6 6
7#include "common/common_types.h" 7#include "common/common_types.h"
8 8
9#include "core/hw/gpu.h"
10
11struct ScreenInfo;
12
9namespace Pica { 13namespace Pica {
10namespace Shader { 14namespace Shader {
11struct OutputVertex; 15struct OutputVertex;
@@ -18,12 +22,6 @@ class RasterizerInterface {
18public: 22public:
19 virtual ~RasterizerInterface() {} 23 virtual ~RasterizerInterface() {}
20 24
21 /// Initialize API-specific GPU objects
22 virtual void InitObjects() = 0;
23
24 /// Reset the rasterizer, such as flushing all caches and updating all state
25 virtual void Reset() = 0;
26
27 /// Queues the primitive formed by the given vertices for rendering 25 /// Queues the primitive formed by the given vertices for rendering
28 virtual void AddTriangle(const Pica::Shader::OutputVertex& v0, 26 virtual void AddTriangle(const Pica::Shader::OutputVertex& v0,
29 const Pica::Shader::OutputVertex& v1, 27 const Pica::Shader::OutputVertex& v1,
@@ -32,17 +30,26 @@ public:
32 /// Draw the current batch of triangles 30 /// Draw the current batch of triangles
33 virtual void DrawTriangles() = 0; 31 virtual void DrawTriangles() = 0;
34 32
35 /// Commit the rasterizer's framebuffer contents immediately to the current 3DS memory framebuffer
36 virtual void FlushFramebuffer() = 0;
37
38 /// Notify rasterizer that the specified PICA register has been changed 33 /// Notify rasterizer that the specified PICA register has been changed
39 virtual void NotifyPicaRegisterChanged(u32 id) = 0; 34 virtual void NotifyPicaRegisterChanged(u32 id) = 0;
40 35
41 /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory. 36 /// Notify rasterizer that all caches should be flushed to 3DS memory
37 virtual void FlushAll() = 0;
38
39 /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory
42 virtual void FlushRegion(PAddr addr, u32 size) = 0; 40 virtual void FlushRegion(PAddr addr, u32 size) = 0;
43 41
44 /// Notify rasterizer that any caches of the specified region should be discraded and reloaded from 3DS memory. 42 /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory and invalidated
45 virtual void InvalidateRegion(PAddr addr, u32 size) = 0; 43 virtual void FlushAndInvalidateRegion(PAddr addr, u32 size) = 0;
44
45 /// Attempt to use a faster method to perform a display transfer
46 virtual bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { return false; }
47
48 /// Attempt to use a faster method to fill a region
49 virtual bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { return false; }
50
51 /// Attempt to use a faster method to display the framebuffer to screen
52 virtual bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) { return false; }
46}; 53};
47 54
48} 55}
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index 101f84eb9..3f451e062 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -2,10 +2,9 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <atomic>
5#include <memory> 6#include <memory>
6 7
7#include "core/settings.h"
8
9#include "video_core/renderer_base.h" 8#include "video_core/renderer_base.h"
10#include "video_core/video_core.h" 9#include "video_core/video_core.h"
11#include "video_core/swrasterizer.h" 10#include "video_core/swrasterizer.h"
@@ -21,7 +20,5 @@ void RendererBase::RefreshRasterizerSetting() {
21 } else { 20 } else {
22 rasterizer = std::make_unique<VideoCore::SWRasterizer>(); 21 rasterizer = std::make_unique<VideoCore::SWRasterizer>();
23 } 22 }
24 rasterizer->InitObjects();
25 rasterizer->Reset();
26 } 23 }
27} 24}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 6ca9f45e2..ed2e2f3ae 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -2,28 +2,28 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <cstring>
6#include <memory> 5#include <memory>
6#include <string>
7#include <tuple>
8#include <utility>
7 9
8#include <glad/glad.h> 10#include <glad/glad.h>
9 11
12#include "common/assert.h"
10#include "common/color.h" 13#include "common/color.h"
11#include "common/file_util.h" 14#include "common/logging/log.h"
12#include "common/math_util.h" 15#include "common/math_util.h"
13#include "common/microprofile.h" 16#include "common/vector_math.h"
14#include "common/profiler.h"
15 17
16#include "core/memory.h"
17#include "core/settings.h"
18#include "core/hw/gpu.h" 18#include "core/hw/gpu.h"
19 19
20#include "video_core/pica.h" 20#include "video_core/pica.h"
21#include "video_core/pica_state.h" 21#include "video_core/pica_state.h"
22#include "video_core/utils.h"
23#include "video_core/renderer_opengl/gl_rasterizer.h" 22#include "video_core/renderer_opengl/gl_rasterizer.h"
24#include "video_core/renderer_opengl/gl_shader_gen.h" 23#include "video_core/renderer_opengl/gl_shader_gen.h"
25#include "video_core/renderer_opengl/gl_shader_util.h" 24#include "video_core/renderer_opengl/gl_shader_util.h"
26#include "video_core/renderer_opengl/pica_to_gl.h" 25#include "video_core/renderer_opengl/pica_to_gl.h"
26#include "video_core/renderer_opengl/renderer_opengl.h"
27 27
28static bool IsPassThroughTevStage(const Pica::Regs::TevStageConfig& stage) { 28static bool IsPassThroughTevStage(const Pica::Regs::TevStageConfig& stage) {
29 return (stage.color_op == Pica::Regs::TevStageConfig::Operation::Replace && 29 return (stage.color_op == Pica::Regs::TevStageConfig::Operation::Replace &&
@@ -36,10 +36,7 @@ static bool IsPassThroughTevStage(const Pica::Regs::TevStageConfig& stage) {
36 stage.GetAlphaMultiplier() == 1); 36 stage.GetAlphaMultiplier() == 1);
37} 37}
38 38
39RasterizerOpenGL::RasterizerOpenGL() : cached_fb_color_addr(0), cached_fb_depth_addr(0) { } 39RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
40RasterizerOpenGL::~RasterizerOpenGL() { }
41
42void RasterizerOpenGL::InitObjects() {
43 // Create sampler objects 40 // Create sampler objects
44 for (size_t i = 0; i < texture_samplers.size(); ++i) { 41 for (size_t i = 0; i < texture_samplers.size(); ++i) {
45 texture_samplers[i].Create(); 42 texture_samplers[i].Create();
@@ -61,6 +58,10 @@ void RasterizerOpenGL::InitObjects() {
61 58
62 uniform_block_data.dirty = true; 59 uniform_block_data.dirty = true;
63 60
61 for (unsigned index = 0; index < lighting_luts.size(); index++) {
62 uniform_block_data.lut_dirty[index] = true;
63 }
64
64 // Set vertex attributes 65 // Set vertex attributes
65 glVertexAttribPointer(GLShader::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position)); 66 glVertexAttribPointer(GLShader::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position));
66 glEnableVertexAttribArray(GLShader::ATTRIBUTE_POSITION); 67 glEnableVertexAttribArray(GLShader::ATTRIBUTE_POSITION);
@@ -75,76 +76,33 @@ void RasterizerOpenGL::InitObjects() {
75 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD1); 76 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD1);
76 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD2); 77 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD2);
77 78
79 glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD0_W, 1, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord0_w));
80 glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD0_W);
81
78 glVertexAttribPointer(GLShader::ATTRIBUTE_NORMQUAT, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, normquat)); 82 glVertexAttribPointer(GLShader::ATTRIBUTE_NORMQUAT, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, normquat));
79 glEnableVertexAttribArray(GLShader::ATTRIBUTE_NORMQUAT); 83 glEnableVertexAttribArray(GLShader::ATTRIBUTE_NORMQUAT);
80 84
81 glVertexAttribPointer(GLShader::ATTRIBUTE_VIEW, 3, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, view)); 85 glVertexAttribPointer(GLShader::ATTRIBUTE_VIEW, 3, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, view));
82 glEnableVertexAttribArray(GLShader::ATTRIBUTE_VIEW); 86 glEnableVertexAttribArray(GLShader::ATTRIBUTE_VIEW);
83 87
84 SetShader(); 88 // Create render framebuffer
85
86 // Create textures for OGL framebuffer that will be rendered to, initially 1x1 to succeed in framebuffer creation
87 fb_color_texture.texture.Create();
88 ReconfigureColorTexture(fb_color_texture, Pica::Regs::ColorFormat::RGBA8, 1, 1);
89
90 state.texture_units[0].texture_2d = fb_color_texture.texture.handle;
91 state.Apply();
92
93 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
94 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
95 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
96 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
97 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
98
99 state.texture_units[0].texture_2d = 0;
100 state.Apply();
101
102 fb_depth_texture.texture.Create();
103 ReconfigureDepthTexture(fb_depth_texture, Pica::Regs::DepthFormat::D16, 1, 1);
104
105 state.texture_units[0].texture_2d = fb_depth_texture.texture.handle;
106 state.Apply();
107
108 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
109 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
110 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
111 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
112 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
113 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_FUNC, GL_LEQUAL);
114 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_MODE, GL_NONE);
115
116 state.texture_units[0].texture_2d = 0;
117 state.Apply();
118
119 // Configure OpenGL framebuffer
120 framebuffer.Create(); 89 framebuffer.Create();
121 90
122 state.draw.framebuffer = framebuffer.handle; 91 // Allocate and bind lighting lut textures
92 for (size_t i = 0; i < lighting_luts.size(); ++i) {
93 lighting_luts[i].Create();
94 state.lighting_luts[i].texture_1d = lighting_luts[i].handle;
95 }
123 state.Apply(); 96 state.Apply();
124 97
125 glActiveTexture(GL_TEXTURE0); 98 for (size_t i = 0; i < lighting_luts.size(); ++i) {
126 glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, fb_color_texture.texture.handle, 0); 99 glActiveTexture(static_cast<GLenum>(GL_TEXTURE3 + i));
127 glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, fb_depth_texture.texture.handle, 0);
128
129 for (size_t i = 0; i < lighting_lut.size(); ++i) {
130 lighting_lut[i].Create();
131 state.lighting_lut[i].texture_1d = lighting_lut[i].handle;
132
133 glActiveTexture(GL_TEXTURE3 + i);
134 glBindTexture(GL_TEXTURE_1D, state.lighting_lut[i].texture_1d);
135
136 glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr); 100 glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr);
137 glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 101 glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
138 glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); 102 glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
139 } 103 }
140 state.Apply();
141 104
142 GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER); 105 // Sync fixed function OpenGL state
143 ASSERT_MSG(status == GL_FRAMEBUFFER_COMPLETE,
144 "OpenGL rasterizer framebuffer setup failed, status %X", status);
145}
146
147void RasterizerOpenGL::Reset() {
148 SyncCullMode(); 106 SyncCullMode();
149 SyncDepthModifiers(); 107 SyncDepthModifiers();
150 SyncBlendEnabled(); 108 SyncBlendEnabled();
@@ -156,10 +114,10 @@ void RasterizerOpenGL::Reset() {
156 SyncColorWriteMask(); 114 SyncColorWriteMask();
157 SyncStencilWriteMask(); 115 SyncStencilWriteMask();
158 SyncDepthWriteMask(); 116 SyncDepthWriteMask();
117}
159 118
160 SetShader(); 119RasterizerOpenGL::~RasterizerOpenGL() {
161 120
162 res_cache.InvalidateAll();
163} 121}
164 122
165/** 123/**
@@ -196,47 +154,98 @@ void RasterizerOpenGL::DrawTriangles() {
196 if (vertex_batch.empty()) 154 if (vertex_batch.empty())
197 return; 155 return;
198 156
199 SyncFramebuffer(); 157 const auto& regs = Pica::g_state.regs;
200 SyncDrawState(); 158
159 // Sync and bind the framebuffer surfaces
160 CachedSurface* color_surface;
161 CachedSurface* depth_surface;
162 MathUtil::Rectangle<int> rect;
163 std::tie(color_surface, depth_surface, rect) = res_cache.GetFramebufferSurfaces(regs.framebuffer);
201 164
202 if (state.draw.shader_dirty) { 165 state.draw.draw_framebuffer = framebuffer.handle;
166 state.Apply();
167
168 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, color_surface != nullptr ? color_surface->texture.handle : 0, 0);
169 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, depth_surface != nullptr ? depth_surface->texture.handle : 0, 0);
170 bool has_stencil = regs.framebuffer.depth_format == Pica::Regs::DepthFormat::D24S8;
171 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, (has_stencil && depth_surface != nullptr) ? depth_surface->texture.handle : 0, 0);
172
173 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
174 return;
175 }
176
177 // Sync the viewport
178 // These registers hold half-width and half-height, so must be multiplied by 2
179 GLsizei viewport_width = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_x).ToFloat32() * 2;
180 GLsizei viewport_height = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_y).ToFloat32() * 2;
181
182 glViewport((GLint)(rect.left + regs.viewport_corner.x * color_surface->res_scale_width),
183 (GLint)(rect.bottom + regs.viewport_corner.y * color_surface->res_scale_height),
184 (GLsizei)(viewport_width * color_surface->res_scale_width), (GLsizei)(viewport_height * color_surface->res_scale_height));
185
186 // Sync and bind the texture surfaces
187 const auto pica_textures = regs.GetTextures();
188 for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) {
189 const auto& texture = pica_textures[texture_index];
190
191 if (texture.enabled) {
192 texture_samplers[texture_index].SyncWithConfig(texture.config);
193 CachedSurface* surface = res_cache.GetTextureSurface(texture);
194 if (surface != nullptr) {
195 state.texture_units[texture_index].texture_2d = surface->texture.handle;
196 } else {
197 // Can occur when texture addr is null or its memory is unmapped/invalid
198 state.texture_units[texture_index].texture_2d = 0;
199 }
200 } else {
201 state.texture_units[texture_index].texture_2d = 0;
202 }
203 }
204
205 // Sync and bind the shader
206 if (shader_dirty) {
203 SetShader(); 207 SetShader();
204 state.draw.shader_dirty = false; 208 shader_dirty = false;
205 } 209 }
206 210
207 for (unsigned index = 0; index < lighting_lut.size(); index++) { 211 // Sync the lighting luts
212 for (unsigned index = 0; index < lighting_luts.size(); index++) {
208 if (uniform_block_data.lut_dirty[index]) { 213 if (uniform_block_data.lut_dirty[index]) {
209 SyncLightingLUT(index); 214 SyncLightingLUT(index);
210 uniform_block_data.lut_dirty[index] = false; 215 uniform_block_data.lut_dirty[index] = false;
211 } 216 }
212 } 217 }
213 218
219 // Sync the uniform data
214 if (uniform_block_data.dirty) { 220 if (uniform_block_data.dirty) {
215 glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, GL_STATIC_DRAW); 221 glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, GL_STATIC_DRAW);
216 uniform_block_data.dirty = false; 222 uniform_block_data.dirty = false;
217 } 223 }
218 224
225 state.Apply();
226
227 // Draw the vertex batch
219 glBufferData(GL_ARRAY_BUFFER, vertex_batch.size() * sizeof(HardwareVertex), vertex_batch.data(), GL_STREAM_DRAW); 228 glBufferData(GL_ARRAY_BUFFER, vertex_batch.size() * sizeof(HardwareVertex), vertex_batch.data(), GL_STREAM_DRAW);
220 glDrawArrays(GL_TRIANGLES, 0, (GLsizei)vertex_batch.size()); 229 glDrawArrays(GL_TRIANGLES, 0, (GLsizei)vertex_batch.size());
221 230
222 vertex_batch.clear(); 231 // Mark framebuffer surfaces as dirty
223 232 // TODO: Restrict invalidation area to the viewport
224 // Flush the resource cache at the current depth and color framebuffer addresses for render-to-texture 233 if (color_surface != nullptr) {
225 const auto& regs = Pica::g_state.regs; 234 color_surface->dirty = true;
226 235 res_cache.FlushRegion(color_surface->addr, color_surface->size, color_surface, true);
227 u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format) 236 }
228 * fb_color_texture.width * fb_color_texture.height; 237 if (depth_surface != nullptr) {
229 238 depth_surface->dirty = true;
230 u32 cached_fb_depth_size = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format) 239 res_cache.FlushRegion(depth_surface->addr, depth_surface->size, depth_surface, true);
231 * fb_depth_texture.width * fb_depth_texture.height; 240 }
232 241
233 res_cache.InvalidateInRange(cached_fb_color_addr, cached_fb_color_size, true); 242 vertex_batch.clear();
234 res_cache.InvalidateInRange(cached_fb_depth_addr, cached_fb_depth_size, true);
235}
236 243
237void RasterizerOpenGL::FlushFramebuffer() { 244 // Unbind textures for potential future use as framebuffer attachments
238 CommitColorBuffer(); 245 for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) {
239 CommitDepthBuffer(); 246 state.texture_units[texture_index].texture_2d = 0;
247 }
248 state.Apply();
240} 249}
241 250
242void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { 251void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
@@ -250,10 +259,15 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
250 259
251 // Depth modifiers 260 // Depth modifiers
252 case PICA_REG_INDEX(viewport_depth_range): 261 case PICA_REG_INDEX(viewport_depth_range):
253 case PICA_REG_INDEX(viewport_depth_far_plane): 262 case PICA_REG_INDEX(viewport_depth_near_plane):
254 SyncDepthModifiers(); 263 SyncDepthModifiers();
255 break; 264 break;
256 265
266 // Depth buffering
267 case PICA_REG_INDEX(depthmap_enable):
268 shader_dirty = true;
269 break;
270
257 // Blending 271 // Blending
258 case PICA_REG_INDEX(output_merger.alphablend_enable): 272 case PICA_REG_INDEX(output_merger.alphablend_enable):
259 SyncBlendEnabled(); 273 SyncBlendEnabled();
@@ -268,7 +282,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
268 // Alpha test 282 // Alpha test
269 case PICA_REG_INDEX(output_merger.alpha_test): 283 case PICA_REG_INDEX(output_merger.alpha_test):
270 SyncAlphaTest(); 284 SyncAlphaTest();
271 state.draw.shader_dirty = true; 285 shader_dirty = true;
272 break; 286 break;
273 287
274 // Sync GL stencil test + stencil write mask 288 // Sync GL stencil test + stencil write mask
@@ -308,6 +322,11 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
308 SyncLogicOp(); 322 SyncLogicOp();
309 break; 323 break;
310 324
325 // Texture 0 type
326 case PICA_REG_INDEX(texture0.type):
327 shader_dirty = true;
328 break;
329
311 // TEV stages 330 // TEV stages
312 case PICA_REG_INDEX(tev_stage0.color_source1): 331 case PICA_REG_INDEX(tev_stage0.color_source1):
313 case PICA_REG_INDEX(tev_stage0.color_modifier1): 332 case PICA_REG_INDEX(tev_stage0.color_modifier1):
@@ -334,7 +353,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
334 case PICA_REG_INDEX(tev_stage5.color_op): 353 case PICA_REG_INDEX(tev_stage5.color_op):
335 case PICA_REG_INDEX(tev_stage5.color_scale): 354 case PICA_REG_INDEX(tev_stage5.color_scale):
336 case PICA_REG_INDEX(tev_combiner_buffer_input): 355 case PICA_REG_INDEX(tev_combiner_buffer_input):
337 state.draw.shader_dirty = true; 356 shader_dirty = true;
338 break; 357 break;
339 case PICA_REG_INDEX(tev_stage0.const_r): 358 case PICA_REG_INDEX(tev_stage0.const_r):
340 SyncTevConstColor(0, regs.tev_stage0); 359 SyncTevConstColor(0, regs.tev_stage0);
@@ -521,41 +540,257 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
521 } 540 }
522} 541}
523 542
543void RasterizerOpenGL::FlushAll() {
544 res_cache.FlushAll();
545}
546
524void RasterizerOpenGL::FlushRegion(PAddr addr, u32 size) { 547void RasterizerOpenGL::FlushRegion(PAddr addr, u32 size) {
525 const auto& regs = Pica::g_state.regs; 548 res_cache.FlushRegion(addr, size, nullptr, false);
549}
550
551void RasterizerOpenGL::FlushAndInvalidateRegion(PAddr addr, u32 size) {
552 res_cache.FlushRegion(addr, size, nullptr, true);
553}
554
555bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) {
556 using PixelFormat = CachedSurface::PixelFormat;
557 using SurfaceType = CachedSurface::SurfaceType;
558
559 if (config.is_texture_copy) {
560 // TODO(tfarley): Try to hardware accelerate this
561 return false;
562 }
563
564 CachedSurface src_params;
565 src_params.addr = config.GetPhysicalInputAddress();
566 src_params.width = config.output_width;
567 src_params.height = config.output_height;
568 src_params.is_tiled = !config.input_linear;
569 src_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.input_format);
570
571 CachedSurface dst_params;
572 dst_params.addr = config.GetPhysicalOutputAddress();
573 dst_params.width = config.scaling != config.NoScale ? config.output_width / 2 : config.output_width.Value();
574 dst_params.height = config.scaling == config.ScaleXY ? config.output_height / 2 : config.output_height.Value();
575 dst_params.is_tiled = config.input_linear != config.dont_swizzle;
576 dst_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.output_format);
577
578 MathUtil::Rectangle<int> src_rect;
579 CachedSurface* src_surface = res_cache.GetSurfaceRect(src_params, false, true, src_rect);
580
581 if (src_surface == nullptr) {
582 return false;
583 }
526 584
527 u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format) 585 // Require destination surface to have same resolution scale as source to preserve scaling
528 * fb_color_texture.width * fb_color_texture.height; 586 dst_params.res_scale_width = src_surface->res_scale_width;
587 dst_params.res_scale_height = src_surface->res_scale_height;
529 588
530 u32 cached_fb_depth_size = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format) 589 MathUtil::Rectangle<int> dst_rect;
531 * fb_depth_texture.width * fb_depth_texture.height; 590 CachedSurface* dst_surface = res_cache.GetSurfaceRect(dst_params, true, false, dst_rect);
532 591
533 // If source memory region overlaps 3DS framebuffers, commit them before the copy happens 592 if (dst_surface == nullptr) {
534 if (MathUtil::IntervalsIntersect(addr, size, cached_fb_color_addr, cached_fb_color_size)) 593 return false;
535 CommitColorBuffer(); 594 }
595
596 // Don't accelerate if the src and dst surfaces are the same
597 if (src_surface == dst_surface) {
598 return false;
599 }
600
601 if (config.flip_vertically) {
602 std::swap(dst_rect.top, dst_rect.bottom);
603 }
536 604
537 if (MathUtil::IntervalsIntersect(addr, size, cached_fb_depth_addr, cached_fb_depth_size)) 605 if (!res_cache.TryBlitSurfaces(src_surface, src_rect, dst_surface, dst_rect)) {
538 CommitDepthBuffer(); 606 return false;
607 }
608
609 u32 dst_size = dst_params.width * dst_params.height * CachedSurface::GetFormatBpp(dst_params.pixel_format) / 8;
610 dst_surface->dirty = true;
611 res_cache.FlushRegion(config.GetPhysicalOutputAddress(), dst_size, dst_surface, true);
612 return true;
539} 613}
540 614
541void RasterizerOpenGL::InvalidateRegion(PAddr addr, u32 size) { 615bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) {
542 const auto& regs = Pica::g_state.regs; 616 using PixelFormat = CachedSurface::PixelFormat;
617 using SurfaceType = CachedSurface::SurfaceType;
618
619 CachedSurface* dst_surface = res_cache.TryGetFillSurface(config);
620
621 if (dst_surface == nullptr) {
622 return false;
623 }
624
625 OpenGLState cur_state = OpenGLState::GetCurState();
626
627 SurfaceType dst_type = CachedSurface::GetFormatType(dst_surface->pixel_format);
628
629 GLuint old_fb = cur_state.draw.draw_framebuffer;
630 cur_state.draw.draw_framebuffer = framebuffer.handle;
631 // TODO: When scissor test is implemented, need to disable scissor test in cur_state here so Clear call isn't affected
632 cur_state.Apply();
633
634 if (dst_type == SurfaceType::Color || dst_type == SurfaceType::Texture) {
635 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_surface->texture.handle, 0);
636 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
637
638 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
639 return false;
640 }
641
642 GLfloat color_values[4] = {0.0f, 0.0f, 0.0f, 0.0f};
643
644 // TODO: Handle additional pixel format and fill value size combinations to accelerate more cases
645 // For instance, checking if fill value's bytes/bits repeat to allow filling I8/A8/I4/A4/...
646 // Currently only handles formats that are multiples of the fill value size
647
648 if (config.fill_24bit) {
649 switch (dst_surface->pixel_format) {
650 case PixelFormat::RGB8:
651 color_values[0] = config.value_24bit_r / 255.0f;
652 color_values[1] = config.value_24bit_g / 255.0f;
653 color_values[2] = config.value_24bit_b / 255.0f;
654 break;
655 default:
656 return false;
657 }
658 } else if (config.fill_32bit) {
659 u32 value = config.value_32bit;
660
661 switch (dst_surface->pixel_format) {
662 case PixelFormat::RGBA8:
663 color_values[0] = (value >> 24) / 255.0f;
664 color_values[1] = ((value >> 16) & 0xFF) / 255.0f;
665 color_values[2] = ((value >> 8) & 0xFF) / 255.0f;
666 color_values[3] = (value & 0xFF) / 255.0f;
667 break;
668 default:
669 return false;
670 }
671 } else {
672 u16 value_16bit = config.value_16bit.Value();
673 Math::Vec4<u8> color;
674
675 switch (dst_surface->pixel_format) {
676 case PixelFormat::RGBA8:
677 color_values[0] = (value_16bit >> 8) / 255.0f;
678 color_values[1] = (value_16bit & 0xFF) / 255.0f;
679 color_values[2] = color_values[0];
680 color_values[3] = color_values[1];
681 break;
682 case PixelFormat::RGB5A1:
683 color = Color::DecodeRGB5A1((const u8*)&value_16bit);
684 color_values[0] = color[0] / 31.0f;
685 color_values[1] = color[1] / 31.0f;
686 color_values[2] = color[2] / 31.0f;
687 color_values[3] = color[3];
688 break;
689 case PixelFormat::RGB565:
690 color = Color::DecodeRGB565((const u8*)&value_16bit);
691 color_values[0] = color[0] / 31.0f;
692 color_values[1] = color[1] / 63.0f;
693 color_values[2] = color[2] / 31.0f;
694 break;
695 case PixelFormat::RGBA4:
696 color = Color::DecodeRGBA4((const u8*)&value_16bit);
697 color_values[0] = color[0] / 15.0f;
698 color_values[1] = color[1] / 15.0f;
699 color_values[2] = color[2] / 15.0f;
700 color_values[3] = color[3] / 15.0f;
701 break;
702 case PixelFormat::IA8:
703 case PixelFormat::RG8:
704 color_values[0] = (value_16bit >> 8) / 255.0f;
705 color_values[1] = (value_16bit & 0xFF) / 255.0f;
706 break;
707 default:
708 return false;
709 }
710 }
711
712 cur_state.color_mask.red_enabled = true;
713 cur_state.color_mask.green_enabled = true;
714 cur_state.color_mask.blue_enabled = true;
715 cur_state.color_mask.alpha_enabled = true;
716 cur_state.Apply();
717 glClearBufferfv(GL_COLOR, 0, color_values);
718 } else if (dst_type == SurfaceType::Depth) {
719 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
720 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_surface->texture.handle, 0);
721 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
722
723 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
724 return false;
725 }
726
727 GLfloat value_float;
728 if (dst_surface->pixel_format == CachedSurface::PixelFormat::D16) {
729 value_float = config.value_32bit / 65535.0f; // 2^16 - 1
730 } else if (dst_surface->pixel_format == CachedSurface::PixelFormat::D24) {
731 value_float = config.value_32bit / 16777215.0f; // 2^24 - 1
732 }
733
734 cur_state.depth.write_mask = true;
735 cur_state.Apply();
736 glClearBufferfv(GL_DEPTH, 0, &value_float);
737 } else if (dst_type == SurfaceType::DepthStencil) {
738 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
739 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, dst_surface->texture.handle, 0);
740
741 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
742 return false;
743 }
744
745 GLfloat value_float = (config.value_32bit & 0xFFFFFF) / 16777215.0f; // 2^24 - 1
746 GLint value_int = (config.value_32bit >> 24);
747
748 cur_state.depth.write_mask = true;
749 cur_state.stencil.write_mask = true;
750 cur_state.Apply();
751 glClearBufferfi(GL_DEPTH_STENCIL, 0, value_float, value_int);
752 }
753
754 cur_state.draw.draw_framebuffer = old_fb;
755 // TODO: Return scissor test to previous value when scissor test is implemented
756 cur_state.Apply();
757
758 dst_surface->dirty = true;
759 res_cache.FlushRegion(dst_surface->addr, dst_surface->size, dst_surface, true);
760 return true;
761}
762
763bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) {
764 if (framebuffer_addr == 0) {
765 return false;
766 }
543 767
544 u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format) 768 CachedSurface src_params;
545 * fb_color_texture.width * fb_color_texture.height; 769 src_params.addr = framebuffer_addr;
770 src_params.width = config.width;
771 src_params.height = config.height;
772 src_params.stride = pixel_stride;
773 src_params.is_tiled = false;
774 src_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.color_format);
546 775
547 u32 cached_fb_depth_size = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format) 776 MathUtil::Rectangle<int> src_rect;
548 * fb_depth_texture.width * fb_depth_texture.height; 777 CachedSurface* src_surface = res_cache.GetSurfaceRect(src_params, false, true, src_rect);
778
779 if (src_surface == nullptr) {
780 return false;
781 }
549 782
550 // If modified memory region overlaps 3DS framebuffers, reload their contents into OpenGL 783 u32 scaled_width = src_surface->GetScaledWidth();
551 if (MathUtil::IntervalsIntersect(addr, size, cached_fb_color_addr, cached_fb_color_size)) 784 u32 scaled_height = src_surface->GetScaledHeight();
552 ReloadColorBuffer();
553 785
554 if (MathUtil::IntervalsIntersect(addr, size, cached_fb_depth_addr, cached_fb_depth_size)) 786 screen_info.display_texcoords = MathUtil::Rectangle<float>((float)src_rect.top / (float)scaled_height,
555 ReloadDepthBuffer(); 787 (float)src_rect.left / (float)scaled_width,
788 (float)src_rect.bottom / (float)scaled_height,
789 (float)src_rect.right / (float)scaled_width);
556 790
557 // Notify cache of flush in case the region touches a cached resource 791 screen_info.display_texture = src_surface->texture.handle;
558 res_cache.InvalidateInRange(addr, size); 792
793 return true;
559} 794}
560 795
561void RasterizerOpenGL::SamplerInfo::Create() { 796void RasterizerOpenGL::SamplerInfo::Create() {
@@ -591,114 +826,13 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Pica::Regs::TextureConf
591 826
592 if (wrap_s == TextureConfig::ClampToBorder || wrap_t == TextureConfig::ClampToBorder) { 827 if (wrap_s == TextureConfig::ClampToBorder || wrap_t == TextureConfig::ClampToBorder) {
593 if (border_color != config.border_color.raw) { 828 if (border_color != config.border_color.raw) {
829 border_color = config.border_color.raw;
594 auto gl_color = PicaToGL::ColorRGBA8(border_color); 830 auto gl_color = PicaToGL::ColorRGBA8(border_color);
595 glSamplerParameterfv(s, GL_TEXTURE_BORDER_COLOR, gl_color.data()); 831 glSamplerParameterfv(s, GL_TEXTURE_BORDER_COLOR, gl_color.data());
596 } 832 }
597 } 833 }
598} 834}
599 835
600void RasterizerOpenGL::ReconfigureColorTexture(TextureInfo& texture, Pica::Regs::ColorFormat format, u32 width, u32 height) {
601 GLint internal_format;
602
603 texture.format = format;
604 texture.width = width;
605 texture.height = height;
606
607 switch (format) {
608 case Pica::Regs::ColorFormat::RGBA8:
609 internal_format = GL_RGBA;
610 texture.gl_format = GL_RGBA;
611 texture.gl_type = GL_UNSIGNED_INT_8_8_8_8;
612 break;
613
614 case Pica::Regs::ColorFormat::RGB8:
615 // This pixel format uses BGR since GL_UNSIGNED_BYTE specifies byte-order, unlike every
616 // specific OpenGL type used in this function using native-endian (that is, little-endian
617 // mostly everywhere) for words or half-words.
618 // TODO: check how those behave on big-endian processors.
619 internal_format = GL_RGB;
620 texture.gl_format = GL_BGR;
621 texture.gl_type = GL_UNSIGNED_BYTE;
622 break;
623
624 case Pica::Regs::ColorFormat::RGB5A1:
625 internal_format = GL_RGBA;
626 texture.gl_format = GL_RGBA;
627 texture.gl_type = GL_UNSIGNED_SHORT_5_5_5_1;
628 break;
629
630 case Pica::Regs::ColorFormat::RGB565:
631 internal_format = GL_RGB;
632 texture.gl_format = GL_RGB;
633 texture.gl_type = GL_UNSIGNED_SHORT_5_6_5;
634 break;
635
636 case Pica::Regs::ColorFormat::RGBA4:
637 internal_format = GL_RGBA;
638 texture.gl_format = GL_RGBA;
639 texture.gl_type = GL_UNSIGNED_SHORT_4_4_4_4;
640 break;
641
642 default:
643 LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer texture color format %x", format);
644 UNIMPLEMENTED();
645 break;
646 }
647
648 state.texture_units[0].texture_2d = texture.texture.handle;
649 state.Apply();
650
651 glActiveTexture(GL_TEXTURE0);
652 glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0,
653 texture.gl_format, texture.gl_type, nullptr);
654
655 state.texture_units[0].texture_2d = 0;
656 state.Apply();
657}
658
659void RasterizerOpenGL::ReconfigureDepthTexture(DepthTextureInfo& texture, Pica::Regs::DepthFormat format, u32 width, u32 height) {
660 GLint internal_format;
661
662 texture.format = format;
663 texture.width = width;
664 texture.height = height;
665
666 switch (format) {
667 case Pica::Regs::DepthFormat::D16:
668 internal_format = GL_DEPTH_COMPONENT16;
669 texture.gl_format = GL_DEPTH_COMPONENT;
670 texture.gl_type = GL_UNSIGNED_SHORT;
671 break;
672
673 case Pica::Regs::DepthFormat::D24:
674 internal_format = GL_DEPTH_COMPONENT24;
675 texture.gl_format = GL_DEPTH_COMPONENT;
676 texture.gl_type = GL_UNSIGNED_INT;
677 break;
678
679 case Pica::Regs::DepthFormat::D24S8:
680 internal_format = GL_DEPTH24_STENCIL8;
681 texture.gl_format = GL_DEPTH_STENCIL;
682 texture.gl_type = GL_UNSIGNED_INT_24_8;
683 break;
684
685 default:
686 LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer texture depth format %x", format);
687 UNIMPLEMENTED();
688 break;
689 }
690
691 state.texture_units[0].texture_2d = texture.texture.handle;
692 state.Apply();
693
694 glActiveTexture(GL_TEXTURE0);
695 glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0,
696 texture.gl_format, texture.gl_type, nullptr);
697
698 state.texture_units[0].texture_2d = 0;
699 state.Apply();
700}
701
702void RasterizerOpenGL::SetShader() { 836void RasterizerOpenGL::SetShader() {
703 PicaShaderConfig config = PicaShaderConfig::CurrentConfig(); 837 PicaShaderConfig config = PicaShaderConfig::CurrentConfig();
704 std::unique_ptr<PicaShader> shader = std::make_unique<PicaShader>(); 838 std::unique_ptr<PicaShader> shader = std::make_unique<PicaShader>();
@@ -754,6 +888,8 @@ void RasterizerOpenGL::SetShader() {
754 888
755 SyncGlobalAmbient(); 889 SyncGlobalAmbient();
756 for (int light_index = 0; light_index < 8; light_index++) { 890 for (int light_index = 0; light_index < 8; light_index++) {
891 SyncLightSpecular0(light_index);
892 SyncLightSpecular1(light_index);
757 SyncLightDiffuse(light_index); 893 SyncLightDiffuse(light_index);
758 SyncLightAmbient(light_index); 894 SyncLightAmbient(light_index);
759 SyncLightPosition(light_index); 895 SyncLightPosition(light_index);
@@ -761,83 +897,6 @@ void RasterizerOpenGL::SetShader() {
761 } 897 }
762} 898}
763 899
764void RasterizerOpenGL::SyncFramebuffer() {
765 const auto& regs = Pica::g_state.regs;
766
767 PAddr new_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress();
768 Pica::Regs::ColorFormat new_fb_color_format = regs.framebuffer.color_format;
769
770 PAddr new_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress();
771 Pica::Regs::DepthFormat new_fb_depth_format = regs.framebuffer.depth_format;
772
773 bool fb_size_changed = fb_color_texture.width != static_cast<GLsizei>(regs.framebuffer.GetWidth()) ||
774 fb_color_texture.height != static_cast<GLsizei>(regs.framebuffer.GetHeight());
775
776 bool color_fb_prop_changed = fb_color_texture.format != new_fb_color_format ||
777 fb_size_changed;
778
779 bool depth_fb_prop_changed = fb_depth_texture.format != new_fb_depth_format ||
780 fb_size_changed;
781
782 bool color_fb_modified = cached_fb_color_addr != new_fb_color_addr ||
783 color_fb_prop_changed;
784
785 bool depth_fb_modified = cached_fb_depth_addr != new_fb_depth_addr ||
786 depth_fb_prop_changed;
787
788 // Commit if framebuffer modified in any way
789 if (color_fb_modified)
790 CommitColorBuffer();
791
792 if (depth_fb_modified)
793 CommitDepthBuffer();
794
795 // Reconfigure framebuffer textures if any property has changed
796 if (color_fb_prop_changed) {
797 ReconfigureColorTexture(fb_color_texture, new_fb_color_format,
798 regs.framebuffer.GetWidth(), regs.framebuffer.GetHeight());
799 }
800
801 if (depth_fb_prop_changed) {
802 ReconfigureDepthTexture(fb_depth_texture, new_fb_depth_format,
803 regs.framebuffer.GetWidth(), regs.framebuffer.GetHeight());
804
805 // Only attach depth buffer as stencil if it supports stencil
806 switch (new_fb_depth_format) {
807 case Pica::Regs::DepthFormat::D16:
808 case Pica::Regs::DepthFormat::D24:
809 glFramebufferTexture2D(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
810 break;
811
812 case Pica::Regs::DepthFormat::D24S8:
813 glFramebufferTexture2D(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, fb_depth_texture.texture.handle, 0);
814 break;
815
816 default:
817 LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer depth format %x", new_fb_depth_format);
818 UNIMPLEMENTED();
819 break;
820 }
821 }
822
823 // Load buffer data again if fb modified in any way
824 if (color_fb_modified) {
825 cached_fb_color_addr = new_fb_color_addr;
826
827 ReloadColorBuffer();
828 }
829
830 if (depth_fb_modified) {
831 cached_fb_depth_addr = new_fb_depth_addr;
832
833 ReloadDepthBuffer();
834 }
835
836 GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER);
837 ASSERT_MSG(status == GL_FRAMEBUFFER_COMPLETE,
838 "OpenGL rasterizer framebuffer setup failed, status %X", status);
839}
840
841void RasterizerOpenGL::SyncCullMode() { 900void RasterizerOpenGL::SyncCullMode() {
842 const auto& regs = Pica::g_state.regs; 901 const auto& regs = Pica::g_state.regs;
843 902
@@ -864,10 +923,10 @@ void RasterizerOpenGL::SyncCullMode() {
864} 923}
865 924
866void RasterizerOpenGL::SyncDepthModifiers() { 925void RasterizerOpenGL::SyncDepthModifiers() {
867 float depth_scale = -Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_range).ToFloat32(); 926 float depth_scale = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_range).ToFloat32();
868 float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_far_plane).ToFloat32() / 2.0f; 927 float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_near_plane).ToFloat32();
869 928
870 // TODO: Implement scale modifier 929 uniform_block_data.data.depth_scale = depth_scale;
871 uniform_block_data.data.depth_offset = depth_offset; 930 uniform_block_data.data.depth_offset = depth_offset;
872 uniform_block_data.dirty = true; 931 uniform_block_data.dirty = true;
873} 932}
@@ -1034,229 +1093,3 @@ void RasterizerOpenGL::SyncLightPosition(int light_index) {
1034 uniform_block_data.dirty = true; 1093 uniform_block_data.dirty = true;
1035 } 1094 }
1036} 1095}
1037
1038void RasterizerOpenGL::SyncDrawState() {
1039 const auto& regs = Pica::g_state.regs;
1040
1041 // Sync the viewport
1042 GLsizei viewport_width = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_x).ToFloat32() * 2;
1043 GLsizei viewport_height = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_y).ToFloat32() * 2;
1044
1045 // OpenGL uses different y coordinates, so negate corner offset and flip origin
1046 // TODO: Ensure viewport_corner.x should not be negated or origin flipped
1047 // TODO: Use floating-point viewports for accuracy if supported
1048 glViewport((GLsizei)regs.viewport_corner.x,
1049 (GLsizei)regs.viewport_corner.y,
1050 viewport_width, viewport_height);
1051
1052 // Sync bound texture(s), upload if not cached
1053 const auto pica_textures = regs.GetTextures();
1054 for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) {
1055 const auto& texture = pica_textures[texture_index];
1056
1057 if (texture.enabled) {
1058 texture_samplers[texture_index].SyncWithConfig(texture.config);
1059 res_cache.LoadAndBindTexture(state, texture_index, texture);
1060 } else {
1061 state.texture_units[texture_index].texture_2d = 0;
1062 }
1063 }
1064
1065 state.draw.uniform_buffer = uniform_buffer.handle;
1066 state.Apply();
1067}
1068
1069MICROPROFILE_DEFINE(OpenGL_FramebufferReload, "OpenGL", "FB Reload", MP_RGB(70, 70, 200));
1070
1071void RasterizerOpenGL::ReloadColorBuffer() {
1072 u8* color_buffer = Memory::GetPhysicalPointer(cached_fb_color_addr);
1073
1074 if (color_buffer == nullptr)
1075 return;
1076
1077 MICROPROFILE_SCOPE(OpenGL_FramebufferReload);
1078
1079 u32 bytes_per_pixel = Pica::Regs::BytesPerColorPixel(fb_color_texture.format);
1080
1081 std::unique_ptr<u8[]> temp_fb_color_buffer(new u8[fb_color_texture.width * fb_color_texture.height * bytes_per_pixel]);
1082
1083 // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion is necessary.
1084 for (int y = 0; y < fb_color_texture.height; ++y) {
1085 for (int x = 0; x < fb_color_texture.width; ++x) {
1086 const u32 coarse_y = y & ~7;
1087 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_color_texture.width * bytes_per_pixel;
1088 u32 gl_pixel_index = (x + (fb_color_texture.height - 1 - y) * fb_color_texture.width) * bytes_per_pixel;
1089
1090 u8* pixel = color_buffer + dst_offset;
1091 memcpy(&temp_fb_color_buffer[gl_pixel_index], pixel, bytes_per_pixel);
1092 }
1093 }
1094
1095 state.texture_units[0].texture_2d = fb_color_texture.texture.handle;
1096 state.Apply();
1097
1098 glActiveTexture(GL_TEXTURE0);
1099 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, fb_color_texture.width, fb_color_texture.height,
1100 fb_color_texture.gl_format, fb_color_texture.gl_type, temp_fb_color_buffer.get());
1101
1102 state.texture_units[0].texture_2d = 0;
1103 state.Apply();
1104}
1105
1106void RasterizerOpenGL::ReloadDepthBuffer() {
1107 if (cached_fb_depth_addr == 0)
1108 return;
1109
1110 // TODO: Appears to work, but double-check endianness of depth values and order of depth-stencil
1111 u8* depth_buffer = Memory::GetPhysicalPointer(cached_fb_depth_addr);
1112
1113 if (depth_buffer == nullptr)
1114 return;
1115
1116 MICROPROFILE_SCOPE(OpenGL_FramebufferReload);
1117
1118 u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format);
1119
1120 // OpenGL needs 4 bpp alignment for D24
1121 u32 gl_bpp = bytes_per_pixel == 3 ? 4 : bytes_per_pixel;
1122
1123 std::unique_ptr<u8[]> temp_fb_depth_buffer(new u8[fb_depth_texture.width * fb_depth_texture.height * gl_bpp]);
1124
1125 u8* temp_fb_depth_data = bytes_per_pixel == 3 ? (temp_fb_depth_buffer.get() + 1) : temp_fb_depth_buffer.get();
1126
1127 if (fb_depth_texture.format == Pica::Regs::DepthFormat::D24S8) {
1128 for (int y = 0; y < fb_depth_texture.height; ++y) {
1129 for (int x = 0; x < fb_depth_texture.width; ++x) {
1130 const u32 coarse_y = y & ~7;
1131 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel;
1132 u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width);
1133
1134 u8* pixel = depth_buffer + dst_offset;
1135 u32 depth_stencil = *(u32*)pixel;
1136 ((u32*)temp_fb_depth_data)[gl_pixel_index] = (depth_stencil << 8) | (depth_stencil >> 24);
1137 }
1138 }
1139 } else {
1140 for (int y = 0; y < fb_depth_texture.height; ++y) {
1141 for (int x = 0; x < fb_depth_texture.width; ++x) {
1142 const u32 coarse_y = y & ~7;
1143 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel;
1144 u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width) * gl_bpp;
1145
1146 u8* pixel = depth_buffer + dst_offset;
1147 memcpy(&temp_fb_depth_data[gl_pixel_index], pixel, bytes_per_pixel);
1148 }
1149 }
1150 }
1151
1152 state.texture_units[0].texture_2d = fb_depth_texture.texture.handle;
1153 state.Apply();
1154
1155 glActiveTexture(GL_TEXTURE0);
1156 if (fb_depth_texture.format == Pica::Regs::DepthFormat::D24S8) {
1157 // TODO(Subv): There is a bug with Intel Windows drivers that makes glTexSubImage2D not change the stencil buffer.
1158 // The bug has been reported to Intel (https://communities.intel.com/message/324464)
1159 glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH24_STENCIL8, fb_depth_texture.width, fb_depth_texture.height, 0,
1160 GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, temp_fb_depth_buffer.get());
1161 } else {
1162 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, fb_depth_texture.width, fb_depth_texture.height,
1163 fb_depth_texture.gl_format, fb_depth_texture.gl_type, temp_fb_depth_buffer.get());
1164 }
1165
1166 state.texture_units[0].texture_2d = 0;
1167 state.Apply();
1168}
1169
1170Common::Profiling::TimingCategory buffer_commit_category("Framebuffer Commit");
1171MICROPROFILE_DEFINE(OpenGL_FramebufferCommit, "OpenGL", "FB Commit", MP_RGB(70, 70, 200));
1172
1173void RasterizerOpenGL::CommitColorBuffer() {
1174 if (cached_fb_color_addr != 0) {
1175 u8* color_buffer = Memory::GetPhysicalPointer(cached_fb_color_addr);
1176
1177 if (color_buffer != nullptr) {
1178 Common::Profiling::ScopeTimer timer(buffer_commit_category);
1179 MICROPROFILE_SCOPE(OpenGL_FramebufferCommit);
1180
1181 u32 bytes_per_pixel = Pica::Regs::BytesPerColorPixel(fb_color_texture.format);
1182
1183 std::unique_ptr<u8[]> temp_gl_color_buffer(new u8[fb_color_texture.width * fb_color_texture.height * bytes_per_pixel]);
1184
1185 state.texture_units[0].texture_2d = fb_color_texture.texture.handle;
1186 state.Apply();
1187
1188 glActiveTexture(GL_TEXTURE0);
1189 glGetTexImage(GL_TEXTURE_2D, 0, fb_color_texture.gl_format, fb_color_texture.gl_type, temp_gl_color_buffer.get());
1190
1191 state.texture_units[0].texture_2d = 0;
1192 state.Apply();
1193
1194 // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion is necessary.
1195 for (int y = 0; y < fb_color_texture.height; ++y) {
1196 for (int x = 0; x < fb_color_texture.width; ++x) {
1197 const u32 coarse_y = y & ~7;
1198 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_color_texture.width * bytes_per_pixel;
1199 u32 gl_pixel_index = x * bytes_per_pixel + (fb_color_texture.height - 1 - y) * fb_color_texture.width * bytes_per_pixel;
1200
1201 u8* pixel = color_buffer + dst_offset;
1202 memcpy(pixel, &temp_gl_color_buffer[gl_pixel_index], bytes_per_pixel);
1203 }
1204 }
1205 }
1206 }
1207}
1208
1209void RasterizerOpenGL::CommitDepthBuffer() {
1210 if (cached_fb_depth_addr != 0) {
1211 // TODO: Output seems correct visually, but doesn't quite match sw renderer output. One of them is wrong.
1212 u8* depth_buffer = Memory::GetPhysicalPointer(cached_fb_depth_addr);
1213
1214 if (depth_buffer != nullptr) {
1215 Common::Profiling::ScopeTimer timer(buffer_commit_category);
1216 MICROPROFILE_SCOPE(OpenGL_FramebufferCommit);
1217
1218 u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format);
1219
1220 // OpenGL needs 4 bpp alignment for D24
1221 u32 gl_bpp = bytes_per_pixel == 3 ? 4 : bytes_per_pixel;
1222
1223 std::unique_ptr<u8[]> temp_gl_depth_buffer(new u8[fb_depth_texture.width * fb_depth_texture.height * gl_bpp]);
1224
1225 state.texture_units[0].texture_2d = fb_depth_texture.texture.handle;
1226 state.Apply();
1227
1228 glActiveTexture(GL_TEXTURE0);
1229 glGetTexImage(GL_TEXTURE_2D, 0, fb_depth_texture.gl_format, fb_depth_texture.gl_type, temp_gl_depth_buffer.get());
1230
1231 state.texture_units[0].texture_2d = 0;
1232 state.Apply();
1233
1234 u8* temp_gl_depth_data = bytes_per_pixel == 3 ? (temp_gl_depth_buffer.get() + 1) : temp_gl_depth_buffer.get();
1235
1236 if (fb_depth_texture.format == Pica::Regs::DepthFormat::D24S8) {
1237 for (int y = 0; y < fb_depth_texture.height; ++y) {
1238 for (int x = 0; x < fb_depth_texture.width; ++x) {
1239 const u32 coarse_y = y & ~7;
1240 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel;
1241 u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width);
1242
1243 u8* pixel = depth_buffer + dst_offset;
1244 u32 depth_stencil = ((u32*)temp_gl_depth_data)[gl_pixel_index];
1245 *(u32*)pixel = (depth_stencil >> 8) | (depth_stencil << 24);
1246 }
1247 }
1248 } else {
1249 for (int y = 0; y < fb_depth_texture.height; ++y) {
1250 for (int x = 0; x < fb_depth_texture.width; ++x) {
1251 const u32 coarse_y = y & ~7;
1252 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel;
1253 u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width) * gl_bpp;
1254
1255 u8* pixel = depth_buffer + dst_offset;
1256 memcpy(pixel, &temp_gl_depth_data[gl_pixel_index], bytes_per_pixel);
1257 }
1258 }
1259 }
1260 }
1261 }
1262}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 390349a0c..eed00011a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -4,22 +4,33 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
7#include <cstddef> 8#include <cstddef>
8#include <cstring> 9#include <cstring>
9#include <memory> 10#include <memory>
10#include <vector> 11#include <vector>
11#include <unordered_map> 12#include <unordered_map>
12 13
14#include <glad/glad.h>
15
16#include "common/bit_field.h"
13#include "common/common_types.h" 17#include "common/common_types.h"
14#include "common/hash.h" 18#include "common/hash.h"
19#include "common/vector_math.h"
20
21#include "core/hw/gpu.h"
15 22
16#include "video_core/pica.h" 23#include "video_core/pica.h"
17#include "video_core/pica_state.h" 24#include "video_core/pica_state.h"
25#include "video_core/pica_types.h"
18#include "video_core/rasterizer_interface.h" 26#include "video_core/rasterizer_interface.h"
19#include "video_core/renderer_opengl/gl_rasterizer_cache.h" 27#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
28#include "video_core/renderer_opengl/gl_resource_manager.h"
20#include "video_core/renderer_opengl/gl_state.h" 29#include "video_core/renderer_opengl/gl_state.h"
21#include "video_core/renderer_opengl/pica_to_gl.h" 30#include "video_core/renderer_opengl/pica_to_gl.h"
22#include "video_core/shader/shader_interpreter.h" 31#include "video_core/shader/shader.h"
32
33struct ScreenInfo;
23 34
24/** 35/**
25 * This struct contains all state used to generate the GLSL shader program that emulates the current 36 * This struct contains all state used to generate the GLSL shader program that emulates the current
@@ -28,158 +39,185 @@
28 * directly accessing Pica registers. This should reduce the risk of bugs in shader generation where 39 * directly accessing Pica registers. This should reduce the risk of bugs in shader generation where
29 * Pica state is not being captured in the shader cache key, thereby resulting in (what should be) 40 * Pica state is not being captured in the shader cache key, thereby resulting in (what should be)
30 * two separate shaders sharing the same key. 41 * two separate shaders sharing the same key.
42 *
43 * We use a union because "implicitly-defined copy/move constructor for a union X copies the object representation of X."
44 * and "implicitly-defined copy assignment operator for a union X copies the object representation (3.9) of X."
45 * = Bytewise copy instead of memberwise copy.
46 * This is important because the padding bytes are included in the hash and comparison between objects.
31 */ 47 */
32struct PicaShaderConfig { 48union PicaShaderConfig {
49
33 /// Construct a PicaShaderConfig with the current Pica register configuration. 50 /// Construct a PicaShaderConfig with the current Pica register configuration.
34 static PicaShaderConfig CurrentConfig() { 51 static PicaShaderConfig CurrentConfig() {
35 PicaShaderConfig res; 52 PicaShaderConfig res;
53
54 auto& state = res.state;
55 std::memset(&state, 0, sizeof(PicaShaderConfig::State));
56
36 const auto& regs = Pica::g_state.regs; 57 const auto& regs = Pica::g_state.regs;
37 58
38 res.alpha_test_func = regs.output_merger.alpha_test.enable ? 59 state.depthmap_enable = regs.depthmap_enable;
60
61 state.alpha_test_func = regs.output_merger.alpha_test.enable ?
39 regs.output_merger.alpha_test.func.Value() : Pica::Regs::CompareFunc::Always; 62 regs.output_merger.alpha_test.func.Value() : Pica::Regs::CompareFunc::Always;
40 63
41 // Copy relevant TevStageConfig fields only. We're doing this manually (instead of calling 64 state.texture0_type = regs.texture0.type;
42 // the GetTevStages() function) because BitField explicitly disables copies. 65
43 66 // Copy relevant tev stages fields.
44 res.tev_stages[0].sources_raw = regs.tev_stage0.sources_raw; 67 // We don't sync const_color here because of the high variance, it is a
45 res.tev_stages[1].sources_raw = regs.tev_stage1.sources_raw; 68 // shader uniform instead.
46 res.tev_stages[2].sources_raw = regs.tev_stage2.sources_raw; 69 const auto& tev_stages = regs.GetTevStages();
47 res.tev_stages[3].sources_raw = regs.tev_stage3.sources_raw; 70 DEBUG_ASSERT(state.tev_stages.size() == tev_stages.size());
48 res.tev_stages[4].sources_raw = regs.tev_stage4.sources_raw; 71 for (size_t i = 0; i < tev_stages.size(); i++) {
49 res.tev_stages[5].sources_raw = regs.tev_stage5.sources_raw; 72 const auto& tev_stage = tev_stages[i];
50 73 state.tev_stages[i].sources_raw = tev_stage.sources_raw;
51 res.tev_stages[0].modifiers_raw = regs.tev_stage0.modifiers_raw; 74 state.tev_stages[i].modifiers_raw = tev_stage.modifiers_raw;
52 res.tev_stages[1].modifiers_raw = regs.tev_stage1.modifiers_raw; 75 state.tev_stages[i].ops_raw = tev_stage.ops_raw;
53 res.tev_stages[2].modifiers_raw = regs.tev_stage2.modifiers_raw; 76 state.tev_stages[i].scales_raw = tev_stage.scales_raw;
54 res.tev_stages[3].modifiers_raw = regs.tev_stage3.modifiers_raw; 77 }
55 res.tev_stages[4].modifiers_raw = regs.tev_stage4.modifiers_raw; 78
56 res.tev_stages[5].modifiers_raw = regs.tev_stage5.modifiers_raw; 79 state.combiner_buffer_input =
57
58 res.tev_stages[0].ops_raw = regs.tev_stage0.ops_raw;
59 res.tev_stages[1].ops_raw = regs.tev_stage1.ops_raw;
60 res.tev_stages[2].ops_raw = regs.tev_stage2.ops_raw;
61 res.tev_stages[3].ops_raw = regs.tev_stage3.ops_raw;
62 res.tev_stages[4].ops_raw = regs.tev_stage4.ops_raw;
63 res.tev_stages[5].ops_raw = regs.tev_stage5.ops_raw;
64
65 res.tev_stages[0].scales_raw = regs.tev_stage0.scales_raw;
66 res.tev_stages[1].scales_raw = regs.tev_stage1.scales_raw;
67 res.tev_stages[2].scales_raw = regs.tev_stage2.scales_raw;
68 res.tev_stages[3].scales_raw = regs.tev_stage3.scales_raw;
69 res.tev_stages[4].scales_raw = regs.tev_stage4.scales_raw;
70 res.tev_stages[5].scales_raw = regs.tev_stage5.scales_raw;
71
72 res.combiner_buffer_input =
73 regs.tev_combiner_buffer_input.update_mask_rgb.Value() | 80 regs.tev_combiner_buffer_input.update_mask_rgb.Value() |
74 regs.tev_combiner_buffer_input.update_mask_a.Value() << 4; 81 regs.tev_combiner_buffer_input.update_mask_a.Value() << 4;
75 82
76 // Fragment lighting 83 // Fragment lighting
77 84
78 res.lighting.enable = !regs.lighting.disable; 85 state.lighting.enable = !regs.lighting.disable;
79 res.lighting.src_num = regs.lighting.num_lights + 1; 86 state.lighting.src_num = regs.lighting.num_lights + 1;
80 87
81 for (unsigned light_index = 0; light_index < res.lighting.src_num; ++light_index) { 88 for (unsigned light_index = 0; light_index < state.lighting.src_num; ++light_index) {
82 unsigned num = regs.lighting.light_enable.GetNum(light_index); 89 unsigned num = regs.lighting.light_enable.GetNum(light_index);
83 const auto& light = regs.lighting.light[num]; 90 const auto& light = regs.lighting.light[num];
84 res.lighting.light[light_index].num = num; 91 state.lighting.light[light_index].num = num;
85 res.lighting.light[light_index].directional = light.directional != 0; 92 state.lighting.light[light_index].directional = light.directional != 0;
86 res.lighting.light[light_index].two_sided_diffuse = light.two_sided_diffuse != 0; 93 state.lighting.light[light_index].two_sided_diffuse = light.two_sided_diffuse != 0;
87 res.lighting.light[light_index].dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num); 94 state.lighting.light[light_index].dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num);
88 res.lighting.light[light_index].dist_atten_bias = Pica::float20::FromRaw(light.dist_atten_bias).ToFloat32(); 95 state.lighting.light[light_index].dist_atten_bias = Pica::float20::FromRaw(light.dist_atten_bias).ToFloat32();
89 res.lighting.light[light_index].dist_atten_scale = Pica::float20::FromRaw(light.dist_atten_scale).ToFloat32(); 96 state.lighting.light[light_index].dist_atten_scale = Pica::float20::FromRaw(light.dist_atten_scale).ToFloat32();
90 } 97 }
91 98
92 res.lighting.lut_d0.enable = regs.lighting.disable_lut_d0 == 0; 99 state.lighting.lut_d0.enable = regs.lighting.disable_lut_d0 == 0;
93 res.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0; 100 state.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0;
94 res.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value(); 101 state.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value();
95 res.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0); 102 state.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0);
96 103
97 res.lighting.lut_d1.enable = regs.lighting.disable_lut_d1 == 0; 104 state.lighting.lut_d1.enable = regs.lighting.disable_lut_d1 == 0;
98 res.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0; 105 state.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0;
99 res.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value(); 106 state.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value();
100 res.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1); 107 state.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1);
101 108
102 res.lighting.lut_fr.enable = regs.lighting.disable_lut_fr == 0; 109 state.lighting.lut_fr.enable = regs.lighting.disable_lut_fr == 0;
103 res.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0; 110 state.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0;
104 res.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value(); 111 state.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value();
105 res.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr); 112 state.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr);
106 113
107 res.lighting.lut_rr.enable = regs.lighting.disable_lut_rr == 0; 114 state.lighting.lut_rr.enable = regs.lighting.disable_lut_rr == 0;
108 res.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0; 115 state.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0;
109 res.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value(); 116 state.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value();
110 res.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr); 117 state.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr);
111 118
112 res.lighting.lut_rg.enable = regs.lighting.disable_lut_rg == 0; 119 state.lighting.lut_rg.enable = regs.lighting.disable_lut_rg == 0;
113 res.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0; 120 state.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0;
114 res.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value(); 121 state.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value();
115 res.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg); 122 state.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg);
116 123
117 res.lighting.lut_rb.enable = regs.lighting.disable_lut_rb == 0; 124 state.lighting.lut_rb.enable = regs.lighting.disable_lut_rb == 0;
118 res.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0; 125 state.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0;
119 res.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value(); 126 state.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value();
120 res.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb); 127 state.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb);
121 128
122 res.lighting.config = regs.lighting.config; 129 state.lighting.config = regs.lighting.config;
123 res.lighting.fresnel_selector = regs.lighting.fresnel_selector; 130 state.lighting.fresnel_selector = regs.lighting.fresnel_selector;
124 res.lighting.bump_mode = regs.lighting.bump_mode; 131 state.lighting.bump_mode = regs.lighting.bump_mode;
125 res.lighting.bump_selector = regs.lighting.bump_selector; 132 state.lighting.bump_selector = regs.lighting.bump_selector;
126 res.lighting.bump_renorm = regs.lighting.disable_bump_renorm == 0; 133 state.lighting.bump_renorm = regs.lighting.disable_bump_renorm == 0;
127 res.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0; 134 state.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0;
128 135
129 return res; 136 return res;
130 } 137 }
131 138
132 bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const { 139 bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
133 return (stage_index < 4) && (combiner_buffer_input & (1 << stage_index)); 140 return (stage_index < 4) && (state.combiner_buffer_input & (1 << stage_index));
134 } 141 }
135 142
136 bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const { 143 bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const {
137 return (stage_index < 4) && ((combiner_buffer_input >> 4) & (1 << stage_index)); 144 return (stage_index < 4) && ((state.combiner_buffer_input >> 4) & (1 << stage_index));
138 } 145 }
139 146
140 bool operator ==(const PicaShaderConfig& o) const { 147 bool operator ==(const PicaShaderConfig& o) const {
141 return std::memcmp(this, &o, sizeof(PicaShaderConfig)) == 0; 148 return std::memcmp(&state, &o.state, sizeof(PicaShaderConfig::State)) == 0;
142 }; 149 };
143 150
144 Pica::Regs::CompareFunc alpha_test_func = Pica::Regs::CompareFunc::Never; 151 // NOTE: MSVC15 (Update 2) doesn't think `delete`'d constructors and operators are TC.
145 std::array<Pica::Regs::TevStageConfig, 6> tev_stages = {}; 152 // This makes BitField not TC when used in a union or struct so we have to resort
146 u8 combiner_buffer_input = 0; 153 // to this ugly hack.
154 // Once that bug is fixed we can use Pica::Regs::TevStageConfig here.
155 // Doesn't include const_color because we don't sync it, see comment in CurrentConfig()
156 struct TevStageConfigRaw {
157 u32 sources_raw;
158 u32 modifiers_raw;
159 u32 ops_raw;
160 u32 scales_raw;
161 explicit operator Pica::Regs::TevStageConfig() const noexcept {
162 Pica::Regs::TevStageConfig stage;
163 stage.sources_raw = sources_raw;
164 stage.modifiers_raw = modifiers_raw;
165 stage.ops_raw = ops_raw;
166 stage.const_color = 0;
167 stage.scales_raw = scales_raw;
168 return stage;
169 }
170 };
147 171
148 struct { 172 struct State {
149 struct { 173
150 unsigned num = 0; 174 Pica::Regs::CompareFunc alpha_test_func;
151 bool directional = false; 175 Pica::Regs::TextureConfig::TextureType texture0_type;
152 bool two_sided_diffuse = false; 176 std::array<TevStageConfigRaw, 6> tev_stages;
153 bool dist_atten_enable = false; 177 u8 combiner_buffer_input;
154 GLfloat dist_atten_scale = 0.0f; 178
155 GLfloat dist_atten_bias = 0.0f; 179 Pica::Regs::DepthBuffering depthmap_enable;
156 } light[8];
157
158 bool enable = false;
159 unsigned src_num = 0;
160 Pica::Regs::LightingBumpMode bump_mode = Pica::Regs::LightingBumpMode::None;
161 unsigned bump_selector = 0;
162 bool bump_renorm = false;
163 bool clamp_highlights = false;
164
165 Pica::Regs::LightingConfig config = Pica::Regs::LightingConfig::Config0;
166 Pica::Regs::LightingFresnelSelector fresnel_selector = Pica::Regs::LightingFresnelSelector::None;
167 180
168 struct { 181 struct {
169 bool enable = false; 182 struct {
170 bool abs_input = false; 183 unsigned num;
171 Pica::Regs::LightingLutInput type = Pica::Regs::LightingLutInput::NH; 184 bool directional;
172 float scale = 1.0f; 185 bool two_sided_diffuse;
173 } lut_d0, lut_d1, lut_fr, lut_rr, lut_rg, lut_rb; 186 bool dist_atten_enable;
174 } lighting; 187 GLfloat dist_atten_scale;
188 GLfloat dist_atten_bias;
189 } light[8];
190
191 bool enable;
192 unsigned src_num;
193 Pica::Regs::LightingBumpMode bump_mode;
194 unsigned bump_selector;
195 bool bump_renorm;
196 bool clamp_highlights;
197
198 Pica::Regs::LightingConfig config;
199 Pica::Regs::LightingFresnelSelector fresnel_selector;
200
201 struct {
202 bool enable;
203 bool abs_input;
204 Pica::Regs::LightingLutInput type;
205 float scale;
206 } lut_d0, lut_d1, lut_fr, lut_rr, lut_rg, lut_rb;
207 } lighting;
208
209 } state;
175}; 210};
211#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER)
212static_assert(std::is_trivially_copyable<PicaShaderConfig::State>::value, "PicaShaderConfig::State must be trivially copyable");
213#endif
176 214
177namespace std { 215namespace std {
178 216
179template <> 217template <>
180struct hash<PicaShaderConfig> { 218struct hash<PicaShaderConfig> {
181 size_t operator()(const PicaShaderConfig& k) const { 219 size_t operator()(const PicaShaderConfig& k) const {
182 return Common::ComputeHash64(&k, sizeof(PicaShaderConfig)); 220 return Common::ComputeHash64(&k.state, sizeof(PicaShaderConfig::State));
183 } 221 }
184}; 222};
185 223
@@ -191,16 +229,17 @@ public:
191 RasterizerOpenGL(); 229 RasterizerOpenGL();
192 ~RasterizerOpenGL() override; 230 ~RasterizerOpenGL() override;
193 231
194 void InitObjects() override;
195 void Reset() override;
196 void AddTriangle(const Pica::Shader::OutputVertex& v0, 232 void AddTriangle(const Pica::Shader::OutputVertex& v0,
197 const Pica::Shader::OutputVertex& v1, 233 const Pica::Shader::OutputVertex& v1,
198 const Pica::Shader::OutputVertex& v2) override; 234 const Pica::Shader::OutputVertex& v2) override;
199 void DrawTriangles() override; 235 void DrawTriangles() override;
200 void FlushFramebuffer() override;
201 void NotifyPicaRegisterChanged(u32 id) override; 236 void NotifyPicaRegisterChanged(u32 id) override;
237 void FlushAll() override;
202 void FlushRegion(PAddr addr, u32 size) override; 238 void FlushRegion(PAddr addr, u32 size) override;
203 void InvalidateRegion(PAddr addr, u32 size) override; 239 void FlushAndInvalidateRegion(PAddr addr, u32 size) override;
240 bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override;
241 bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override;
242 bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) override;
204 243
205 /// OpenGL shader generated for a given Pica register state 244 /// OpenGL shader generated for a given Pica register state
206 struct PicaShader { 245 struct PicaShader {
@@ -210,26 +249,6 @@ public:
210 249
211private: 250private:
212 251
213 /// Structure used for storing information about color textures
214 struct TextureInfo {
215 OGLTexture texture;
216 GLsizei width;
217 GLsizei height;
218 Pica::Regs::ColorFormat format;
219 GLenum gl_format;
220 GLenum gl_type;
221 };
222
223 /// Structure used for storing information about depth textures
224 struct DepthTextureInfo {
225 OGLTexture texture;
226 GLsizei width;
227 GLsizei height;
228 Pica::Regs::DepthFormat format;
229 GLenum gl_format;
230 GLenum gl_type;
231 };
232
233 struct SamplerInfo { 252 struct SamplerInfo {
234 using TextureConfig = Pica::Regs::TextureConfig; 253 using TextureConfig = Pica::Regs::TextureConfig;
235 254
@@ -265,6 +284,7 @@ private:
265 tex_coord1[1] = v.tc1.y.ToFloat32(); 284 tex_coord1[1] = v.tc1.y.ToFloat32();
266 tex_coord2[0] = v.tc2.x.ToFloat32(); 285 tex_coord2[0] = v.tc2.x.ToFloat32();
267 tex_coord2[1] = v.tc2.y.ToFloat32(); 286 tex_coord2[1] = v.tc2.y.ToFloat32();
287 tex_coord0_w = v.tc0_w.ToFloat32();
268 normquat[0] = v.quat.x.ToFloat32(); 288 normquat[0] = v.quat.x.ToFloat32();
269 normquat[1] = v.quat.y.ToFloat32(); 289 normquat[1] = v.quat.y.ToFloat32();
270 normquat[2] = v.quat.z.ToFloat32(); 290 normquat[2] = v.quat.z.ToFloat32();
@@ -285,6 +305,7 @@ private:
285 GLfloat tex_coord0[2]; 305 GLfloat tex_coord0[2];
286 GLfloat tex_coord1[2]; 306 GLfloat tex_coord1[2];
287 GLfloat tex_coord2[2]; 307 GLfloat tex_coord2[2];
308 GLfloat tex_coord0_w;
288 GLfloat normquat[4]; 309 GLfloat normquat[4];
289 GLfloat view[3]; 310 GLfloat view[3];
290 }; 311 };
@@ -303,6 +324,7 @@ private:
303 GLvec4 const_color[6]; 324 GLvec4 const_color[6];
304 GLvec4 tev_combiner_buffer_color; 325 GLvec4 tev_combiner_buffer_color;
305 GLint alphatest_ref; 326 GLint alphatest_ref;
327 GLfloat depth_scale;
306 GLfloat depth_offset; 328 GLfloat depth_offset;
307 alignas(16) GLvec3 lighting_global_ambient; 329 alignas(16) GLvec3 lighting_global_ambient;
308 LightSrc light_src[8]; 330 LightSrc light_src[8];
@@ -311,18 +333,9 @@ private:
311 static_assert(sizeof(UniformData) == 0x310, "The size of the UniformData structure has changed, update the structure in the shader"); 333 static_assert(sizeof(UniformData) == 0x310, "The size of the UniformData structure has changed, update the structure in the shader");
312 static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec"); 334 static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec");
313 335
314 /// Reconfigure the OpenGL color texture to use the given format and dimensions
315 void ReconfigureColorTexture(TextureInfo& texture, Pica::Regs::ColorFormat format, u32 width, u32 height);
316
317 /// Reconfigure the OpenGL depth texture to use the given format and dimensions
318 void ReconfigureDepthTexture(DepthTextureInfo& texture, Pica::Regs::DepthFormat format, u32 width, u32 height);
319
320 /// Sets the OpenGL shader in accordance with the current PICA register state 336 /// Sets the OpenGL shader in accordance with the current PICA register state
321 void SetShader(); 337 void SetShader();
322 338
323 /// Syncs the state and contents of the OpenGL framebuffer to match the current PICA framebuffer
324 void SyncFramebuffer();
325
326 /// Syncs the cull mode to match the PICA register 339 /// Syncs the cull mode to match the PICA register
327 void SyncCullMode(); 340 void SyncCullMode();
328 341
@@ -359,72 +372,42 @@ private:
359 /// Syncs the depth test states to match the PICA register 372 /// Syncs the depth test states to match the PICA register
360 void SyncDepthTest(); 373 void SyncDepthTest();
361 374
362 /// Syncs the TEV constant color to match the PICA register
363 void SyncTevConstColor(int tev_index, const Pica::Regs::TevStageConfig& tev_stage);
364
365 /// Syncs the TEV combiner color buffer to match the PICA register 375 /// Syncs the TEV combiner color buffer to match the PICA register
366 void SyncCombinerColor(); 376 void SyncCombinerColor();
367 377
378 /// Syncs the TEV constant color to match the PICA register
379 void SyncTevConstColor(int tev_index, const Pica::Regs::TevStageConfig& tev_stage);
380
368 /// Syncs the lighting global ambient color to match the PICA register 381 /// Syncs the lighting global ambient color to match the PICA register
369 void SyncGlobalAmbient(); 382 void SyncGlobalAmbient();
370 383
371 /// Syncs the lighting lookup tables 384 /// Syncs the lighting lookup tables
372 void SyncLightingLUT(unsigned index); 385 void SyncLightingLUT(unsigned index);
373 386
374 /// Syncs the specified light's diffuse color to match the PICA register
375 void SyncLightDiffuse(int light_index);
376
377 /// Syncs the specified light's ambient color to match the PICA register
378 void SyncLightAmbient(int light_index);
379
380 /// Syncs the specified light's position to match the PICA register
381 void SyncLightPosition(int light_index);
382
383 /// Syncs the specified light's specular 0 color to match the PICA register 387 /// Syncs the specified light's specular 0 color to match the PICA register
384 void SyncLightSpecular0(int light_index); 388 void SyncLightSpecular0(int light_index);
385 389
386 /// Syncs the specified light's specular 1 color to match the PICA register 390 /// Syncs the specified light's specular 1 color to match the PICA register
387 void SyncLightSpecular1(int light_index); 391 void SyncLightSpecular1(int light_index);
388 392
389 /// Syncs the remaining OpenGL drawing state to match the current PICA state 393 /// Syncs the specified light's diffuse color to match the PICA register
390 void SyncDrawState(); 394 void SyncLightDiffuse(int light_index);
391
392 /// Copies the 3DS color framebuffer into the OpenGL color framebuffer texture
393 void ReloadColorBuffer();
394 395
395 /// Copies the 3DS depth framebuffer into the OpenGL depth framebuffer texture 396 /// Syncs the specified light's ambient color to match the PICA register
396 void ReloadDepthBuffer(); 397 void SyncLightAmbient(int light_index);
397 398
398 /** 399 /// Syncs the specified light's position to match the PICA register
399 * Save the current OpenGL color framebuffer to the current PICA framebuffer in 3DS memory 400 void SyncLightPosition(int light_index);
400 * Loads the OpenGL framebuffer textures into temporary buffers
401 * Then copies into the 3DS framebuffer using proper Morton order
402 */
403 void CommitColorBuffer();
404 401
405 /** 402 OpenGLState state;
406 * Save the current OpenGL depth framebuffer to the current PICA framebuffer in 3DS memory
407 * Loads the OpenGL framebuffer textures into temporary buffers
408 * Then copies into the 3DS framebuffer using proper Morton order
409 */
410 void CommitDepthBuffer();
411 403
412 RasterizerCacheOpenGL res_cache; 404 RasterizerCacheOpenGL res_cache;
413 405
414 std::vector<HardwareVertex> vertex_batch; 406 std::vector<HardwareVertex> vertex_batch;
415 407
416 OpenGLState state;
417
418 PAddr cached_fb_color_addr;
419 PAddr cached_fb_depth_addr;
420
421 // Hardware rasterizer
422 std::array<SamplerInfo, 3> texture_samplers;
423 TextureInfo fb_color_texture;
424 DepthTextureInfo fb_depth_texture;
425
426 std::unordered_map<PicaShaderConfig, std::unique_ptr<PicaShader>> shader_cache; 408 std::unordered_map<PicaShaderConfig, std::unique_ptr<PicaShader>> shader_cache;
427 const PicaShader* current_shader = nullptr; 409 const PicaShader* current_shader = nullptr;
410 bool shader_dirty;
428 411
429 struct { 412 struct {
430 UniformData data; 413 UniformData data;
@@ -432,11 +415,12 @@ private:
432 bool dirty; 415 bool dirty;
433 } uniform_block_data; 416 } uniform_block_data;
434 417
418 std::array<SamplerInfo, 3> texture_samplers;
435 OGLVertexArray vertex_array; 419 OGLVertexArray vertex_array;
436 OGLBuffer vertex_buffer; 420 OGLBuffer vertex_buffer;
437 OGLBuffer uniform_buffer; 421 OGLBuffer uniform_buffer;
438 OGLFramebuffer framebuffer; 422 OGLFramebuffer framebuffer;
439 423
440 std::array<OGLTexture, 6> lighting_lut; 424 std::array<OGLTexture, 6> lighting_luts;
441 std::array<std::array<GLvec4, 256>, 6> lighting_lut_data; 425 std::array<std::array<GLvec4, 256>, 6> lighting_lut_data;
442}; 426};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 1323c12e4..7efd0038a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -2,9 +2,19 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <memory> 5#include <algorithm>
6#include <atomic>
7#include <cstring>
8#include <iterator>
9#include <unordered_set>
10#include <utility>
11#include <vector>
6 12
7#include "common/hash.h" 13#include <glad/glad.h>
14
15#include "common/bit_field.h"
16#include "common/emu_window.h"
17#include "common/logging/log.h"
8#include "common/math_util.h" 18#include "common/math_util.h"
9#include "common/microprofile.h" 19#include "common/microprofile.h"
10#include "common/vector_math.h" 20#include "common/vector_math.h"
@@ -12,71 +22,693 @@
12#include "core/memory.h" 22#include "core/memory.h"
13 23
14#include "video_core/debug_utils/debug_utils.h" 24#include "video_core/debug_utils/debug_utils.h"
25#include "video_core/pica_state.h"
15#include "video_core/renderer_opengl/gl_rasterizer_cache.h" 26#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
16#include "video_core/renderer_opengl/pica_to_gl.h" 27#include "video_core/renderer_opengl/gl_state.h"
28#include "video_core/utils.h"
29#include "video_core/video_core.h"
30
31struct FormatTuple {
32 GLint internal_format;
33 GLenum format;
34 GLenum type;
35};
36
37static const std::array<FormatTuple, 5> fb_format_tuples = {{
38 { GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8 }, // RGBA8
39 { GL_RGB8, GL_BGR, GL_UNSIGNED_BYTE }, // RGB8
40 { GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1 }, // RGB5A1
41 { GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5 }, // RGB565
42 { GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4 }, // RGBA4
43}};
44
45static const std::array<FormatTuple, 4> depth_format_tuples = {{
46 { GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT }, // D16
47 {},
48 { GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT }, // D24
49 { GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8 }, // D24S8
50}};
51
52RasterizerCacheOpenGL::RasterizerCacheOpenGL() {
53 transfer_framebuffers[0].Create();
54 transfer_framebuffers[1].Create();
55}
17 56
18RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { 57RasterizerCacheOpenGL::~RasterizerCacheOpenGL() {
19 InvalidateAll(); 58 FlushAll();
59}
60
61static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width, u32 height, u32 bytes_per_pixel, u32 gl_bytes_per_pixel, u8* morton_data, u8* gl_data, bool morton_to_gl) {
62 using PixelFormat = CachedSurface::PixelFormat;
63
64 u8* data_ptrs[2];
65 u32 depth_stencil_shifts[2] = {24, 8};
66
67 if (morton_to_gl) {
68 std::swap(depth_stencil_shifts[0], depth_stencil_shifts[1]);
69 }
70
71 if (pixel_format == PixelFormat::D24S8) {
72 for (unsigned y = 0; y < height; ++y) {
73 for (unsigned x = 0; x < width; ++x) {
74 const u32 coarse_y = y & ~7;
75 u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel;
76 u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel;
77
78 data_ptrs[morton_to_gl] = morton_data + morton_offset;
79 data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index];
80
81 // Swap depth and stencil value ordering since 3DS does not match OpenGL
82 u32 depth_stencil;
83 memcpy(&depth_stencil, data_ptrs[1], sizeof(u32));
84 depth_stencil = (depth_stencil << depth_stencil_shifts[0]) | (depth_stencil >> depth_stencil_shifts[1]);
85
86 memcpy(data_ptrs[0], &depth_stencil, sizeof(u32));
87 }
88 }
89 } else {
90 for (unsigned y = 0; y < height; ++y) {
91 for (unsigned x = 0; x < width; ++x) {
92 const u32 coarse_y = y & ~7;
93 u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel;
94 u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel;
95
96 data_ptrs[morton_to_gl] = morton_data + morton_offset;
97 data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index];
98
99 memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
100 }
101 }
102 }
103}
104
105bool RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, CachedSurface::SurfaceType type, const MathUtil::Rectangle<int>& src_rect, const MathUtil::Rectangle<int>& dst_rect) {
106 using SurfaceType = CachedSurface::SurfaceType;
107
108 OpenGLState cur_state = OpenGLState::GetCurState();
109
110 // Make sure textures aren't bound to texture units, since going to bind them to framebuffer components
111 OpenGLState::ResetTexture(src_tex);
112 OpenGLState::ResetTexture(dst_tex);
113
114 // Keep track of previous framebuffer bindings
115 GLuint old_fbs[2] = { cur_state.draw.read_framebuffer, cur_state.draw.draw_framebuffer };
116 cur_state.draw.read_framebuffer = transfer_framebuffers[0].handle;
117 cur_state.draw.draw_framebuffer = transfer_framebuffers[1].handle;
118 cur_state.Apply();
119
120 u32 buffers = 0;
121
122 if (type == SurfaceType::Color || type == SurfaceType::Texture) {
123 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex, 0);
124 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
125
126 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex, 0);
127 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
128
129 buffers = GL_COLOR_BUFFER_BIT;
130 } else if (type == SurfaceType::Depth) {
131 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
132 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0);
133 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
134
135 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
136 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0);
137 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
138
139 buffers = GL_DEPTH_BUFFER_BIT;
140 } else if (type == SurfaceType::DepthStencil) {
141 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
142 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0);
143
144 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
145 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0);
146
147 buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
148 }
149
150 if (OpenGLState::CheckFBStatus(GL_READ_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
151 return false;
152 }
153
154 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
155 return false;
156 }
157
158 glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
159 dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom,
160 buffers, buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST);
161
162 // Restore previous framebuffer bindings
163 cur_state.draw.read_framebuffer = old_fbs[0];
164 cur_state.draw.draw_framebuffer = old_fbs[1];
165 cur_state.Apply();
166
167 return true;
168}
169
170bool RasterizerCacheOpenGL::TryBlitSurfaces(CachedSurface* src_surface, const MathUtil::Rectangle<int>& src_rect, CachedSurface* dst_surface, const MathUtil::Rectangle<int>& dst_rect) {
171 using SurfaceType = CachedSurface::SurfaceType;
172
173 if (!CachedSurface::CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format)) {
174 return false;
175 }
176
177 return BlitTextures(src_surface->texture.handle, dst_surface->texture.handle, CachedSurface::GetFormatType(src_surface->pixel_format), src_rect, dst_rect);
178}
179
180static void AllocateSurfaceTexture(GLuint texture, CachedSurface::PixelFormat pixel_format, u32 width, u32 height) {
181 // Allocate an uninitialized texture of appropriate size and format for the surface
182 using SurfaceType = CachedSurface::SurfaceType;
183
184 OpenGLState cur_state = OpenGLState::GetCurState();
185
186 // Keep track of previous texture bindings
187 GLuint old_tex = cur_state.texture_units[0].texture_2d;
188 cur_state.texture_units[0].texture_2d = texture;
189 cur_state.Apply();
190 glActiveTexture(GL_TEXTURE0);
191
192 SurfaceType type = CachedSurface::GetFormatType(pixel_format);
193
194 FormatTuple tuple;
195 if (type == SurfaceType::Color) {
196 ASSERT((size_t)pixel_format < fb_format_tuples.size());
197 tuple = fb_format_tuples[(unsigned int)pixel_format];
198 } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) {
199 size_t tuple_idx = (size_t)pixel_format - 14;
200 ASSERT(tuple_idx < depth_format_tuples.size());
201 tuple = depth_format_tuples[tuple_idx];
202 } else {
203 tuple = { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE };
204 }
205
206 glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, width, height, 0,
207 tuple.format, tuple.type, nullptr);
208
209 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
210 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
211 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
212 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
213
214 // Restore previous texture bindings
215 cur_state.texture_units[0].texture_2d = old_tex;
216 cur_state.Apply();
20} 217}
21 218
22MICROPROFILE_DEFINE(OpenGL_TextureUpload, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192)); 219MICROPROFILE_DEFINE(OpenGL_SurfaceUpload, "OpenGL", "Surface Upload", MP_RGB(128, 64, 192));
220CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bool match_res_scale, bool load_if_create) {
221 using PixelFormat = CachedSurface::PixelFormat;
222 using SurfaceType = CachedSurface::SurfaceType;
223
224 if (params.addr == 0) {
225 return nullptr;
226 }
227
228 u32 params_size = params.width * params.height * CachedSurface::GetFormatBpp(params.pixel_format) / 8;
229
230 // Check for an exact match in existing surfaces
231 CachedSurface* best_exact_surface = nullptr;
232 float exact_surface_goodness = -1.f;
233
234 auto surface_interval = boost::icl::interval<PAddr>::right_open(params.addr, params.addr + params_size);
235 auto range = surface_cache.equal_range(surface_interval);
236 for (auto it = range.first; it != range.second; ++it) {
237 for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) {
238 CachedSurface* surface = it2->get();
239
240 // Check if the request matches the surface exactly
241 if (params.addr == surface->addr &&
242 params.width == surface->width && params.height == surface->height &&
243 params.pixel_format == surface->pixel_format)
244 {
245 // Make sure optional param-matching criteria are fulfilled
246 bool tiling_match = (params.is_tiled == surface->is_tiled);
247 bool res_scale_match = (params.res_scale_width == surface->res_scale_width && params.res_scale_height == surface->res_scale_height);
248 if (!match_res_scale || res_scale_match) {
249 // Prioritize same-tiling and highest resolution surfaces
250 float match_goodness = (float)tiling_match + surface->res_scale_width * surface->res_scale_height;
251 if (match_goodness > exact_surface_goodness || surface->dirty) {
252 exact_surface_goodness = match_goodness;
253 best_exact_surface = surface;
254 }
255 }
256 }
257 }
258 }
259
260 // Return the best exact surface if found
261 if (best_exact_surface != nullptr) {
262 return best_exact_surface;
263 }
264
265 // No matching surfaces found, so create a new one
266 u8* texture_src_data = Memory::GetPhysicalPointer(params.addr);
267 if (texture_src_data == nullptr) {
268 return nullptr;
269 }
270
271 MICROPROFILE_SCOPE(OpenGL_SurfaceUpload);
272
273 std::shared_ptr<CachedSurface> new_surface = std::make_shared<CachedSurface>();
23 274
24void RasterizerCacheOpenGL::LoadAndBindTexture(OpenGLState &state, unsigned texture_unit, const Pica::DebugUtils::TextureInfo& info) { 275 new_surface->addr = params.addr;
25 const auto cached_texture = texture_cache.find(info.physical_address); 276 new_surface->size = params_size;
26 277
27 if (cached_texture != texture_cache.end()) { 278 new_surface->texture.Create();
28 state.texture_units[texture_unit].texture_2d = cached_texture->second->texture.handle; 279 new_surface->width = params.width;
29 state.Apply(); 280 new_surface->height = params.height;
281 new_surface->stride = params.stride;
282 new_surface->res_scale_width = params.res_scale_width;
283 new_surface->res_scale_height = params.res_scale_height;
284
285 new_surface->is_tiled = params.is_tiled;
286 new_surface->pixel_format = params.pixel_format;
287 new_surface->dirty = false;
288
289 if (!load_if_create) {
290 // Don't load any data; just allocate the surface's texture
291 AllocateSurfaceTexture(new_surface->texture.handle, new_surface->pixel_format, new_surface->GetScaledWidth(), new_surface->GetScaledHeight());
30 } else { 292 } else {
31 MICROPROFILE_SCOPE(OpenGL_TextureUpload); 293 // TODO: Consider attempting subrect match in existing surfaces and direct blit here instead of memory upload below if that's a common scenario in some game
294
295 Memory::RasterizerFlushRegion(params.addr, params_size);
296
297 // Load data from memory to the new surface
298 OpenGLState cur_state = OpenGLState::GetCurState();
299
300 GLuint old_tex = cur_state.texture_units[0].texture_2d;
301 cur_state.texture_units[0].texture_2d = new_surface->texture.handle;
302 cur_state.Apply();
303 glActiveTexture(GL_TEXTURE0);
304
305 glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)new_surface->stride);
306 if (!new_surface->is_tiled) {
307 // TODO: Ensure this will always be a color format, not a depth or other format
308 ASSERT((size_t)new_surface->pixel_format < fb_format_tuples.size());
309 const FormatTuple& tuple = fb_format_tuples[(unsigned int)params.pixel_format];
310
311 glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0,
312 tuple.format, tuple.type, texture_src_data);
313 } else {
314 SurfaceType type = CachedSurface::GetFormatType(new_surface->pixel_format);
315 if (type != SurfaceType::Depth && type != SurfaceType::DepthStencil) {
316 FormatTuple tuple;
317 if ((size_t)params.pixel_format < fb_format_tuples.size()) {
318 tuple = fb_format_tuples[(unsigned int)params.pixel_format];
319 } else {
320 // Texture
321 tuple = { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE };
322 }
323
324 std::vector<Math::Vec4<u8>> tex_buffer(params.width * params.height);
32 325
33 std::unique_ptr<CachedTexture> new_texture = std::make_unique<CachedTexture>(); 326 Pica::DebugUtils::TextureInfo tex_info;
327 tex_info.width = params.width;
328 tex_info.height = params.height;
329 tex_info.stride = params.width * CachedSurface::GetFormatBpp(params.pixel_format) / 8;
330 tex_info.format = (Pica::Regs::TextureFormat)params.pixel_format;
331 tex_info.physical_address = params.addr;
34 332
35 new_texture->texture.Create(); 333 for (unsigned y = 0; y < params.height; ++y) {
36 state.texture_units[texture_unit].texture_2d = new_texture->texture.handle; 334 for (unsigned x = 0; x < params.width; ++x) {
37 state.Apply(); 335 tex_buffer[x + params.width * y] = Pica::DebugUtils::LookupTexture(texture_src_data, x, params.height - 1 - y, tex_info);
38 glActiveTexture(GL_TEXTURE0 + texture_unit); 336 }
337 }
39 338
40 u8* texture_src_data = Memory::GetPhysicalPointer(info.physical_address); 339 glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0, GL_RGBA, GL_UNSIGNED_BYTE, tex_buffer.data());
340 } else {
341 // Depth/Stencil formats need special treatment since they aren't sampleable using LookupTexture and can't use RGBA format
342 size_t tuple_idx = (size_t)params.pixel_format - 14;
343 ASSERT(tuple_idx < depth_format_tuples.size());
344 const FormatTuple& tuple = depth_format_tuples[tuple_idx];
41 345
42 new_texture->width = info.width; 346 u32 bytes_per_pixel = CachedSurface::GetFormatBpp(params.pixel_format) / 8;
43 new_texture->height = info.height;
44 new_texture->size = info.stride * info.height;
45 new_texture->addr = info.physical_address;
46 new_texture->hash = Common::ComputeHash64(texture_src_data, new_texture->size);
47 347
48 std::unique_ptr<Math::Vec4<u8>[]> temp_texture_buffer_rgba(new Math::Vec4<u8>[info.width * info.height]); 348 // OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type
349 bool use_4bpp = (params.pixel_format == PixelFormat::D24);
49 350
50 for (int y = 0; y < info.height; ++y) { 351 u32 gl_bytes_per_pixel = use_4bpp ? 4 : bytes_per_pixel;
51 for (int x = 0; x < info.width; ++x) { 352
52 temp_texture_buffer_rgba[x + info.width * y] = Pica::DebugUtils::LookupTexture(texture_src_data, x, info.height - 1 - y, info); 353 std::vector<u8> temp_fb_depth_buffer(params.width * params.height * gl_bytes_per_pixel);
354
355 u8* temp_fb_depth_buffer_ptr = use_4bpp ? temp_fb_depth_buffer.data() + 1 : temp_fb_depth_buffer.data();
356
357 MortonCopyPixels(params.pixel_format, params.width, params.height, bytes_per_pixel, gl_bytes_per_pixel, texture_src_data, temp_fb_depth_buffer_ptr, true);
358
359 glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0,
360 tuple.format, tuple.type, temp_fb_depth_buffer.data());
53 } 361 }
54 } 362 }
363 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
364
365 // If not 1x scale, blit 1x texture to a new scaled texture and replace texture in surface
366 if (new_surface->res_scale_width != 1.f || new_surface->res_scale_height != 1.f) {
367 OGLTexture scaled_texture;
368 scaled_texture.Create();
369
370 AllocateSurfaceTexture(scaled_texture.handle, new_surface->pixel_format, new_surface->GetScaledWidth(), new_surface->GetScaledHeight());
371 BlitTextures(new_surface->texture.handle, scaled_texture.handle, CachedSurface::GetFormatType(new_surface->pixel_format),
372 MathUtil::Rectangle<int>(0, 0, new_surface->width, new_surface->height),
373 MathUtil::Rectangle<int>(0, 0, new_surface->GetScaledWidth(), new_surface->GetScaledHeight()));
374
375 new_surface->texture.Release();
376 new_surface->texture.handle = scaled_texture.handle;
377 scaled_texture.handle = 0;
378 cur_state.texture_units[0].texture_2d = new_surface->texture.handle;
379 cur_state.Apply();
380 }
55 381
56 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, info.width, info.height, 0, GL_RGBA, GL_UNSIGNED_BYTE, temp_texture_buffer_rgba.get()); 382 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
383 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
384 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
385 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
57 386
58 texture_cache.emplace(info.physical_address, std::move(new_texture)); 387 cur_state.texture_units[0].texture_2d = old_tex;
388 cur_state.Apply();
59 } 389 }
390
391 Memory::RasterizerMarkRegionCached(new_surface->addr, new_surface->size, 1);
392 surface_cache.add(std::make_pair(boost::icl::interval<PAddr>::right_open(new_surface->addr, new_surface->addr + new_surface->size), std::set<std::shared_ptr<CachedSurface>>({ new_surface })));
393 return new_surface.get();
60} 394}
61 395
62void RasterizerCacheOpenGL::InvalidateInRange(PAddr addr, u32 size, bool ignore_hash) { 396CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params, bool match_res_scale, bool load_if_create, MathUtil::Rectangle<int>& out_rect) {
63 // TODO: Optimize by also inserting upper bound (addr + size) of each texture into the same map and also narrow using lower_bound 397 if (params.addr == 0) {
64 auto cache_upper_bound = texture_cache.upper_bound(addr + size); 398 return nullptr;
399 }
400
401 u32 total_pixels = params.width * params.height;
402 u32 params_size = total_pixels * CachedSurface::GetFormatBpp(params.pixel_format) / 8;
65 403
66 for (auto it = texture_cache.begin(); it != cache_upper_bound;) { 404 // Attempt to find encompassing surfaces
67 const auto& info = *it->second; 405 CachedSurface* best_subrect_surface = nullptr;
406 float subrect_surface_goodness = -1.f;
68 407
69 // Flush the texture only if the memory region intersects and a change is detected 408 auto surface_interval = boost::icl::interval<PAddr>::right_open(params.addr, params.addr + params_size);
70 if (MathUtil::IntervalsIntersect(addr, size, info.addr, info.size) && 409 auto cache_upper_bound = surface_cache.upper_bound(surface_interval);
71 (ignore_hash || info.hash != Common::ComputeHash64(Memory::GetPhysicalPointer(info.addr), info.size))) { 410 for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) {
411 for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) {
412 CachedSurface* surface = it2->get();
72 413
73 it = texture_cache.erase(it); 414 // Check if the request is contained in the surface
415 if (params.addr >= surface->addr &&
416 params.addr + params_size - 1 <= surface->addr + surface->size - 1 &&
417 params.pixel_format == surface->pixel_format)
418 {
419 // Make sure optional param-matching criteria are fulfilled
420 bool tiling_match = (params.is_tiled == surface->is_tiled);
421 bool res_scale_match = (params.res_scale_width == surface->res_scale_width && params.res_scale_height == surface->res_scale_height);
422 if (!match_res_scale || res_scale_match) {
423 // Prioritize same-tiling and highest resolution surfaces
424 float match_goodness = (float)tiling_match + surface->res_scale_width * surface->res_scale_height;
425 if (match_goodness > subrect_surface_goodness || surface->dirty) {
426 subrect_surface_goodness = match_goodness;
427 best_subrect_surface = surface;
428 }
429 }
430 }
431 }
432 }
433
434 // Return the best subrect surface if found
435 if (best_subrect_surface != nullptr) {
436 unsigned int bytes_per_pixel = (CachedSurface::GetFormatBpp(best_subrect_surface->pixel_format) / 8);
437
438 int x0, y0;
439
440 if (!params.is_tiled) {
441 u32 begin_pixel_index = (params.addr - best_subrect_surface->addr) / bytes_per_pixel;
442 x0 = begin_pixel_index % best_subrect_surface->width;
443 y0 = begin_pixel_index / best_subrect_surface->width;
444
445 out_rect = MathUtil::Rectangle<int>(x0, y0, x0 + params.width, y0 + params.height);
446 } else {
447 u32 bytes_per_tile = 8 * 8 * bytes_per_pixel;
448 u32 tiles_per_row = best_subrect_surface->width / 8;
449
450 u32 begin_tile_index = (params.addr - best_subrect_surface->addr) / bytes_per_tile;
451 x0 = begin_tile_index % tiles_per_row * 8;
452 y0 = begin_tile_index / tiles_per_row * 8;
453
454 // Tiled surfaces are flipped vertically in the rasterizer vs. 3DS memory.
455 out_rect = MathUtil::Rectangle<int>(x0, best_subrect_surface->height - y0, x0 + params.width, best_subrect_surface->height - (y0 + params.height));
456 }
457
458 out_rect.left = (int)(out_rect.left * best_subrect_surface->res_scale_width);
459 out_rect.right = (int)(out_rect.right * best_subrect_surface->res_scale_width);
460 out_rect.top = (int)(out_rect.top * best_subrect_surface->res_scale_height);
461 out_rect.bottom = (int)(out_rect.bottom * best_subrect_surface->res_scale_height);
462
463 return best_subrect_surface;
464 }
465
466 // No subrect found - create and return a new surface
467 if (!params.is_tiled) {
468 out_rect = MathUtil::Rectangle<int>(0, 0, (int)(params.width * params.res_scale_width), (int)(params.height * params.res_scale_height));
469 } else {
470 out_rect = MathUtil::Rectangle<int>(0, (int)(params.height * params.res_scale_height), (int)(params.width * params.res_scale_width), 0);
471 }
472
473 return GetSurface(params, match_res_scale, load_if_create);
474}
475
476CachedSurface* RasterizerCacheOpenGL::GetTextureSurface(const Pica::Regs::FullTextureConfig& config) {
477 Pica::DebugUtils::TextureInfo info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config.config, config.format);
478
479 CachedSurface params;
480 params.addr = info.physical_address;
481 params.width = info.width;
482 params.height = info.height;
483 params.is_tiled = true;
484 params.pixel_format = CachedSurface::PixelFormatFromTextureFormat(info.format);
485 return GetSurface(params, false, true);
486}
487
488std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> RasterizerCacheOpenGL::GetFramebufferSurfaces(const Pica::Regs::FramebufferConfig& config) {
489 const auto& regs = Pica::g_state.regs;
490
491 // Make sur that framebuffers don't overlap if both color and depth are being used
492 u32 fb_area = config.GetWidth() * config.GetHeight();
493 bool framebuffers_overlap = config.GetColorBufferPhysicalAddress() != 0 &&
494 config.GetDepthBufferPhysicalAddress() != 0 &&
495 MathUtil::IntervalsIntersect(config.GetColorBufferPhysicalAddress(), fb_area * GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(config.color_format.Value())),
496 config.GetDepthBufferPhysicalAddress(), fb_area * Pica::Regs::BytesPerDepthPixel(config.depth_format));
497 bool using_color_fb = config.GetColorBufferPhysicalAddress() != 0;
498 bool using_depth_fb = config.GetDepthBufferPhysicalAddress() != 0 && (regs.output_merger.depth_test_enable || regs.output_merger.depth_write_enable || !framebuffers_overlap);
499
500 if (framebuffers_overlap && using_color_fb && using_depth_fb) {
501 LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; overlapping framebuffers not supported!");
502 using_depth_fb = false;
503 }
504
505 // get color and depth surfaces
506 CachedSurface color_params;
507 CachedSurface depth_params;
508 color_params.width = depth_params.width = config.GetWidth();
509 color_params.height = depth_params.height = config.GetHeight();
510 color_params.is_tiled = depth_params.is_tiled = true;
511 if (VideoCore::g_scaled_resolution_enabled) {
512 auto layout = VideoCore::g_emu_window->GetFramebufferLayout();
513
514 // Assume same scaling factor for top and bottom screens
515 color_params.res_scale_width = depth_params.res_scale_width = (float)layout.top_screen.GetWidth() / VideoCore::kScreenTopWidth;
516 color_params.res_scale_height = depth_params.res_scale_height = (float)layout.top_screen.GetHeight() / VideoCore::kScreenTopHeight;
517 }
518
519 color_params.addr = config.GetColorBufferPhysicalAddress();
520 color_params.pixel_format = CachedSurface::PixelFormatFromColorFormat(config.color_format);
521
522 depth_params.addr = config.GetDepthBufferPhysicalAddress();
523 depth_params.pixel_format = CachedSurface::PixelFormatFromDepthFormat(config.depth_format);
524
525 MathUtil::Rectangle<int> color_rect;
526 CachedSurface* color_surface = using_color_fb ? GetSurfaceRect(color_params, true, true, color_rect) : nullptr;
527
528 MathUtil::Rectangle<int> depth_rect;
529 CachedSurface* depth_surface = using_depth_fb ? GetSurfaceRect(depth_params, true, true, depth_rect) : nullptr;
530
531 // Sanity check to make sure found surfaces aren't the same
532 if (using_depth_fb && using_color_fb && color_surface == depth_surface) {
533 LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer surfaces overlap; overlapping surfaces not supported!");
534 using_depth_fb = false;
535 depth_surface = nullptr;
536 }
537
538 MathUtil::Rectangle<int> rect;
539
540 if (color_surface != nullptr && depth_surface != nullptr && (depth_rect.left != color_rect.left || depth_rect.top != color_rect.top)) {
541 // Can't specify separate color and depth viewport offsets in OpenGL, so re-zero both if they don't match
542 if (color_rect.left != 0 || color_rect.top != 0) {
543 color_surface = GetSurface(color_params, true, true);
544 }
545
546 if (depth_rect.left != 0 || depth_rect.top != 0) {
547 depth_surface = GetSurface(depth_params, true, true);
548 }
549
550 if (!color_surface->is_tiled) {
551 rect = MathUtil::Rectangle<int>(0, 0, (int)(color_params.width * color_params.res_scale_width), (int)(color_params.height * color_params.res_scale_height));
74 } else { 552 } else {
75 ++it; 553 rect = MathUtil::Rectangle<int>(0, (int)(color_params.height * color_params.res_scale_height), (int)(color_params.width * color_params.res_scale_width), 0);
76 } 554 }
555 } else if (color_surface != nullptr) {
556 rect = color_rect;
557 } else if (depth_surface != nullptr) {
558 rect = depth_rect;
559 } else {
560 rect = MathUtil::Rectangle<int>(0, 0, 0, 0);
77 } 561 }
562
563 return std::make_tuple(color_surface, depth_surface, rect);
78} 564}
79 565
80void RasterizerCacheOpenGL::InvalidateAll() { 566CachedSurface* RasterizerCacheOpenGL::TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config) {
81 texture_cache.clear(); 567 auto surface_interval = boost::icl::interval<PAddr>::right_open(config.GetStartAddress(), config.GetEndAddress());
568 auto range = surface_cache.equal_range(surface_interval);
569 for (auto it = range.first; it != range.second; ++it) {
570 for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) {
571 int bits_per_value = 0;
572 if (config.fill_24bit) {
573 bits_per_value = 24;
574 } else if (config.fill_32bit) {
575 bits_per_value = 32;
576 } else {
577 bits_per_value = 16;
578 }
579
580 CachedSurface* surface = it2->get();
581
582 if (surface->addr == config.GetStartAddress() &&
583 CachedSurface::GetFormatBpp(surface->pixel_format) == bits_per_value &&
584 (surface->width * surface->height * CachedSurface::GetFormatBpp(surface->pixel_format) / 8) == (config.GetEndAddress() - config.GetStartAddress()))
585 {
586 return surface;
587 }
588 }
589 }
590
591 return nullptr;
592}
593
594MICROPROFILE_DEFINE(OpenGL_SurfaceDownload, "OpenGL", "Surface Download", MP_RGB(128, 192, 64));
595void RasterizerCacheOpenGL::FlushSurface(CachedSurface* surface) {
596 using PixelFormat = CachedSurface::PixelFormat;
597 using SurfaceType = CachedSurface::SurfaceType;
598
599 if (!surface->dirty) {
600 return;
601 }
602
603 MICROPROFILE_SCOPE(OpenGL_SurfaceDownload);
604
605 u8* dst_buffer = Memory::GetPhysicalPointer(surface->addr);
606 if (dst_buffer == nullptr) {
607 return;
608 }
609
610 OpenGLState cur_state = OpenGLState::GetCurState();
611 GLuint old_tex = cur_state.texture_units[0].texture_2d;
612
613 OGLTexture unscaled_tex;
614 GLuint texture_to_flush = surface->texture.handle;
615
616 // If not 1x scale, blit scaled texture to a new 1x texture and use that to flush
617 if (surface->res_scale_width != 1.f || surface->res_scale_height != 1.f) {
618 unscaled_tex.Create();
619
620 AllocateSurfaceTexture(unscaled_tex.handle, surface->pixel_format, surface->width, surface->height);
621 BlitTextures(surface->texture.handle, unscaled_tex.handle, CachedSurface::GetFormatType(surface->pixel_format),
622 MathUtil::Rectangle<int>(0, 0, surface->GetScaledWidth(), surface->GetScaledHeight()),
623 MathUtil::Rectangle<int>(0, 0, surface->width, surface->height));
624
625 texture_to_flush = unscaled_tex.handle;
626 }
627
628 cur_state.texture_units[0].texture_2d = texture_to_flush;
629 cur_state.Apply();
630 glActiveTexture(GL_TEXTURE0);
631
632 glPixelStorei(GL_PACK_ROW_LENGTH, (GLint)surface->stride);
633 if (!surface->is_tiled) {
634 // TODO: Ensure this will always be a color format, not a depth or other format
635 ASSERT((size_t)surface->pixel_format < fb_format_tuples.size());
636 const FormatTuple& tuple = fb_format_tuples[(unsigned int)surface->pixel_format];
637
638 glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, dst_buffer);
639 } else {
640 SurfaceType type = CachedSurface::GetFormatType(surface->pixel_format);
641 if (type != SurfaceType::Depth && type != SurfaceType::DepthStencil) {
642 ASSERT((size_t)surface->pixel_format < fb_format_tuples.size());
643 const FormatTuple& tuple = fb_format_tuples[(unsigned int)surface->pixel_format];
644
645 u32 bytes_per_pixel = CachedSurface::GetFormatBpp(surface->pixel_format) / 8;
646
647 std::vector<u8> temp_gl_buffer(surface->width * surface->height * bytes_per_pixel);
648
649 glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data());
650
651 // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion is necessary.
652 MortonCopyPixels(surface->pixel_format, surface->width, surface->height, bytes_per_pixel, bytes_per_pixel, dst_buffer, temp_gl_buffer.data(), false);
653 } else {
654 // Depth/Stencil formats need special treatment since they aren't sampleable using LookupTexture and can't use RGBA format
655 size_t tuple_idx = (size_t)surface->pixel_format - 14;
656 ASSERT(tuple_idx < depth_format_tuples.size());
657 const FormatTuple& tuple = depth_format_tuples[tuple_idx];
658
659 u32 bytes_per_pixel = CachedSurface::GetFormatBpp(surface->pixel_format) / 8;
660
661 // OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type
662 bool use_4bpp = (surface->pixel_format == PixelFormat::D24);
663
664 u32 gl_bytes_per_pixel = use_4bpp ? 4 : bytes_per_pixel;
665
666 std::vector<u8> temp_gl_buffer(surface->width * surface->height * gl_bytes_per_pixel);
667
668 glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data());
669
670 u8* temp_gl_buffer_ptr = use_4bpp ? temp_gl_buffer.data() + 1 : temp_gl_buffer.data();
671
672 MortonCopyPixels(surface->pixel_format, surface->width, surface->height, bytes_per_pixel, gl_bytes_per_pixel, dst_buffer, temp_gl_buffer_ptr, false);
673 }
674 }
675 glPixelStorei(GL_PACK_ROW_LENGTH, 0);
676
677 surface->dirty = false;
678
679 cur_state.texture_units[0].texture_2d = old_tex;
680 cur_state.Apply();
681}
682
683void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, bool invalidate) {
684 if (size == 0) {
685 return;
686 }
687
688 // Gather up unique surfaces that touch the region
689 std::unordered_set<std::shared_ptr<CachedSurface>> touching_surfaces;
690
691 auto surface_interval = boost::icl::interval<PAddr>::right_open(addr, addr + size);
692 auto cache_upper_bound = surface_cache.upper_bound(surface_interval);
693 for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) {
694 std::copy_if(it->second.begin(), it->second.end(), std::inserter(touching_surfaces, touching_surfaces.end()),
695 [skip_surface](std::shared_ptr<CachedSurface> surface) { return (surface.get() != skip_surface); });
696 }
697
698 // Flush and invalidate surfaces
699 for (auto surface : touching_surfaces) {
700 FlushSurface(surface.get());
701 if (invalidate) {
702 Memory::RasterizerMarkRegionCached(surface->addr, surface->size, -1);
703 surface_cache.subtract(std::make_pair(boost::icl::interval<PAddr>::right_open(surface->addr, surface->addr + surface->size), std::set<std::shared_ptr<CachedSurface>>({ surface })));
704 }
705 }
706}
707
708void RasterizerCacheOpenGL::FlushAll() {
709 for (auto& surfaces : surface_cache) {
710 for (auto& surface : surfaces.second) {
711 FlushSurface(surface.get());
712 }
713 }
82} 714}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index b69651427..225596415 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -4,40 +4,219 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <map> 7#include <array>
8#include <memory> 8#include <memory>
9#include <set>
10#include <tuple>
11
12#include <boost/icl/interval_map.hpp>
13#include <glad/glad.h>
14
15#include "common/assert.h"
16#include "common/common_funcs.h"
17#include "common/common_types.h"
18
19#include "core/hw/gpu.h"
9 20
10#include "video_core/pica.h" 21#include "video_core/pica.h"
11#include "video_core/debug_utils/debug_utils.h"
12#include "video_core/renderer_opengl/gl_resource_manager.h" 22#include "video_core/renderer_opengl/gl_resource_manager.h"
13#include "video_core/renderer_opengl/gl_state.h" 23
24namespace MathUtil {
25template <class T> struct Rectangle;
26}
27
28struct CachedSurface;
29
30using SurfaceCache = boost::icl::interval_map<PAddr, std::set<std::shared_ptr<CachedSurface>>>;
31
32struct CachedSurface {
33 enum class PixelFormat {
34 // First 5 formats are shared between textures and color buffers
35 RGBA8 = 0,
36 RGB8 = 1,
37 RGB5A1 = 2,
38 RGB565 = 3,
39 RGBA4 = 4,
40
41 // Texture-only formats
42 IA8 = 5,
43 RG8 = 6,
44 I8 = 7,
45 A8 = 8,
46 IA4 = 9,
47 I4 = 10,
48 A4 = 11,
49 ETC1 = 12,
50 ETC1A4 = 13,
51
52 // Depth buffer-only formats
53 D16 = 14,
54 // gap
55 D24 = 16,
56 D24S8 = 17,
57
58 Invalid = 255,
59 };
60
61 enum class SurfaceType {
62 Color = 0,
63 Texture = 1,
64 Depth = 2,
65 DepthStencil = 3,
66 Invalid = 4,
67 };
68
69 static unsigned int GetFormatBpp(CachedSurface::PixelFormat format) {
70 static const std::array<unsigned int, 18> bpp_table = {
71 32, // RGBA8
72 24, // RGB8
73 16, // RGB5A1
74 16, // RGB565
75 16, // RGBA4
76 16, // IA8
77 16, // RG8
78 8, // I8
79 8, // A8
80 8, // IA4
81 4, // I4
82 4, // A4
83 4, // ETC1
84 8, // ETC1A4
85 16, // D16
86 0,
87 24, // D24
88 32, // D24S8
89 };
90
91 ASSERT((unsigned int)format < ARRAY_SIZE(bpp_table));
92 return bpp_table[(unsigned int)format];
93 }
94
95 static PixelFormat PixelFormatFromTextureFormat(Pica::Regs::TextureFormat format) {
96 return ((unsigned int)format < 14) ? (PixelFormat)format : PixelFormat::Invalid;
97 }
98
99 static PixelFormat PixelFormatFromColorFormat(Pica::Regs::ColorFormat format) {
100 return ((unsigned int)format < 5) ? (PixelFormat)format : PixelFormat::Invalid;
101 }
102
103 static PixelFormat PixelFormatFromDepthFormat(Pica::Regs::DepthFormat format) {
104 return ((unsigned int)format < 4) ? (PixelFormat)((unsigned int)format + 14) : PixelFormat::Invalid;
105 }
106
107 static PixelFormat PixelFormatFromGPUPixelFormat(GPU::Regs::PixelFormat format) {
108 switch (format) {
109 // RGB565 and RGB5A1 are switched in PixelFormat compared to ColorFormat
110 case GPU::Regs::PixelFormat::RGB565:
111 return PixelFormat::RGB565;
112 case GPU::Regs::PixelFormat::RGB5A1:
113 return PixelFormat::RGB5A1;
114 default:
115 return ((unsigned int)format < 5) ? (PixelFormat)format : PixelFormat::Invalid;
116 }
117 }
118
119 static bool CheckFormatsBlittable(PixelFormat pixel_format_a, PixelFormat pixel_format_b) {
120 SurfaceType a_type = GetFormatType(pixel_format_a);
121 SurfaceType b_type = GetFormatType(pixel_format_b);
122
123 if ((a_type == SurfaceType::Color || a_type == SurfaceType::Texture) && (b_type == SurfaceType::Color || b_type == SurfaceType::Texture)) {
124 return true;
125 }
126
127 if (a_type == SurfaceType::Depth && b_type == SurfaceType::Depth) {
128 return true;
129 }
130
131 if (a_type == SurfaceType::DepthStencil && b_type == SurfaceType::DepthStencil) {
132 return true;
133 }
134
135 return false;
136 }
137
138 static SurfaceType GetFormatType(PixelFormat pixel_format) {
139 if ((unsigned int)pixel_format < 5) {
140 return SurfaceType::Color;
141 }
142
143 if ((unsigned int)pixel_format < 14) {
144 return SurfaceType::Texture;
145 }
146
147 if (pixel_format == PixelFormat::D16 || pixel_format == PixelFormat::D24) {
148 return SurfaceType::Depth;
149 }
150
151 if (pixel_format == PixelFormat::D24S8) {
152 return SurfaceType::DepthStencil;
153 }
154
155 return SurfaceType::Invalid;
156 }
157
158 u32 GetScaledWidth() const {
159 return (u32)(width * res_scale_width);
160 }
161
162 u32 GetScaledHeight() const {
163 return (u32)(height * res_scale_height);
164 }
165
166 PAddr addr;
167 u32 size;
168
169 PAddr min_valid;
170 PAddr max_valid;
171
172 OGLTexture texture;
173 u32 width;
174 u32 height;
175 u32 stride = 0;
176 float res_scale_width = 1.f;
177 float res_scale_height = 1.f;
178
179 bool is_tiled;
180 PixelFormat pixel_format;
181 bool dirty;
182};
14 183
15class RasterizerCacheOpenGL : NonCopyable { 184class RasterizerCacheOpenGL : NonCopyable {
16public: 185public:
186 RasterizerCacheOpenGL();
17 ~RasterizerCacheOpenGL(); 187 ~RasterizerCacheOpenGL();
18 188
189 /// Blits one texture to another
190 bool BlitTextures(GLuint src_tex, GLuint dst_tex, CachedSurface::SurfaceType type, const MathUtil::Rectangle<int>& src_rect, const MathUtil::Rectangle<int>& dst_rect);
191
192 /// Attempt to blit one surface's texture to another
193 bool TryBlitSurfaces(CachedSurface* src_surface, const MathUtil::Rectangle<int>& src_rect, CachedSurface* dst_surface, const MathUtil::Rectangle<int>& dst_rect);
194
19 /// Loads a texture from 3DS memory to OpenGL and caches it (if not already cached) 195 /// Loads a texture from 3DS memory to OpenGL and caches it (if not already cached)
20 void LoadAndBindTexture(OpenGLState &state, unsigned texture_unit, const Pica::DebugUtils::TextureInfo& info); 196 CachedSurface* GetSurface(const CachedSurface& params, bool match_res_scale, bool load_if_create);
21 197
22 void LoadAndBindTexture(OpenGLState &state, unsigned texture_unit, const Pica::Regs::FullTextureConfig& config) { 198 /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from 3DS memory to OpenGL and caches it (if not already cached)
23 LoadAndBindTexture(state, texture_unit, Pica::DebugUtils::TextureInfo::FromPicaRegister(config.config, config.format)); 199 CachedSurface* GetSurfaceRect(const CachedSurface& params, bool match_res_scale, bool load_if_create, MathUtil::Rectangle<int>& out_rect);
24 }
25 200
26 /// Invalidate any cached resource intersecting the specified region. 201 /// Gets a surface based on the texture configuration
27 void InvalidateInRange(PAddr addr, u32 size, bool ignore_hash = false); 202 CachedSurface* GetTextureSurface(const Pica::Regs::FullTextureConfig& config);
28 203
29 /// Invalidate all cached OpenGL resources tracked by this cache manager 204 /// Gets the color and depth surfaces and rect (resolution scaled) based on the framebuffer configuration
30 void InvalidateAll(); 205 std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> GetFramebufferSurfaces(const Pica::Regs::FramebufferConfig& config);
31 206
32private: 207 /// Attempt to get a surface that exactly matches the fill region and format
33 struct CachedTexture { 208 CachedSurface* TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config);
34 OGLTexture texture; 209
35 GLuint width; 210 /// Write the surface back to memory
36 GLuint height; 211 void FlushSurface(CachedSurface* surface);
37 u32 size;
38 u64 hash;
39 PAddr addr;
40 };
41 212
42 std::map<PAddr, std::unique_ptr<CachedTexture>> texture_cache; 213 /// Write any cached resources overlapping the region back to memory (if dirty) and optionally invalidate them in the cache
214 void FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, bool invalidate);
215
216 /// Flush all cached resources tracked by this cache manager
217 void FlushAll();
218
219private:
220 SurfaceCache surface_cache;
221 OGLFramebuffer transfer_framebuffers[2];
43}; 222};
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index ee4b54ab9..71d60e69c 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -2,9 +2,17 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <array>
6#include <cstddef>
7
8#include "common/assert.h"
9#include "common/bit_field.h"
10#include "common/logging/log.h"
11
5#include "video_core/pica.h" 12#include "video_core/pica.h"
6#include "video_core/renderer_opengl/gl_rasterizer.h" 13#include "video_core/renderer_opengl/gl_rasterizer.h"
7#include "video_core/renderer_opengl/gl_shader_gen.h" 14#include "video_core/renderer_opengl/gl_shader_gen.h"
15#include "video_core/renderer_opengl/gl_shader_util.h"
8 16
9using Pica::Regs; 17using Pica::Regs;
10using TevStageConfig = Regs::TevStageConfig; 18using TevStageConfig = Regs::TevStageConfig;
@@ -24,8 +32,9 @@ static bool IsPassThroughTevStage(const TevStageConfig& stage) {
24} 32}
25 33
26/// Writes the specified TEV stage source component(s) 34/// Writes the specified TEV stage source component(s)
27static void AppendSource(std::string& out, TevStageConfig::Source source, 35static void AppendSource(std::string& out, const PicaShaderConfig& config, TevStageConfig::Source source,
28 const std::string& index_name) { 36 const std::string& index_name) {
37 const auto& state = config.state;
29 using Source = TevStageConfig::Source; 38 using Source = TevStageConfig::Source;
30 switch (source) { 39 switch (source) {
31 case Source::PrimaryColor: 40 case Source::PrimaryColor:
@@ -38,7 +47,20 @@ static void AppendSource(std::string& out, TevStageConfig::Source source,
38 out += "secondary_fragment_color"; 47 out += "secondary_fragment_color";
39 break; 48 break;
40 case Source::Texture0: 49 case Source::Texture0:
41 out += "texture(tex[0], texcoord[0])"; 50 // Only unit 0 respects the texturing type (according to 3DBrew)
51 switch(state.texture0_type) {
52 case Pica::Regs::TextureConfig::Texture2D:
53 out += "texture(tex[0], texcoord[0])";
54 break;
55 case Pica::Regs::TextureConfig::Projection2D:
56 out += "textureProj(tex[0], vec3(texcoord[0], texcoord0_w))";
57 break;
58 default:
59 out += "texture(tex[0], texcoord[0])";
60 LOG_CRITICAL(HW_GPU, "Unhandled texture type %x", static_cast<int>(state.texture0_type));
61 UNIMPLEMENTED();
62 break;
63 }
42 break; 64 break;
43 case Source::Texture1: 65 case Source::Texture1:
44 out += "texture(tex[1], texcoord[1])"; 66 out += "texture(tex[1], texcoord[1])";
@@ -63,53 +85,53 @@ static void AppendSource(std::string& out, TevStageConfig::Source source,
63} 85}
64 86
65/// Writes the color components to use for the specified TEV stage color modifier 87/// Writes the color components to use for the specified TEV stage color modifier
66static void AppendColorModifier(std::string& out, TevStageConfig::ColorModifier modifier, 88static void AppendColorModifier(std::string& out, const PicaShaderConfig& config, TevStageConfig::ColorModifier modifier,
67 TevStageConfig::Source source, const std::string& index_name) { 89 TevStageConfig::Source source, const std::string& index_name) {
68 using ColorModifier = TevStageConfig::ColorModifier; 90 using ColorModifier = TevStageConfig::ColorModifier;
69 switch (modifier) { 91 switch (modifier) {
70 case ColorModifier::SourceColor: 92 case ColorModifier::SourceColor:
71 AppendSource(out, source, index_name); 93 AppendSource(out, config, source, index_name);
72 out += ".rgb"; 94 out += ".rgb";
73 break; 95 break;
74 case ColorModifier::OneMinusSourceColor: 96 case ColorModifier::OneMinusSourceColor:
75 out += "vec3(1.0) - "; 97 out += "vec3(1.0) - ";
76 AppendSource(out, source, index_name); 98 AppendSource(out, config, source, index_name);
77 out += ".rgb"; 99 out += ".rgb";
78 break; 100 break;
79 case ColorModifier::SourceAlpha: 101 case ColorModifier::SourceAlpha:
80 AppendSource(out, source, index_name); 102 AppendSource(out, config, source, index_name);
81 out += ".aaa"; 103 out += ".aaa";
82 break; 104 break;
83 case ColorModifier::OneMinusSourceAlpha: 105 case ColorModifier::OneMinusSourceAlpha:
84 out += "vec3(1.0) - "; 106 out += "vec3(1.0) - ";
85 AppendSource(out, source, index_name); 107 AppendSource(out, config, source, index_name);
86 out += ".aaa"; 108 out += ".aaa";
87 break; 109 break;
88 case ColorModifier::SourceRed: 110 case ColorModifier::SourceRed:
89 AppendSource(out, source, index_name); 111 AppendSource(out, config, source, index_name);
90 out += ".rrr"; 112 out += ".rrr";
91 break; 113 break;
92 case ColorModifier::OneMinusSourceRed: 114 case ColorModifier::OneMinusSourceRed:
93 out += "vec3(1.0) - "; 115 out += "vec3(1.0) - ";
94 AppendSource(out, source, index_name); 116 AppendSource(out, config, source, index_name);
95 out += ".rrr"; 117 out += ".rrr";
96 break; 118 break;
97 case ColorModifier::SourceGreen: 119 case ColorModifier::SourceGreen:
98 AppendSource(out, source, index_name); 120 AppendSource(out, config, source, index_name);
99 out += ".ggg"; 121 out += ".ggg";
100 break; 122 break;
101 case ColorModifier::OneMinusSourceGreen: 123 case ColorModifier::OneMinusSourceGreen:
102 out += "vec3(1.0) - "; 124 out += "vec3(1.0) - ";
103 AppendSource(out, source, index_name); 125 AppendSource(out, config, source, index_name);
104 out += ".ggg"; 126 out += ".ggg";
105 break; 127 break;
106 case ColorModifier::SourceBlue: 128 case ColorModifier::SourceBlue:
107 AppendSource(out, source, index_name); 129 AppendSource(out, config, source, index_name);
108 out += ".bbb"; 130 out += ".bbb";
109 break; 131 break;
110 case ColorModifier::OneMinusSourceBlue: 132 case ColorModifier::OneMinusSourceBlue:
111 out += "vec3(1.0) - "; 133 out += "vec3(1.0) - ";
112 AppendSource(out, source, index_name); 134 AppendSource(out, config, source, index_name);
113 out += ".bbb"; 135 out += ".bbb";
114 break; 136 break;
115 default: 137 default:
@@ -120,44 +142,44 @@ static void AppendColorModifier(std::string& out, TevStageConfig::ColorModifier
120} 142}
121 143
122/// Writes the alpha component to use for the specified TEV stage alpha modifier 144/// Writes the alpha component to use for the specified TEV stage alpha modifier
123static void AppendAlphaModifier(std::string& out, TevStageConfig::AlphaModifier modifier, 145static void AppendAlphaModifier(std::string& out, const PicaShaderConfig& config, TevStageConfig::AlphaModifier modifier,
124 TevStageConfig::Source source, const std::string& index_name) { 146 TevStageConfig::Source source, const std::string& index_name) {
125 using AlphaModifier = TevStageConfig::AlphaModifier; 147 using AlphaModifier = TevStageConfig::AlphaModifier;
126 switch (modifier) { 148 switch (modifier) {
127 case AlphaModifier::SourceAlpha: 149 case AlphaModifier::SourceAlpha:
128 AppendSource(out, source, index_name); 150 AppendSource(out, config, source, index_name);
129 out += ".a"; 151 out += ".a";
130 break; 152 break;
131 case AlphaModifier::OneMinusSourceAlpha: 153 case AlphaModifier::OneMinusSourceAlpha:
132 out += "1.0 - "; 154 out += "1.0 - ";
133 AppendSource(out, source, index_name); 155 AppendSource(out, config, source, index_name);
134 out += ".a"; 156 out += ".a";
135 break; 157 break;
136 case AlphaModifier::SourceRed: 158 case AlphaModifier::SourceRed:
137 AppendSource(out, source, index_name); 159 AppendSource(out, config, source, index_name);
138 out += ".r"; 160 out += ".r";
139 break; 161 break;
140 case AlphaModifier::OneMinusSourceRed: 162 case AlphaModifier::OneMinusSourceRed:
141 out += "1.0 - "; 163 out += "1.0 - ";
142 AppendSource(out, source, index_name); 164 AppendSource(out, config, source, index_name);
143 out += ".r"; 165 out += ".r";
144 break; 166 break;
145 case AlphaModifier::SourceGreen: 167 case AlphaModifier::SourceGreen:
146 AppendSource(out, source, index_name); 168 AppendSource(out, config, source, index_name);
147 out += ".g"; 169 out += ".g";
148 break; 170 break;
149 case AlphaModifier::OneMinusSourceGreen: 171 case AlphaModifier::OneMinusSourceGreen:
150 out += "1.0 - "; 172 out += "1.0 - ";
151 AppendSource(out, source, index_name); 173 AppendSource(out, config, source, index_name);
152 out += ".g"; 174 out += ".g";
153 break; 175 break;
154 case AlphaModifier::SourceBlue: 176 case AlphaModifier::SourceBlue:
155 AppendSource(out, source, index_name); 177 AppendSource(out, config, source, index_name);
156 out += ".b"; 178 out += ".b";
157 break; 179 break;
158 case AlphaModifier::OneMinusSourceBlue: 180 case AlphaModifier::OneMinusSourceBlue:
159 out += "1.0 - "; 181 out += "1.0 - ";
160 AppendSource(out, source, index_name); 182 AppendSource(out, config, source, index_name);
161 out += ".b"; 183 out += ".b";
162 break; 184 break;
163 default: 185 default:
@@ -198,6 +220,9 @@ static void AppendColorCombiner(std::string& out, TevStageConfig::Operation oper
198 case Operation::AddThenMultiply: 220 case Operation::AddThenMultiply:
199 out += "min(" + variable_name + "[0] + " + variable_name + "[1], vec3(1.0)) * " + variable_name + "[2]"; 221 out += "min(" + variable_name + "[0] + " + variable_name + "[1], vec3(1.0)) * " + variable_name + "[2]";
200 break; 222 break;
223 case Operation::Dot3_RGB:
224 out += "vec3(dot(" + variable_name + "[0] - vec3(0.5), " + variable_name + "[1] - vec3(0.5)) * 4.0)";
225 break;
201 default: 226 default:
202 out += "vec3(0.0)"; 227 out += "vec3(0.0)";
203 LOG_CRITICAL(Render_OpenGL, "Unknown color combiner operation: %u", operation); 228 LOG_CRITICAL(Render_OpenGL, "Unknown color combiner operation: %u", operation);
@@ -276,16 +301,16 @@ static void AppendAlphaTestCondition(std::string& out, Regs::CompareFunc func) {
276 301
277/// Writes the code to emulate the specified TEV stage 302/// Writes the code to emulate the specified TEV stage
278static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsigned index) { 303static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsigned index) {
279 auto& stage = config.tev_stages[index]; 304 const auto stage = static_cast<const Pica::Regs::TevStageConfig>(config.state.tev_stages[index]);
280 if (!IsPassThroughTevStage(stage)) { 305 if (!IsPassThroughTevStage(stage)) {
281 std::string index_name = std::to_string(index); 306 std::string index_name = std::to_string(index);
282 307
283 out += "vec3 color_results_" + index_name + "[3] = vec3[3]("; 308 out += "vec3 color_results_" + index_name + "[3] = vec3[3](";
284 AppendColorModifier(out, stage.color_modifier1, stage.color_source1, index_name); 309 AppendColorModifier(out, config, stage.color_modifier1, stage.color_source1, index_name);
285 out += ", "; 310 out += ", ";
286 AppendColorModifier(out, stage.color_modifier2, stage.color_source2, index_name); 311 AppendColorModifier(out, config, stage.color_modifier2, stage.color_source2, index_name);
287 out += ", "; 312 out += ", ";
288 AppendColorModifier(out, stage.color_modifier3, stage.color_source3, index_name); 313 AppendColorModifier(out, config, stage.color_modifier3, stage.color_source3, index_name);
289 out += ");\n"; 314 out += ");\n";
290 315
291 out += "vec3 color_output_" + index_name + " = "; 316 out += "vec3 color_output_" + index_name + " = ";
@@ -293,11 +318,11 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi
293 out += ";\n"; 318 out += ";\n";
294 319
295 out += "float alpha_results_" + index_name + "[3] = float[3]("; 320 out += "float alpha_results_" + index_name + "[3] = float[3](";
296 AppendAlphaModifier(out, stage.alpha_modifier1, stage.alpha_source1, index_name); 321 AppendAlphaModifier(out, config, stage.alpha_modifier1, stage.alpha_source1, index_name);
297 out += ", "; 322 out += ", ";
298 AppendAlphaModifier(out, stage.alpha_modifier2, stage.alpha_source2, index_name); 323 AppendAlphaModifier(out, config, stage.alpha_modifier2, stage.alpha_source2, index_name);
299 out += ", "; 324 out += ", ";
300 AppendAlphaModifier(out, stage.alpha_modifier3, stage.alpha_source3, index_name); 325 AppendAlphaModifier(out, config, stage.alpha_modifier3, stage.alpha_source3, index_name);
301 out += ");\n"; 326 out += ");\n";
302 327
303 out += "float alpha_output_" + index_name + " = "; 328 out += "float alpha_output_" + index_name + " = ";
@@ -320,6 +345,8 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi
320 345
321/// Writes the code to emulate fragment lighting 346/// Writes the code to emulate fragment lighting
322static void WriteLighting(std::string& out, const PicaShaderConfig& config) { 347static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
348 const auto& lighting = config.state.lighting;
349
323 // Define lighting globals 350 // Define lighting globals
324 out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" 351 out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n"
325 "vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" 352 "vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);\n"
@@ -327,17 +354,17 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
327 "vec3 refl_value = vec3(0.0);\n"; 354 "vec3 refl_value = vec3(0.0);\n";
328 355
329 // Compute fragment normals 356 // Compute fragment normals
330 if (config.lighting.bump_mode == Pica::Regs::LightingBumpMode::NormalMap) { 357 if (lighting.bump_mode == Pica::Regs::LightingBumpMode::NormalMap) {
331 // Bump mapping is enabled using a normal map, read perturbation vector from the selected texture 358 // Bump mapping is enabled using a normal map, read perturbation vector from the selected texture
332 std::string bump_selector = std::to_string(config.lighting.bump_selector); 359 std::string bump_selector = std::to_string(lighting.bump_selector);
333 out += "vec3 surface_normal = 2.0 * texture(tex[" + bump_selector + "], texcoord[" + bump_selector + "]).rgb - 1.0;\n"; 360 out += "vec3 surface_normal = 2.0 * texture(tex[" + bump_selector + "], texcoord[" + bump_selector + "]).rgb - 1.0;\n";
334 361
335 // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher precision result 362 // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher precision result
336 if (config.lighting.bump_renorm) { 363 if (lighting.bump_renorm) {
337 std::string val = "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))"; 364 std::string val = "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))";
338 out += "surface_normal.z = sqrt(max(" + val + ", 0.0));\n"; 365 out += "surface_normal.z = sqrt(max(" + val + ", 0.0));\n";
339 } 366 }
340 } else if (config.lighting.bump_mode == Pica::Regs::LightingBumpMode::TangentMap) { 367 } else if (lighting.bump_mode == Pica::Regs::LightingBumpMode::TangentMap) {
341 // Bump mapping is enabled using a tangent map 368 // Bump mapping is enabled using a tangent map
342 LOG_CRITICAL(HW_GPU, "unimplemented bump mapping mode (tangent mapping)"); 369 LOG_CRITICAL(HW_GPU, "unimplemented bump mapping mode (tangent mapping)");
343 UNIMPLEMENTED(); 370 UNIMPLEMENTED();
@@ -350,7 +377,7 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
350 out += "vec3 normal = normalize(quaternion_rotate(normquat, surface_normal));\n"; 377 out += "vec3 normal = normalize(quaternion_rotate(normquat, surface_normal));\n";
351 378
352 // Gets the index into the specified lookup table for specular lighting 379 // Gets the index into the specified lookup table for specular lighting
353 auto GetLutIndex = [config](unsigned light_num, Regs::LightingLutInput input, bool abs) { 380 auto GetLutIndex = [&lighting](unsigned light_num, Regs::LightingLutInput input, bool abs) {
354 const std::string half_angle = "normalize(normalize(view) + light_vector)"; 381 const std::string half_angle = "normalize(normalize(view) + light_vector)";
355 std::string index; 382 std::string index;
356 switch (input) { 383 switch (input) {
@@ -378,7 +405,7 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
378 405
379 if (abs) { 406 if (abs) {
380 // LUT index is in the range of (0.0, 1.0) 407 // LUT index is in the range of (0.0, 1.0)
381 index = config.lighting.light[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)"; 408 index = lighting.light[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)";
382 return "(FLOAT_255 * clamp(" + index + ", 0.0, 1.0))"; 409 return "(FLOAT_255 * clamp(" + index + ", 0.0, 1.0))";
383 } else { 410 } else {
384 // LUT index is in the range of (-1.0, 1.0) 411 // LUT index is in the range of (-1.0, 1.0)
@@ -396,8 +423,8 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
396 }; 423 };
397 424
398 // Write the code to emulate each enabled light 425 // Write the code to emulate each enabled light
399 for (unsigned light_index = 0; light_index < config.lighting.src_num; ++light_index) { 426 for (unsigned light_index = 0; light_index < lighting.src_num; ++light_index) {
400 const auto& light_config = config.lighting.light[light_index]; 427 const auto& light_config = lighting.light[light_index];
401 std::string light_src = "light_src[" + std::to_string(light_config.num) + "]"; 428 std::string light_src = "light_src[" + std::to_string(light_config.num) + "]";
402 429
403 // Compute light vector (directional or positional) 430 // Compute light vector (directional or positional)
@@ -421,39 +448,39 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
421 } 448 }
422 449
423 // If enabled, clamp specular component if lighting result is negative 450 // If enabled, clamp specular component if lighting result is negative
424 std::string clamp_highlights = config.lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0"; 451 std::string clamp_highlights = lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0";
425 452
426 // Specular 0 component 453 // Specular 0 component
427 std::string d0_lut_value = "1.0"; 454 std::string d0_lut_value = "1.0";
428 if (config.lighting.lut_d0.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Distribution0)) { 455 if (lighting.lut_d0.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Distribution0)) {
429 // Lookup specular "distribution 0" LUT value 456 // Lookup specular "distribution 0" LUT value
430 std::string index = GetLutIndex(light_config.num, config.lighting.lut_d0.type, config.lighting.lut_d0.abs_input); 457 std::string index = GetLutIndex(light_config.num, lighting.lut_d0.type, lighting.lut_d0.abs_input);
431 d0_lut_value = "(" + std::to_string(config.lighting.lut_d0.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution0, index) + ")"; 458 d0_lut_value = "(" + std::to_string(lighting.lut_d0.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution0, index) + ")";
432 } 459 }
433 std::string specular_0 = "(" + d0_lut_value + " * " + light_src + ".specular_0)"; 460 std::string specular_0 = "(" + d0_lut_value + " * " + light_src + ".specular_0)";
434 461
435 // If enabled, lookup ReflectRed value, otherwise, 1.0 is used 462 // If enabled, lookup ReflectRed value, otherwise, 1.0 is used
436 if (config.lighting.lut_rr.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectRed)) { 463 if (lighting.lut_rr.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectRed)) {
437 std::string index = GetLutIndex(light_config.num, config.lighting.lut_rr.type, config.lighting.lut_rr.abs_input); 464 std::string index = GetLutIndex(light_config.num, lighting.lut_rr.type, lighting.lut_rr.abs_input);
438 std::string value = "(" + std::to_string(config.lighting.lut_rr.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectRed, index) + ")"; 465 std::string value = "(" + std::to_string(lighting.lut_rr.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectRed, index) + ")";
439 out += "refl_value.r = " + value + ";\n"; 466 out += "refl_value.r = " + value + ";\n";
440 } else { 467 } else {
441 out += "refl_value.r = 1.0;\n"; 468 out += "refl_value.r = 1.0;\n";
442 } 469 }
443 470
444 // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used 471 // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used
445 if (config.lighting.lut_rg.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectGreen)) { 472 if (lighting.lut_rg.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectGreen)) {
446 std::string index = GetLutIndex(light_config.num, config.lighting.lut_rg.type, config.lighting.lut_rg.abs_input); 473 std::string index = GetLutIndex(light_config.num, lighting.lut_rg.type, lighting.lut_rg.abs_input);
447 std::string value = "(" + std::to_string(config.lighting.lut_rg.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectGreen, index) + ")"; 474 std::string value = "(" + std::to_string(lighting.lut_rg.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectGreen, index) + ")";
448 out += "refl_value.g = " + value + ";\n"; 475 out += "refl_value.g = " + value + ";\n";
449 } else { 476 } else {
450 out += "refl_value.g = refl_value.r;\n"; 477 out += "refl_value.g = refl_value.r;\n";
451 } 478 }
452 479
453 // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used 480 // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used
454 if (config.lighting.lut_rb.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectBlue)) { 481 if (lighting.lut_rb.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectBlue)) {
455 std::string index = GetLutIndex(light_config.num, config.lighting.lut_rb.type, config.lighting.lut_rb.abs_input); 482 std::string index = GetLutIndex(light_config.num, lighting.lut_rb.type, lighting.lut_rb.abs_input);
456 std::string value = "(" + std::to_string(config.lighting.lut_rb.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectBlue, index) + ")"; 483 std::string value = "(" + std::to_string(lighting.lut_rb.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectBlue, index) + ")";
457 out += "refl_value.b = " + value + ";\n"; 484 out += "refl_value.b = " + value + ";\n";
458 } else { 485 } else {
459 out += "refl_value.b = refl_value.r;\n"; 486 out += "refl_value.b = refl_value.r;\n";
@@ -461,27 +488,27 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
461 488
462 // Specular 1 component 489 // Specular 1 component
463 std::string d1_lut_value = "1.0"; 490 std::string d1_lut_value = "1.0";
464 if (config.lighting.lut_d1.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Distribution1)) { 491 if (lighting.lut_d1.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Distribution1)) {
465 // Lookup specular "distribution 1" LUT value 492 // Lookup specular "distribution 1" LUT value
466 std::string index = GetLutIndex(light_config.num, config.lighting.lut_d1.type, config.lighting.lut_d1.abs_input); 493 std::string index = GetLutIndex(light_config.num, lighting.lut_d1.type, lighting.lut_d1.abs_input);
467 d1_lut_value = "(" + std::to_string(config.lighting.lut_d1.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution1, index) + ")"; 494 d1_lut_value = "(" + std::to_string(lighting.lut_d1.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution1, index) + ")";
468 } 495 }
469 std::string specular_1 = "(" + d1_lut_value + " * refl_value * " + light_src + ".specular_1)"; 496 std::string specular_1 = "(" + d1_lut_value + " * refl_value * " + light_src + ".specular_1)";
470 497
471 // Fresnel 498 // Fresnel
472 if (config.lighting.lut_fr.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Fresnel)) { 499 if (lighting.lut_fr.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Fresnel)) {
473 // Lookup fresnel LUT value 500 // Lookup fresnel LUT value
474 std::string index = GetLutIndex(light_config.num, config.lighting.lut_fr.type, config.lighting.lut_fr.abs_input); 501 std::string index = GetLutIndex(light_config.num, lighting.lut_fr.type, lighting.lut_fr.abs_input);
475 std::string value = "(" + std::to_string(config.lighting.lut_fr.scale) + " * " + GetLutValue(Regs::LightingSampler::Fresnel, index) + ")"; 502 std::string value = "(" + std::to_string(lighting.lut_fr.scale) + " * " + GetLutValue(Regs::LightingSampler::Fresnel, index) + ")";
476 503
477 // Enabled for diffuse lighting alpha component 504 // Enabled for diffuse lighting alpha component
478 if (config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::PrimaryAlpha || 505 if (lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::PrimaryAlpha ||
479 config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both) 506 lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both)
480 out += "diffuse_sum.a *= " + value + ";\n"; 507 out += "diffuse_sum.a *= " + value + ";\n";
481 508
482 // Enabled for the specular lighting alpha component 509 // Enabled for the specular lighting alpha component
483 if (config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::SecondaryAlpha || 510 if (lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::SecondaryAlpha ||
484 config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both) 511 lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both)
485 out += "specular_sum.a *= " + value + ";\n"; 512 out += "specular_sum.a *= " + value + ";\n";
486 } 513 }
487 514
@@ -499,6 +526,8 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
499} 526}
500 527
501std::string GenerateFragmentShader(const PicaShaderConfig& config) { 528std::string GenerateFragmentShader(const PicaShaderConfig& config) {
529 const auto& state = config.state;
530
502 std::string out = R"( 531 std::string out = R"(
503#version 330 core 532#version 330 core
504#define NUM_TEV_STAGES 6 533#define NUM_TEV_STAGES 6
@@ -508,6 +537,7 @@ std::string GenerateFragmentShader(const PicaShaderConfig& config) {
508 537
509in vec4 primary_color; 538in vec4 primary_color;
510in vec2 texcoord[3]; 539in vec2 texcoord[3];
540in float texcoord0_w;
511in vec4 normquat; 541in vec4 normquat;
512in vec3 view; 542in vec3 view;
513 543
@@ -525,6 +555,7 @@ layout (std140) uniform shader_data {
525 vec4 const_color[NUM_TEV_STAGES]; 555 vec4 const_color[NUM_TEV_STAGES];
526 vec4 tev_combiner_buffer_color; 556 vec4 tev_combiner_buffer_color;
527 int alphatest_ref; 557 int alphatest_ref;
558 float depth_scale;
528 float depth_offset; 559 float depth_offset;
529 vec3 lighting_global_ambient; 560 vec3 lighting_global_ambient;
530 LightSrc light_src[NUM_LIGHTS]; 561 LightSrc light_src[NUM_LIGHTS];
@@ -544,29 +575,37 @@ vec4 secondary_fragment_color = vec4(0.0);
544)"; 575)";
545 576
546 // Do not do any sort of processing if it's obvious we're not going to pass the alpha test 577 // Do not do any sort of processing if it's obvious we're not going to pass the alpha test
547 if (config.alpha_test_func == Regs::CompareFunc::Never) { 578 if (state.alpha_test_func == Regs::CompareFunc::Never) {
548 out += "discard; }"; 579 out += "discard; }";
549 return out; 580 return out;
550 } 581 }
551 582
552 if (config.lighting.enable) 583 if (state.lighting.enable)
553 WriteLighting(out, config); 584 WriteLighting(out, config);
554 585
555 out += "vec4 combiner_buffer = vec4(0.0);\n"; 586 out += "vec4 combiner_buffer = vec4(0.0);\n";
556 out += "vec4 next_combiner_buffer = tev_combiner_buffer_color;\n"; 587 out += "vec4 next_combiner_buffer = tev_combiner_buffer_color;\n";
557 out += "vec4 last_tex_env_out = vec4(0.0);\n"; 588 out += "vec4 last_tex_env_out = vec4(0.0);\n";
558 589
559 for (size_t index = 0; index < config.tev_stages.size(); ++index) 590 for (size_t index = 0; index < state.tev_stages.size(); ++index)
560 WriteTevStage(out, config, (unsigned)index); 591 WriteTevStage(out, config, (unsigned)index);
561 592
562 if (config.alpha_test_func != Regs::CompareFunc::Always) { 593 if (state.alpha_test_func != Regs::CompareFunc::Always) {
563 out += "if ("; 594 out += "if (";
564 AppendAlphaTestCondition(out, config.alpha_test_func); 595 AppendAlphaTestCondition(out, state.alpha_test_func);
565 out += ") discard;\n"; 596 out += ") discard;\n";
566 } 597 }
567 598
568 out += "color = last_tex_env_out;\n"; 599 out += "color = last_tex_env_out;\n";
569 out += "gl_FragDepth = gl_FragCoord.z + depth_offset;\n}"; 600
601 out += "float z_over_w = 1.0 - gl_FragCoord.z * 2.0;\n";
602 out += "float depth = z_over_w * depth_scale + depth_offset;\n";
603 if (state.depthmap_enable == Pica::Regs::DepthBuffering::WBuffering) {
604 out += "depth /= gl_FragCoord.w;\n";
605 }
606 out += "gl_FragDepth = depth;\n";
607
608 out += "}";
570 609
571 return out; 610 return out;
572} 611}
@@ -574,17 +613,19 @@ vec4 secondary_fragment_color = vec4(0.0);
574std::string GenerateVertexShader() { 613std::string GenerateVertexShader() {
575 std::string out = "#version 330 core\n"; 614 std::string out = "#version 330 core\n";
576 615
577 out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION) + ") in vec4 vert_position;\n"; 616 out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION) + ") in vec4 vert_position;\n";
578 out += "layout(location = " + std::to_string((int)ATTRIBUTE_COLOR) + ") in vec4 vert_color;\n"; 617 out += "layout(location = " + std::to_string((int)ATTRIBUTE_COLOR) + ") in vec4 vert_color;\n";
579 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0) + ") in vec2 vert_texcoord0;\n"; 618 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0) + ") in vec2 vert_texcoord0;\n";
580 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD1) + ") in vec2 vert_texcoord1;\n"; 619 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD1) + ") in vec2 vert_texcoord1;\n";
581 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD2) + ") in vec2 vert_texcoord2;\n"; 620 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD2) + ") in vec2 vert_texcoord2;\n";
582 out += "layout(location = " + std::to_string((int)ATTRIBUTE_NORMQUAT) + ") in vec4 vert_normquat;\n"; 621 out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0_W) + ") in float vert_texcoord0_w;\n";
583 out += "layout(location = " + std::to_string((int)ATTRIBUTE_VIEW) + ") in vec3 vert_view;\n"; 622 out += "layout(location = " + std::to_string((int)ATTRIBUTE_NORMQUAT) + ") in vec4 vert_normquat;\n";
623 out += "layout(location = " + std::to_string((int)ATTRIBUTE_VIEW) + ") in vec3 vert_view;\n";
584 624
585 out += R"( 625 out += R"(
586out vec4 primary_color; 626out vec4 primary_color;
587out vec2 texcoord[3]; 627out vec2 texcoord[3];
628out float texcoord0_w;
588out vec4 normquat; 629out vec4 normquat;
589out vec3 view; 630out vec3 view;
590 631
@@ -593,6 +634,7 @@ void main() {
593 texcoord[0] = vert_texcoord0; 634 texcoord[0] = vert_texcoord0;
594 texcoord[1] = vert_texcoord1; 635 texcoord[1] = vert_texcoord1;
595 texcoord[2] = vert_texcoord2; 636 texcoord[2] = vert_texcoord2;
637 texcoord0_w = vert_texcoord0_w;
596 normquat = vert_normquat; 638 normquat = vert_normquat;
597 view = vert_view; 639 view = vert_view;
598 gl_Position = vec4(vert_position.x, vert_position.y, -vert_position.z, vert_position.w); 640 gl_Position = vec4(vert_position.x, vert_position.y, -vert_position.z, vert_position.w);
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index 0ca9d2879..bef3249cf 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -6,7 +6,7 @@
6 6
7#include <string> 7#include <string>
8 8
9#include "video_core/renderer_opengl/gl_rasterizer.h" 9union PicaShaderConfig;
10 10
11namespace GLShader { 11namespace GLShader {
12 12
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp
index e3f7a5868..dded3db46 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_util.cpp
@@ -2,9 +2,10 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
6#include <vector> 5#include <vector>
7 6
7#include <glad/glad.h>
8
8#include "common/logging/log.h" 9#include "common/logging/log.h"
9#include "video_core/renderer_opengl/gl_shader_util.h" 10#include "video_core/renderer_opengl/gl_shader_util.h"
10 11
diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h
index 097242f6f..f59912f79 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.h
+++ b/src/video_core/renderer_opengl/gl_shader_util.h
@@ -14,6 +14,7 @@ enum Attributes {
14 ATTRIBUTE_TEXCOORD0, 14 ATTRIBUTE_TEXCOORD0,
15 ATTRIBUTE_TEXCOORD1, 15 ATTRIBUTE_TEXCOORD1,
16 ATTRIBUTE_TEXCOORD2, 16 ATTRIBUTE_TEXCOORD2,
17 ATTRIBUTE_TEXCOORD0_W,
17 ATTRIBUTE_NORMQUAT, 18 ATTRIBUTE_NORMQUAT,
18 ATTRIBUTE_VIEW, 19 ATTRIBUTE_VIEW,
19}; 20};
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 08e4d0b54..02cd9f417 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -2,7 +2,11 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "video_core/pica.h" 5#include <glad/glad.h>
6
7#include "common/common_funcs.h"
8#include "common/logging/log.h"
9
6#include "video_core/renderer_opengl/gl_state.h" 10#include "video_core/renderer_opengl/gl_state.h"
7 11
8OpenGLState OpenGLState::cur_state; 12OpenGLState OpenGLState::cur_state;
@@ -48,17 +52,19 @@ OpenGLState::OpenGLState() {
48 texture_unit.sampler = 0; 52 texture_unit.sampler = 0;
49 } 53 }
50 54
51 for (auto& lut : lighting_lut) { 55 for (auto& lut : lighting_luts) {
52 lut.texture_1d = 0; 56 lut.texture_1d = 0;
53 } 57 }
54 58
55 draw.framebuffer = 0; 59 draw.read_framebuffer = 0;
60 draw.draw_framebuffer = 0;
56 draw.vertex_array = 0; 61 draw.vertex_array = 0;
57 draw.vertex_buffer = 0; 62 draw.vertex_buffer = 0;
63 draw.uniform_buffer = 0;
58 draw.shader_program = 0; 64 draw.shader_program = 0;
59} 65}
60 66
61void OpenGLState::Apply() { 67void OpenGLState::Apply() const {
62 // Culling 68 // Culling
63 if (cull.enabled != cur_state.cull.enabled) { 69 if (cull.enabled != cur_state.cull.enabled) {
64 if (cull.enabled) { 70 if (cull.enabled) {
@@ -175,16 +181,19 @@ void OpenGLState::Apply() {
175 } 181 }
176 182
177 // Lighting LUTs 183 // Lighting LUTs
178 for (unsigned i = 0; i < ARRAY_SIZE(lighting_lut); ++i) { 184 for (unsigned i = 0; i < ARRAY_SIZE(lighting_luts); ++i) {
179 if (lighting_lut[i].texture_1d != cur_state.lighting_lut[i].texture_1d) { 185 if (lighting_luts[i].texture_1d != cur_state.lighting_luts[i].texture_1d) {
180 glActiveTexture(GL_TEXTURE3 + i); 186 glActiveTexture(GL_TEXTURE3 + i);
181 glBindTexture(GL_TEXTURE_1D, lighting_lut[i].texture_1d); 187 glBindTexture(GL_TEXTURE_1D, lighting_luts[i].texture_1d);
182 } 188 }
183 } 189 }
184 190
185 // Framebuffer 191 // Framebuffer
186 if (draw.framebuffer != cur_state.draw.framebuffer) { 192 if (draw.read_framebuffer != cur_state.draw.read_framebuffer) {
187 glBindFramebuffer(GL_FRAMEBUFFER, draw.framebuffer); 193 glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
194 }
195 if (draw.draw_framebuffer != cur_state.draw.draw_framebuffer) {
196 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer);
188 } 197 }
189 198
190 // Vertex array 199 // Vertex array
@@ -210,45 +219,58 @@ void OpenGLState::Apply() {
210 cur_state = *this; 219 cur_state = *this;
211} 220}
212 221
213void OpenGLState::ResetTexture(GLuint id) { 222GLenum OpenGLState::CheckFBStatus(GLenum target) {
223 GLenum fb_status = glCheckFramebufferStatus(target);
224 if (fb_status != GL_FRAMEBUFFER_COMPLETE) {
225 const char* fb_description = (target == GL_READ_FRAMEBUFFER ? "READ" : (target == GL_DRAW_FRAMEBUFFER ? "DRAW" : "UNK"));
226 LOG_CRITICAL(Render_OpenGL, "OpenGL %s framebuffer check failed, status %X", fb_description, fb_status);
227 }
228
229 return fb_status;
230}
231
232void OpenGLState::ResetTexture(GLuint handle) {
214 for (auto& unit : cur_state.texture_units) { 233 for (auto& unit : cur_state.texture_units) {
215 if (unit.texture_2d == id) { 234 if (unit.texture_2d == handle) {
216 unit.texture_2d = 0; 235 unit.texture_2d = 0;
217 } 236 }
218 } 237 }
219} 238}
220 239
221void OpenGLState::ResetSampler(GLuint id) { 240void OpenGLState::ResetSampler(GLuint handle) {
222 for (auto& unit : cur_state.texture_units) { 241 for (auto& unit : cur_state.texture_units) {
223 if (unit.sampler == id) { 242 if (unit.sampler == handle) {
224 unit.sampler = 0; 243 unit.sampler = 0;
225 } 244 }
226 } 245 }
227} 246}
228 247
229void OpenGLState::ResetProgram(GLuint id) { 248void OpenGLState::ResetProgram(GLuint handle) {
230 if (cur_state.draw.shader_program == id) { 249 if (cur_state.draw.shader_program == handle) {
231 cur_state.draw.shader_program = 0; 250 cur_state.draw.shader_program = 0;
232 } 251 }
233} 252}
234 253
235void OpenGLState::ResetBuffer(GLuint id) { 254void OpenGLState::ResetBuffer(GLuint handle) {
236 if (cur_state.draw.vertex_buffer == id) { 255 if (cur_state.draw.vertex_buffer == handle) {
237 cur_state.draw.vertex_buffer = 0; 256 cur_state.draw.vertex_buffer = 0;
238 } 257 }
239 if (cur_state.draw.uniform_buffer == id) { 258 if (cur_state.draw.uniform_buffer == handle) {
240 cur_state.draw.uniform_buffer = 0; 259 cur_state.draw.uniform_buffer = 0;
241 } 260 }
242} 261}
243 262
244void OpenGLState::ResetVertexArray(GLuint id) { 263void OpenGLState::ResetVertexArray(GLuint handle) {
245 if (cur_state.draw.vertex_array == id) { 264 if (cur_state.draw.vertex_array == handle) {
246 cur_state.draw.vertex_array = 0; 265 cur_state.draw.vertex_array = 0;
247 } 266 }
248} 267}
249 268
250void OpenGLState::ResetFramebuffer(GLuint id) { 269void OpenGLState::ResetFramebuffer(GLuint handle) {
251 if (cur_state.draw.framebuffer == id) { 270 if (cur_state.draw.read_framebuffer == handle) {
252 cur_state.draw.framebuffer = 0; 271 cur_state.draw.read_framebuffer = 0;
272 }
273 if (cur_state.draw.draw_framebuffer == handle) {
274 cur_state.draw.draw_framebuffer = 0;
253 } 275 }
254} 276}
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index e848058d7..24f20e47c 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -63,15 +63,15 @@ public:
63 63
64 struct { 64 struct {
65 GLuint texture_1d; // GL_TEXTURE_BINDING_1D 65 GLuint texture_1d; // GL_TEXTURE_BINDING_1D
66 } lighting_lut[6]; 66 } lighting_luts[6];
67 67
68 struct { 68 struct {
69 GLuint framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING 69 GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING
70 GLuint draw_framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING
70 GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING 71 GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING
71 GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING 72 GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING
72 GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING 73 GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING
73 GLuint shader_program; // GL_CURRENT_PROGRAM 74 GLuint shader_program; // GL_CURRENT_PROGRAM
74 bool shader_dirty;
75 } draw; 75 } draw;
76 76
77 OpenGLState(); 77 OpenGLState();
@@ -82,14 +82,18 @@ public:
82 } 82 }
83 83
84 /// Apply this state as the current OpenGL state 84 /// Apply this state as the current OpenGL state
85 void Apply(); 85 void Apply() const;
86 86
87 static void ResetTexture(GLuint id); 87 /// Check the status of the current OpenGL read or draw framebuffer configuration
88 static void ResetSampler(GLuint id); 88 static GLenum CheckFBStatus(GLenum target);
89 static void ResetProgram(GLuint id); 89
90 static void ResetBuffer(GLuint id); 90 /// Resets and unbinds any references to the given resource in the current OpenGL state
91 static void ResetVertexArray(GLuint id); 91 static void ResetTexture(GLuint handle);
92 static void ResetFramebuffer(GLuint id); 92 static void ResetSampler(GLuint handle);
93 static void ResetProgram(GLuint handle);
94 static void ResetBuffer(GLuint handle);
95 static void ResetVertexArray(GLuint handle);
96 static void ResetFramebuffer(GLuint handle);
93 97
94private: 98private:
95 static OpenGLState cur_state; 99 static OpenGLState cur_state;
diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h
index fd3617d77..976d1f364 100644
--- a/src/video_core/renderer_opengl/pica_to_gl.h
+++ b/src/video_core/renderer_opengl/pica_to_gl.h
@@ -4,9 +4,16 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
8#include <cstddef>
9
7#include <glad/glad.h> 10#include <glad/glad.h>
8 11
12#include "common/assert.h"
13#include "common/bit_field.h"
14#include "common/common_funcs.h"
9#include "common/common_types.h" 15#include "common/common_types.h"
16#include "common/logging/log.h"
10 17
11#include "video_core/pica.h" 18#include "video_core/pica.h"
12 19
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 11c4d0daf..8f424a435 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -5,23 +5,28 @@
5#include <algorithm> 5#include <algorithm>
6#include <cstddef> 6#include <cstddef>
7#include <cstdlib> 7#include <cstdlib>
8#include <memory>
9
10#include <glad/glad.h>
8 11
9#include "common/assert.h" 12#include "common/assert.h"
13#include "common/bit_field.h"
10#include "common/emu_window.h" 14#include "common/emu_window.h"
11#include "common/logging/log.h" 15#include "common/logging/log.h"
12#include "common/profiler_reporting.h" 16#include "common/profiler_reporting.h"
17#include "common/synchronized_wrapper.h"
13 18
14#include "core/memory.h"
15#include "core/settings.h"
16#include "core/hw/gpu.h" 19#include "core/hw/gpu.h"
17#include "core/hw/hw.h" 20#include "core/hw/hw.h"
18#include "core/hw/lcd.h" 21#include "core/hw/lcd.h"
22#include "core/memory.h"
23#include "core/settings.h"
24#include "core/tracer/recorder.h"
19 25
20#include "video_core/video_core.h"
21#include "video_core/debug_utils/debug_utils.h" 26#include "video_core/debug_utils/debug_utils.h"
22#include "video_core/renderer_opengl/gl_rasterizer.h" 27#include "video_core/rasterizer_interface.h"
23#include "video_core/renderer_opengl/gl_shader_util.h"
24#include "video_core/renderer_opengl/renderer_opengl.h" 28#include "video_core/renderer_opengl/renderer_opengl.h"
29#include "video_core/video_core.h"
25 30
26static const char vertex_shader[] = R"( 31static const char vertex_shader[] = R"(
27#version 150 core 32#version 150 core
@@ -107,7 +112,7 @@ void RendererOpenGL::SwapBuffers() {
107 OpenGLState prev_state = OpenGLState::GetCurState(); 112 OpenGLState prev_state = OpenGLState::GetCurState();
108 state.Apply(); 113 state.Apply();
109 114
110 for(int i : {0, 1}) { 115 for (int i : {0, 1}) {
111 const auto& framebuffer = GPU::g_regs.framebuffer_config[i]; 116 const auto& framebuffer = GPU::g_regs.framebuffer_config[i];
112 117
113 // Main LCD (0): 0x1ED02204, Sub LCD (1): 0x1ED02A04 118 // Main LCD (0): 0x1ED02204, Sub LCD (1): 0x1ED02A04
@@ -117,25 +122,25 @@ void RendererOpenGL::SwapBuffers() {
117 LCD::Read(color_fill.raw, lcd_color_addr); 122 LCD::Read(color_fill.raw, lcd_color_addr);
118 123
119 if (color_fill.is_enabled) { 124 if (color_fill.is_enabled) {
120 LoadColorToActiveGLTexture(color_fill.color_r, color_fill.color_g, color_fill.color_b, textures[i]); 125 LoadColorToActiveGLTexture(color_fill.color_r, color_fill.color_g, color_fill.color_b, screen_infos[i].texture);
121 126
122 // Resize the texture in case the framebuffer size has changed 127 // Resize the texture in case the framebuffer size has changed
123 textures[i].width = 1; 128 screen_infos[i].texture.width = 1;
124 textures[i].height = 1; 129 screen_infos[i].texture.height = 1;
125 } else { 130 } else {
126 if (textures[i].width != (GLsizei)framebuffer.width || 131 if (screen_infos[i].texture.width != (GLsizei)framebuffer.width ||
127 textures[i].height != (GLsizei)framebuffer.height || 132 screen_infos[i].texture.height != (GLsizei)framebuffer.height ||
128 textures[i].format != framebuffer.color_format) { 133 screen_infos[i].texture.format != framebuffer.color_format) {
129 // Reallocate texture if the framebuffer size has changed. 134 // Reallocate texture if the framebuffer size has changed.
130 // This is expected to not happen very often and hence should not be a 135 // This is expected to not happen very often and hence should not be a
131 // performance problem. 136 // performance problem.
132 ConfigureFramebufferTexture(textures[i], framebuffer); 137 ConfigureFramebufferTexture(screen_infos[i].texture, framebuffer);
133 } 138 }
134 LoadFBToActiveGLTexture(framebuffer, textures[i]); 139 LoadFBToScreenInfo(framebuffer, screen_infos[i]);
135 140
136 // Resize the texture in case the framebuffer size has changed 141 // Resize the texture in case the framebuffer size has changed
137 textures[i].width = framebuffer.width; 142 screen_infos[i].texture.width = framebuffer.width;
138 textures[i].height = framebuffer.height; 143 screen_infos[i].texture.height = framebuffer.height;
139 } 144 }
140 } 145 }
141 146
@@ -166,8 +171,8 @@ void RendererOpenGL::SwapBuffers() {
166/** 171/**
167 * Loads framebuffer from emulated memory into the active OpenGL texture. 172 * Loads framebuffer from emulated memory into the active OpenGL texture.
168 */ 173 */
169void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig& framebuffer, 174void RendererOpenGL::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer,
170 const TextureInfo& texture) { 175 ScreenInfo& screen_info) {
171 176
172 const PAddr framebuffer_addr = framebuffer.active_fb == 0 ? 177 const PAddr framebuffer_addr = framebuffer.active_fb == 0 ?
173 framebuffer.address_left1 : framebuffer.address_left2; 178 framebuffer.address_left1 : framebuffer.address_left2;
@@ -177,8 +182,6 @@ void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig&
177 framebuffer_addr, (int)framebuffer.width, 182 framebuffer_addr, (int)framebuffer.width,
178 (int)framebuffer.height, (int)framebuffer.format); 183 (int)framebuffer.height, (int)framebuffer.format);
179 184
180 const u8* framebuffer_data = Memory::GetPhysicalPointer(framebuffer_addr);
181
182 int bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format); 185 int bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format);
183 size_t pixel_stride = framebuffer.stride / bpp; 186 size_t pixel_stride = framebuffer.stride / bpp;
184 187
@@ -189,24 +192,34 @@ void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig&
189 // only allows rows to have a memory alignement of 4. 192 // only allows rows to have a memory alignement of 4.
190 ASSERT(pixel_stride % 4 == 0); 193 ASSERT(pixel_stride % 4 == 0);
191 194
192 state.texture_units[0].texture_2d = texture.handle; 195 if (!Rasterizer()->AccelerateDisplay(framebuffer, framebuffer_addr, static_cast<u32>(pixel_stride), screen_info)) {
193 state.Apply(); 196 // Reset the screen info's display texture to its own permanent texture
197 screen_info.display_texture = screen_info.texture.resource.handle;
198 screen_info.display_texcoords = MathUtil::Rectangle<float>(0.f, 0.f, 1.f, 1.f);
194 199
195 glActiveTexture(GL_TEXTURE0); 200 Memory::RasterizerFlushRegion(framebuffer_addr, framebuffer.stride * framebuffer.height);
196 glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)pixel_stride);
197 201
198 // Update existing texture 202 const u8* framebuffer_data = Memory::GetPhysicalPointer(framebuffer_addr);
199 // TODO: Test what happens on hardware when you change the framebuffer dimensions so that they
200 // differ from the LCD resolution.
201 // TODO: Applications could theoretically crash Citra here by specifying too large
202 // framebuffer sizes. We should make sure that this cannot happen.
203 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height,
204 texture.gl_format, texture.gl_type, framebuffer_data);
205 203
206 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); 204 state.texture_units[0].texture_2d = screen_info.texture.resource.handle;
205 state.Apply();
207 206
208 state.texture_units[0].texture_2d = 0; 207 glActiveTexture(GL_TEXTURE0);
209 state.Apply(); 208 glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)pixel_stride);
209
210 // Update existing texture
211 // TODO: Test what happens on hardware when you change the framebuffer dimensions so that they
212 // differ from the LCD resolution.
213 // TODO: Applications could theoretically crash Citra here by specifying too large
214 // framebuffer sizes. We should make sure that this cannot happen.
215 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height,
216 screen_info.texture.gl_format, screen_info.texture.gl_type, framebuffer_data);
217
218 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
219
220 state.texture_units[0].texture_2d = 0;
221 state.Apply();
222 }
210} 223}
211 224
212/** 225/**
@@ -216,7 +229,7 @@ void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig&
216 */ 229 */
217void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, 230void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b,
218 const TextureInfo& texture) { 231 const TextureInfo& texture) {
219 state.texture_units[0].texture_2d = texture.handle; 232 state.texture_units[0].texture_2d = texture.resource.handle;
220 state.Apply(); 233 state.Apply();
221 234
222 glActiveTexture(GL_TEXTURE0); 235 glActiveTexture(GL_TEXTURE0);
@@ -224,6 +237,9 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color
224 237
225 // Update existing texture 238 // Update existing texture
226 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, framebuffer_data); 239 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, framebuffer_data);
240
241 state.texture_units[0].texture_2d = 0;
242 state.Apply();
227} 243}
228 244
229/** 245/**
@@ -233,20 +249,22 @@ void RendererOpenGL::InitOpenGLObjects() {
233 glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, 0.0f); 249 glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, 0.0f);
234 250
235 // Link shaders and get variable locations 251 // Link shaders and get variable locations
236 program_id = GLShader::LoadProgram(vertex_shader, fragment_shader); 252 shader.Create(vertex_shader, fragment_shader);
237 uniform_modelview_matrix = glGetUniformLocation(program_id, "modelview_matrix"); 253 state.draw.shader_program = shader.handle;
238 uniform_color_texture = glGetUniformLocation(program_id, "color_texture"); 254 state.Apply();
239 attrib_position = glGetAttribLocation(program_id, "vert_position"); 255 uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix");
240 attrib_tex_coord = glGetAttribLocation(program_id, "vert_tex_coord"); 256 uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture");
257 attrib_position = glGetAttribLocation(shader.handle, "vert_position");
258 attrib_tex_coord = glGetAttribLocation(shader.handle, "vert_tex_coord");
241 259
242 // Generate VBO handle for drawing 260 // Generate VBO handle for drawing
243 glGenBuffers(1, &vertex_buffer_handle); 261 vertex_buffer.Create();
244 262
245 // Generate VAO 263 // Generate VAO
246 glGenVertexArrays(1, &vertex_array_handle); 264 vertex_array.Create();
247 265
248 state.draw.vertex_array = vertex_array_handle; 266 state.draw.vertex_array = vertex_array.handle;
249 state.draw.vertex_buffer = vertex_buffer_handle; 267 state.draw.vertex_buffer = vertex_buffer.handle;
250 state.draw.uniform_buffer = 0; 268 state.draw.uniform_buffer = 0;
251 state.Apply(); 269 state.Apply();
252 270
@@ -258,13 +276,13 @@ void RendererOpenGL::InitOpenGLObjects() {
258 glEnableVertexAttribArray(attrib_tex_coord); 276 glEnableVertexAttribArray(attrib_tex_coord);
259 277
260 // Allocate textures for each screen 278 // Allocate textures for each screen
261 for (auto& texture : textures) { 279 for (auto& screen_info : screen_infos) {
262 glGenTextures(1, &texture.handle); 280 screen_info.texture.resource.Create();
263 281
264 // Allocation of storage is deferred until the first frame, when we 282 // Allocation of storage is deferred until the first frame, when we
265 // know the framebuffer size. 283 // know the framebuffer size.
266 284
267 state.texture_units[0].texture_2d = texture.handle; 285 state.texture_units[0].texture_2d = screen_info.texture.resource.handle;
268 state.Apply(); 286 state.Apply();
269 287
270 glActiveTexture(GL_TEXTURE0); 288 glActiveTexture(GL_TEXTURE0);
@@ -273,6 +291,8 @@ void RendererOpenGL::InitOpenGLObjects() {
273 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 291 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
274 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); 292 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
275 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); 293 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
294
295 screen_info.display_texture = screen_info.texture.resource.handle;
276 } 296 }
277 297
278 state.texture_units[0].texture_2d = 0; 298 state.texture_units[0].texture_2d = 0;
@@ -327,30 +347,38 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
327 UNIMPLEMENTED(); 347 UNIMPLEMENTED();
328 } 348 }
329 349
330 state.texture_units[0].texture_2d = texture.handle; 350 state.texture_units[0].texture_2d = texture.resource.handle;
331 state.Apply(); 351 state.Apply();
332 352
333 glActiveTexture(GL_TEXTURE0); 353 glActiveTexture(GL_TEXTURE0);
334 glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0, 354 glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0,
335 texture.gl_format, texture.gl_type, nullptr); 355 texture.gl_format, texture.gl_type, nullptr);
356
357 state.texture_units[0].texture_2d = 0;
358 state.Apply();
336} 359}
337 360
338/** 361/**
339 * Draws a single texture to the emulator window, rotating the texture to correct for the 3DS's LCD rotation. 362 * Draws a single texture to the emulator window, rotating the texture to correct for the 3DS's LCD rotation.
340 */ 363 */
341void RendererOpenGL::DrawSingleScreenRotated(const TextureInfo& texture, float x, float y, float w, float h) { 364void RendererOpenGL::DrawSingleScreenRotated(const ScreenInfo& screen_info, float x, float y, float w, float h) {
365 auto& texcoords = screen_info.display_texcoords;
366
342 std::array<ScreenRectVertex, 4> vertices = {{ 367 std::array<ScreenRectVertex, 4> vertices = {{
343 ScreenRectVertex(x, y, 1.f, 0.f), 368 ScreenRectVertex(x, y, texcoords.bottom, texcoords.left),
344 ScreenRectVertex(x+w, y, 1.f, 1.f), 369 ScreenRectVertex(x+w, y, texcoords.bottom, texcoords.right),
345 ScreenRectVertex(x, y+h, 0.f, 0.f), 370 ScreenRectVertex(x, y+h, texcoords.top, texcoords.left),
346 ScreenRectVertex(x+w, y+h, 0.f, 1.f), 371 ScreenRectVertex(x+w, y+h, texcoords.top, texcoords.right),
347 }}; 372 }};
348 373
349 state.texture_units[0].texture_2d = texture.handle; 374 state.texture_units[0].texture_2d = screen_info.display_texture;
350 state.Apply(); 375 state.Apply();
351 376
352 glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices.data()); 377 glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices.data());
353 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); 378 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
379
380 state.texture_units[0].texture_2d = 0;
381 state.Apply();
354} 382}
355 383
356/** 384/**
@@ -362,9 +390,6 @@ void RendererOpenGL::DrawScreens() {
362 glViewport(0, 0, layout.width, layout.height); 390 glViewport(0, 0, layout.width, layout.height);
363 glClear(GL_COLOR_BUFFER_BIT); 391 glClear(GL_COLOR_BUFFER_BIT);
364 392
365 state.draw.shader_program = program_id;
366 state.Apply();
367
368 // Set projection matrix 393 // Set projection matrix
369 std::array<GLfloat, 3 * 2> ortho_matrix = MakeOrthographicMatrix((float)layout.width, 394 std::array<GLfloat, 3 * 2> ortho_matrix = MakeOrthographicMatrix((float)layout.width,
370 (float)layout.height); 395 (float)layout.height);
@@ -374,9 +399,9 @@ void RendererOpenGL::DrawScreens() {
374 glActiveTexture(GL_TEXTURE0); 399 glActiveTexture(GL_TEXTURE0);
375 glUniform1i(uniform_color_texture, 0); 400 glUniform1i(uniform_color_texture, 0);
376 401
377 DrawSingleScreenRotated(textures[0], (float)layout.top_screen.left, (float)layout.top_screen.top, 402 DrawSingleScreenRotated(screen_infos[0], (float)layout.top_screen.left, (float)layout.top_screen.top,
378 (float)layout.top_screen.GetWidth(), (float)layout.top_screen.GetHeight()); 403 (float)layout.top_screen.GetWidth(), (float)layout.top_screen.GetHeight());
379 DrawSingleScreenRotated(textures[1], (float)layout.bottom_screen.left,(float)layout.bottom_screen.top, 404 DrawSingleScreenRotated(screen_infos[1], (float)layout.bottom_screen.left,(float)layout.bottom_screen.top,
380 (float)layout.bottom_screen.GetWidth(), (float)layout.bottom_screen.GetHeight()); 405 (float)layout.bottom_screen.GetWidth(), (float)layout.bottom_screen.GetHeight());
381 406
382 m_current_frame++; 407 m_current_frame++;
@@ -448,12 +473,6 @@ static void DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity,
448bool RendererOpenGL::Init() { 473bool RendererOpenGL::Init() {
449 render_window->MakeCurrent(); 474 render_window->MakeCurrent();
450 475
451 // TODO: Make frontends initialize this, so they can use gladLoadGLLoader with their own loaders
452 if (!gladLoadGL()) {
453 LOG_CRITICAL(Render_OpenGL, "Failed to initialize GL functions! Exiting...");
454 exit(-1);
455 }
456
457 if (GLAD_GL_KHR_debug) { 476 if (GLAD_GL_KHR_debug) {
458 glEnable(GL_DEBUG_OUTPUT); 477 glEnable(GL_DEBUG_OUTPUT);
459 glDebugMessageCallback(DebugHandler, nullptr); 478 glDebugMessageCallback(DebugHandler, nullptr);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index fe4d142a5..00e1044ab 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -8,13 +8,34 @@
8 8
9#include <glad/glad.h> 9#include <glad/glad.h>
10 10
11#include "common/common_types.h"
12#include "common/math_util.h"
13
11#include "core/hw/gpu.h" 14#include "core/hw/gpu.h"
12 15
13#include "video_core/renderer_base.h" 16#include "video_core/renderer_base.h"
17#include "video_core/renderer_opengl/gl_resource_manager.h"
14#include "video_core/renderer_opengl/gl_state.h" 18#include "video_core/renderer_opengl/gl_state.h"
15 19
16class EmuWindow; 20class EmuWindow;
17 21
22/// Structure used for storing information about the textures for each 3DS screen
23struct TextureInfo {
24 OGLTexture resource;
25 GLsizei width;
26 GLsizei height;
27 GPU::Regs::PixelFormat format;
28 GLenum gl_format;
29 GLenum gl_type;
30};
31
32/// Structure used for storing information about the display target for each 3DS screen
33struct ScreenInfo {
34 GLuint display_texture;
35 MathUtil::Rectangle<float> display_texcoords;
36 TextureInfo texture;
37};
38
18class RendererOpenGL : public RendererBase { 39class RendererOpenGL : public RendererBase {
19public: 40public:
20 41
@@ -37,26 +58,16 @@ public:
37 void ShutDown() override; 58 void ShutDown() override;
38 59
39private: 60private:
40 /// Structure used for storing information about the textures for each 3DS screen
41 struct TextureInfo {
42 GLuint handle;
43 GLsizei width;
44 GLsizei height;
45 GPU::Regs::PixelFormat format;
46 GLenum gl_format;
47 GLenum gl_type;
48 };
49
50 void InitOpenGLObjects(); 61 void InitOpenGLObjects();
51 void ConfigureFramebufferTexture(TextureInfo& texture, 62 void ConfigureFramebufferTexture(TextureInfo& texture,
52 const GPU::Regs::FramebufferConfig& framebuffer); 63 const GPU::Regs::FramebufferConfig& framebuffer);
53 void DrawScreens(); 64 void DrawScreens();
54 void DrawSingleScreenRotated(const TextureInfo& texture, float x, float y, float w, float h); 65 void DrawSingleScreenRotated(const ScreenInfo& screen_info, float x, float y, float w, float h);
55 void UpdateFramerate(); 66 void UpdateFramerate();
56 67
57 // Loads framebuffer from emulated memory into the active OpenGL texture. 68 // Loads framebuffer from emulated memory into the display information structure
58 void LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig& framebuffer, 69 void LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer,
59 const TextureInfo& texture); 70 ScreenInfo& screen_info);
60 // Fills active OpenGL texture with the given RGB color. 71 // Fills active OpenGL texture with the given RGB color.
61 void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, 72 void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b,
62 const TextureInfo& texture); 73 const TextureInfo& texture);
@@ -69,10 +80,10 @@ private:
69 OpenGLState state; 80 OpenGLState state;
70 81
71 // OpenGL object IDs 82 // OpenGL object IDs
72 GLuint vertex_array_handle; 83 OGLVertexArray vertex_array;
73 GLuint vertex_buffer_handle; 84 OGLBuffer vertex_buffer;
74 GLuint program_id; 85 OGLShader shader;
75 std::array<TextureInfo, 2> textures; ///< Textures for top and bottom screens respectively 86 std::array<ScreenInfo, 2> screen_infos; ///< Display information for top and bottom screens respectively
76 // Shader uniform location indices 87 // Shader uniform location indices
77 GLuint uniform_modelview_matrix; 88 GLuint uniform_modelview_matrix;
78 GLuint uniform_color_texture; 89 GLuint uniform_color_texture;
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index 75301accd..e93a9d92a 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -2,27 +2,30 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <memory> 5#include <atomic>
6#include <cmath>
7#include <cstring>
6#include <unordered_map> 8#include <unordered_map>
9#include <utility>
7 10
8#include <boost/range/algorithm/fill.hpp> 11#include <boost/range/algorithm/fill.hpp>
9 12
13#include "common/bit_field.h"
10#include "common/hash.h" 14#include "common/hash.h"
15#include "common/logging/log.h"
11#include "common/microprofile.h" 16#include "common/microprofile.h"
12#include "common/profiler.h"
13 17
14#include "video_core/debug_utils/debug_utils.h"
15#include "video_core/pica.h" 18#include "video_core/pica.h"
16#include "video_core/pica_state.h" 19#include "video_core/pica_state.h"
17#include "video_core/video_core.h" 20#include "video_core/shader/shader.h"
18 21#include "video_core/shader/shader_interpreter.h"
19#include "shader.h"
20#include "shader_interpreter.h"
21 22
22#ifdef ARCHITECTURE_x86_64 23#ifdef ARCHITECTURE_x86_64
23#include "shader_jit_x64.h" 24#include "video_core/shader/shader_jit_x64.h"
24#endif // ARCHITECTURE_x86_64 25#endif // ARCHITECTURE_x86_64
25 26
27#include "video_core/video_core.h"
28
26namespace Pica { 29namespace Pica {
27 30
28namespace Shader { 31namespace Shader {
@@ -32,7 +35,13 @@ static std::unordered_map<u64, std::unique_ptr<JitShader>> shader_map;
32static const JitShader* jit_shader; 35static const JitShader* jit_shader;
33#endif // ARCHITECTURE_x86_64 36#endif // ARCHITECTURE_x86_64
34 37
35void Setup() { 38void ClearCache() {
39#ifdef ARCHITECTURE_x86_64
40 shader_map.clear();
41#endif // ARCHITECTURE_x86_64
42}
43
44void ShaderSetup::Setup() {
36#ifdef ARCHITECTURE_x86_64 45#ifdef ARCHITECTURE_x86_64
37 if (VideoCore::g_shader_jit_enabled) { 46 if (VideoCore::g_shader_jit_enabled) {
38 u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ 47 u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^
@@ -51,46 +60,21 @@ void Setup() {
51#endif // ARCHITECTURE_x86_64 60#endif // ARCHITECTURE_x86_64
52} 61}
53 62
54void Shutdown() { 63MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240));
55#ifdef ARCHITECTURE_x86_64
56 shader_map.clear();
57#endif // ARCHITECTURE_x86_64
58}
59
60static Common::Profiling::TimingCategory shader_category("Vertex Shader");
61MICROPROFILE_DEFINE(GPU_VertexShader, "GPU", "Vertex Shader", MP_RGB(50, 50, 240));
62 64
63OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes) { 65OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num_attributes) {
64 auto& config = g_state.regs.vs; 66 auto& config = g_state.regs.vs;
65 67
66 Common::Profiling::ScopeTimer timer(shader_category); 68 MICROPROFILE_SCOPE(GPU_Shader);
67 MICROPROFILE_SCOPE(GPU_VertexShader);
68 69
69 state.program_counter = config.main_offset;
70 state.debug.max_offset = 0; 70 state.debug.max_offset = 0;
71 state.debug.max_opdesc_id = 0; 71 state.debug.max_opdesc_id = 0;
72 72
73 // Setup input register table 73 // Setup input register table
74 const auto& attribute_register_map = config.input_register_map; 74 const auto& attribute_register_map = config.input_register_map;
75 75
76 // TODO: Instead of this cumbersome logic, just load the input data directly like 76 for (unsigned i = 0; i < num_attributes; i++)
77 // for (int attr = 0; attr < num_attributes; ++attr) { input_attr[0] = state.registers.input[attribute_register_map.attribute0_register]; } 77 state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i];
78 if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = input.attr[0];
79 if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = input.attr[1];
80 if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = input.attr[2];
81 if (num_attributes > 3) state.registers.input[attribute_register_map.attribute3_register] = input.attr[3];
82 if (num_attributes > 4) state.registers.input[attribute_register_map.attribute4_register] = input.attr[4];
83 if (num_attributes > 5) state.registers.input[attribute_register_map.attribute5_register] = input.attr[5];
84 if (num_attributes > 6) state.registers.input[attribute_register_map.attribute6_register] = input.attr[6];
85 if (num_attributes > 7) state.registers.input[attribute_register_map.attribute7_register] = input.attr[7];
86 if (num_attributes > 8) state.registers.input[attribute_register_map.attribute8_register] = input.attr[8];
87 if (num_attributes > 9) state.registers.input[attribute_register_map.attribute9_register] = input.attr[9];
88 if (num_attributes > 10) state.registers.input[attribute_register_map.attribute10_register] = input.attr[10];
89 if (num_attributes > 11) state.registers.input[attribute_register_map.attribute11_register] = input.attr[11];
90 if (num_attributes > 12) state.registers.input[attribute_register_map.attribute12_register] = input.attr[12];
91 if (num_attributes > 13) state.registers.input[attribute_register_map.attribute13_register] = input.attr[13];
92 if (num_attributes > 14) state.registers.input[attribute_register_map.attribute14_register] = input.attr[14];
93 if (num_attributes > 15) state.registers.input[attribute_register_map.attribute15_register] = input.attr[15];
94 78
95 state.conditional_code[0] = false; 79 state.conditional_code[0] = false;
96 state.conditional_code[1] = false; 80 state.conditional_code[1] = false;
@@ -155,10 +139,9 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr
155 return ret; 139 return ret;
156} 140}
157 141
158DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) { 142DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) {
159 UnitState<true> state; 143 UnitState<true> state;
160 144
161 state.program_counter = config.main_offset;
162 state.debug.max_offset = 0; 145 state.debug.max_offset = 0;
163 state.debug.max_opdesc_id = 0; 146 state.debug.max_opdesc_id = 0;
164 147
@@ -167,22 +150,8 @@ DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, c
167 float24 dummy_register; 150 float24 dummy_register;
168 boost::fill(state.registers.input, &dummy_register); 151 boost::fill(state.registers.input, &dummy_register);
169 152
170 if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = &input.attr[0].x; 153 for (unsigned i = 0; i < num_attributes; i++)
171 if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = &input.attr[1].x; 154 state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i];
172 if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = &input.attr[2].x;
173 if (num_attributes > 3) state.registers.input[attribute_register_map.attribute3_register] = &input.attr[3].x;
174 if (num_attributes > 4) state.registers.input[attribute_register_map.attribute4_register] = &input.attr[4].x;
175 if (num_attributes > 5) state.registers.input[attribute_register_map.attribute5_register] = &input.attr[5].x;
176 if (num_attributes > 6) state.registers.input[attribute_register_map.attribute6_register] = &input.attr[6].x;
177 if (num_attributes > 7) state.registers.input[attribute_register_map.attribute7_register] = &input.attr[7].x;
178 if (num_attributes > 8) state.registers.input[attribute_register_map.attribute8_register] = &input.attr[8].x;
179 if (num_attributes > 9) state.registers.input[attribute_register_map.attribute9_register] = &input.attr[9].x;
180 if (num_attributes > 10) state.registers.input[attribute_register_map.attribute10_register] = &input.attr[10].x;
181 if (num_attributes > 11) state.registers.input[attribute_register_map.attribute11_register] = &input.attr[11].x;
182 if (num_attributes > 12) state.registers.input[attribute_register_map.attribute12_register] = &input.attr[12].x;
183 if (num_attributes > 13) state.registers.input[attribute_register_map.attribute13_register] = &input.attr[13].x;
184 if (num_attributes > 14) state.registers.input[attribute_register_map.attribute14_register] = &input.attr[14].x;
185 if (num_attributes > 15) state.registers.input[attribute_register_map.attribute15_register] = &input.attr[15].x;
186 155
187 state.conditional_code[0] = false; 156 state.conditional_code[0] = false;
188 state.conditional_code[1] = false; 157 state.conditional_code[1] = false;
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index 9c5bd97bd..983e4a967 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -4,17 +4,23 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
8#include <cstddef>
9#include <memory>
10#include <type_traits>
7#include <vector> 11#include <vector>
8 12
9#include <boost/container/static_vector.hpp> 13#include <boost/container/static_vector.hpp>
10 14
11#include <nihstro/shader_binary.h> 15#include <nihstro/shader_bytecode.h>
12 16
17#include "common/assert.h"
13#include "common/common_funcs.h" 18#include "common/common_funcs.h"
14#include "common/common_types.h" 19#include "common/common_types.h"
15#include "common/vector_math.h" 20#include "common/vector_math.h"
16 21
17#include "video_core/pica.h" 22#include "video_core/pica.h"
23#include "video_core/pica_types.h"
18 24
19using nihstro::RegisterType; 25using nihstro::RegisterType;
20using nihstro::SourceRegister; 26using nihstro::SourceRegister;
@@ -25,7 +31,7 @@ namespace Pica {
25namespace Shader { 31namespace Shader {
26 32
27struct InputVertex { 33struct InputVertex {
28 Math::Vec4<float24> attr[16]; 34 alignas(16) Math::Vec4<float24> attr[16];
29}; 35};
30 36
31struct OutputVertex { 37struct OutputVertex {
@@ -37,7 +43,8 @@ struct OutputVertex {
37 Math::Vec4<float24> color; 43 Math::Vec4<float24> color;
38 Math::Vec2<float24> tc0; 44 Math::Vec2<float24> tc0;
39 Math::Vec2<float24> tc1; 45 Math::Vec2<float24> tc1;
40 INSERT_PADDING_WORDS(2); 46 float24 tc0_w;
47 INSERT_PADDING_WORDS(1);
41 Math::Vec3<float24> view; 48 Math::Vec3<float24> view;
42 INSERT_PADDING_WORDS(1); 49 INSERT_PADDING_WORDS(1);
43 Math::Vec2<float24> tc2; 50 Math::Vec2<float24> tc2;
@@ -77,23 +84,6 @@ struct OutputVertex {
77static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); 84static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD");
78static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); 85static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size");
79 86
80/// Vertex shader memory
81struct ShaderSetup {
82 struct {
83 // The float uniforms are accessed by the shader JIT using SSE instructions, and are
84 // therefore required to be 16-byte aligned.
85 alignas(16) Math::Vec4<float24> f[96];
86
87 std::array<bool, 16> b;
88 std::array<Math::Vec4<u8>, 4> i;
89 } uniforms;
90
91 Math::Vec4<float24> default_attributes[16];
92
93 std::array<u32, 1024> program_code;
94 std::array<u32, 1024> swizzle_data;
95};
96
97// Helper structure used to keep track of data useful for inspection of shader emulation 87// Helper structure used to keep track of data useful for inspection of shader emulation
98template<bool full_debugging> 88template<bool full_debugging>
99struct DebugData; 89struct DebugData;
@@ -282,29 +272,12 @@ struct UnitState {
282 } registers; 272 } registers;
283 static_assert(std::is_pod<Registers>::value, "Structure is not POD"); 273 static_assert(std::is_pod<Registers>::value, "Structure is not POD");
284 274
285 u32 program_counter;
286 bool conditional_code[2]; 275 bool conditional_code[2];
287 276
288 // Two Address registers and one loop counter 277 // Two Address registers and one loop counter
289 // TODO: How many bits do these actually have? 278 // TODO: How many bits do these actually have?
290 s32 address_registers[3]; 279 s32 address_registers[3];
291 280
292 enum {
293 INVALID_ADDRESS = 0xFFFFFFFF
294 };
295
296 struct CallStackElement {
297 u32 final_address; // Address upon which we jump to return_address
298 u32 return_address; // Where to jump when leaving scope
299 u8 repeat_counter; // How often to repeat until this call stack element is removed
300 u8 loop_increment; // Which value to add to the loop counter after an iteration
301 // TODO: Should this be a signed value? Does it even matter?
302 u32 loop_address; // The address where we'll return to after each loop iteration
303 };
304
305 // TODO: Is there a maximal size for this?
306 boost::container::static_vector<CallStackElement, 16> call_stack;
307
308 DebugData<Debug> debug; 281 DebugData<Debug> debug;
309 282
310 static size_t InputOffset(const SourceRegister& reg) { 283 static size_t InputOffset(const SourceRegister& reg) {
@@ -336,33 +309,49 @@ struct UnitState {
336 } 309 }
337}; 310};
338 311
339/** 312/// Clears the shader cache
340 * Performs any shader unit setup that only needs to happen once per shader (as opposed to once per 313void ClearCache();
341 * vertex, which would happen within the `Run` function).
342 */
343void Setup();
344 314
345/// Performs any cleanup when the emulator is shutdown 315struct ShaderSetup {
346void Shutdown();
347 316
348/** 317 struct {
349 * Runs the currently setup shader 318 // The float uniforms are accessed by the shader JIT using SSE instructions, and are
350 * @param state Shader unit state, must be setup per shader and per shader unit 319 // therefore required to be 16-byte aligned.
351 * @param input Input vertex into the shader 320 alignas(16) Math::Vec4<float24> f[96];
352 * @param num_attributes The number of vertex shader attributes
353 * @return The output vertex, after having been processed by the vertex shader
354 */
355OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes);
356 321
357/** 322 std::array<bool, 16> b;
358 * Produce debug information based on the given shader and input vertex 323 std::array<Math::Vec4<u8>, 4> i;
359 * @param input Input vertex into the shader 324 } uniforms;
360 * @param num_attributes The number of vertex shader attributes 325
361 * @param config Configuration object for the shader pipeline 326 std::array<u32, 1024> program_code;
362 * @param setup Setup object for the shader pipeline 327 std::array<u32, 1024> swizzle_data;
363 * @return Debug information for this shader with regards to the given vertex 328
364 */ 329 /**
365DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup); 330 * Performs any shader unit setup that only needs to happen once per shader (as opposed to once per
331 * vertex, which would happen within the `Run` function).
332 */
333 void Setup();
334
335 /**
336 * Runs the currently setup shader
337 * @param state Shader unit state, must be setup per shader and per shader unit
338 * @param input Input vertex into the shader
339 * @param num_attributes The number of vertex shader attributes
340 * @return The output vertex, after having been processed by the vertex shader
341 */
342 OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes);
343
344 /**
345 * Produce debug information based on the given shader and input vertex
346 * @param input Input vertex into the shader
347 * @param num_attributes The number of vertex shader attributes
348 * @param config Configuration object for the shader pipeline
349 * @param setup Setup object for the shader pipeline
350 * @return Debug information for this shader with regards to the given vertex
351 */
352 DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup);
353
354};
366 355
367} // namespace Shader 356} // namespace Shader
368 357
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp
index 9b978583e..3a827d11f 100644
--- a/src/video_core/shader/shader_interpreter.cpp
+++ b/src/video_core/shader/shader_interpreter.cpp
@@ -2,12 +2,20 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
6#include <array>
7#include <cmath>
5#include <numeric> 8#include <numeric>
9
6#include <nihstro/shader_bytecode.h> 10#include <nihstro/shader_bytecode.h>
7 11
8#include "common/file_util.h" 12#include "common/assert.h"
9#include "video_core/pica.h" 13#include "common/common_types.h"
14#include "common/logging/log.h"
15#include "common/vector_math.h"
16
10#include "video_core/pica_state.h" 17#include "video_core/pica_state.h"
18#include "video_core/pica_types.h"
11#include "video_core/shader/shader.h" 19#include "video_core/shader/shader.h"
12#include "video_core/shader/shader_interpreter.h" 20#include "video_core/shader/shader_interpreter.h"
13 21
@@ -21,8 +29,24 @@ namespace Pica {
21 29
22namespace Shader { 30namespace Shader {
23 31
32constexpr u32 INVALID_ADDRESS = 0xFFFFFFFF;
33
34struct CallStackElement {
35 u32 final_address; // Address upon which we jump to return_address
36 u32 return_address; // Where to jump when leaving scope
37 u8 repeat_counter; // How often to repeat until this call stack element is removed
38 u8 loop_increment; // Which value to add to the loop counter after an iteration
39 // TODO: Should this be a signed value? Does it even matter?
40 u32 loop_address; // The address where we'll return to after each loop iteration
41};
42
24template<bool Debug> 43template<bool Debug>
25void RunInterpreter(UnitState<Debug>& state) { 44void RunInterpreter(UnitState<Debug>& state) {
45 // TODO: Is there a maximal size for this?
46 boost::container::static_vector<CallStackElement, 16> call_stack;
47
48 u32 program_counter = g_state.regs.vs.main_offset;
49
26 const auto& uniforms = g_state.vs.uniforms; 50 const auto& uniforms = g_state.vs.uniforms;
27 const auto& swizzle_data = g_state.vs.swizzle_data; 51 const auto& swizzle_data = g_state.vs.swizzle_data;
28 const auto& program_code = g_state.vs.program_code; 52 const auto& program_code = g_state.vs.program_code;
@@ -33,16 +57,16 @@ void RunInterpreter(UnitState<Debug>& state) {
33 unsigned iteration = 0; 57 unsigned iteration = 0;
34 bool exit_loop = false; 58 bool exit_loop = false;
35 while (!exit_loop) { 59 while (!exit_loop) {
36 if (!state.call_stack.empty()) { 60 if (!call_stack.empty()) {
37 auto& top = state.call_stack.back(); 61 auto& top = call_stack.back();
38 if (state.program_counter == top.final_address) { 62 if (program_counter == top.final_address) {
39 state.address_registers[2] += top.loop_increment; 63 state.address_registers[2] += top.loop_increment;
40 64
41 if (top.repeat_counter-- == 0) { 65 if (top.repeat_counter-- == 0) {
42 state.program_counter = top.return_address; 66 program_counter = top.return_address;
43 state.call_stack.pop_back(); 67 call_stack.pop_back();
44 } else { 68 } else {
45 state.program_counter = top.loop_address; 69 program_counter = top.loop_address;
46 } 70 }
47 71
48 // TODO: Is "trying again" accurate to hardware? 72 // TODO: Is "trying again" accurate to hardware?
@@ -50,20 +74,20 @@ void RunInterpreter(UnitState<Debug>& state) {
50 } 74 }
51 } 75 }
52 76
53 const Instruction instr = { program_code[state.program_counter] }; 77 const Instruction instr = { program_code[program_counter] };
54 const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] }; 78 const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] };
55 79
56 static auto call = [](UnitState<Debug>& state, u32 offset, u32 num_instructions, 80 static auto call = [&program_counter, &call_stack](UnitState<Debug>& state, u32 offset, u32 num_instructions,
57 u32 return_offset, u8 repeat_count, u8 loop_increment) { 81 u32 return_offset, u8 repeat_count, u8 loop_increment) {
58 state.program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset 82 program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset
59 ASSERT(state.call_stack.size() < state.call_stack.capacity()); 83 ASSERT(call_stack.size() < call_stack.capacity());
60 state.call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); 84 call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset });
61 }; 85 };
62 Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, state.program_counter); 86 Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, program_counter);
63 if (iteration > 0) 87 if (iteration > 0)
64 Record<DebugDataRecord::NEXT_INSTR>(state.debug, iteration - 1, state.program_counter); 88 Record<DebugDataRecord::NEXT_INSTR>(state.debug, iteration - 1, program_counter);
65 89
66 state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + state.program_counter); 90 state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + program_counter);
67 91
68 auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { 92 auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* {
69 switch (source_reg.GetRegisterType()) { 93 switch (source_reg.GetRegisterType()) {
@@ -511,7 +535,7 @@ void RunInterpreter(UnitState<Debug>& state) {
511 case OpCode::Id::JMPC: 535 case OpCode::Id::JMPC:
512 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); 536 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code);
513 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { 537 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) {
514 state.program_counter = instr.flow_control.dest_offset - 1; 538 program_counter = instr.flow_control.dest_offset - 1;
515 } 539 }
516 break; 540 break;
517 541
@@ -519,7 +543,7 @@ void RunInterpreter(UnitState<Debug>& state) {
519 Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); 543 Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
520 544
521 if (uniforms.b[instr.flow_control.bool_uniform_id] == !(instr.flow_control.num_instructions & 1)) { 545 if (uniforms.b[instr.flow_control.bool_uniform_id] == !(instr.flow_control.num_instructions & 1)) {
522 state.program_counter = instr.flow_control.dest_offset - 1; 546 program_counter = instr.flow_control.dest_offset - 1;
523 } 547 }
524 break; 548 break;
525 549
@@ -527,7 +551,7 @@ void RunInterpreter(UnitState<Debug>& state) {
527 call(state, 551 call(state,
528 instr.flow_control.dest_offset, 552 instr.flow_control.dest_offset,
529 instr.flow_control.num_instructions, 553 instr.flow_control.num_instructions,
530 state.program_counter + 1, 0, 0); 554 program_counter + 1, 0, 0);
531 break; 555 break;
532 556
533 case OpCode::Id::CALLU: 557 case OpCode::Id::CALLU:
@@ -536,7 +560,7 @@ void RunInterpreter(UnitState<Debug>& state) {
536 call(state, 560 call(state,
537 instr.flow_control.dest_offset, 561 instr.flow_control.dest_offset,
538 instr.flow_control.num_instructions, 562 instr.flow_control.num_instructions,
539 state.program_counter + 1, 0, 0); 563 program_counter + 1, 0, 0);
540 } 564 }
541 break; 565 break;
542 566
@@ -546,7 +570,7 @@ void RunInterpreter(UnitState<Debug>& state) {
546 call(state, 570 call(state,
547 instr.flow_control.dest_offset, 571 instr.flow_control.dest_offset,
548 instr.flow_control.num_instructions, 572 instr.flow_control.num_instructions,
549 state.program_counter + 1, 0, 0); 573 program_counter + 1, 0, 0);
550 } 574 }
551 break; 575 break;
552 576
@@ -557,8 +581,8 @@ void RunInterpreter(UnitState<Debug>& state) {
557 Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); 581 Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
558 if (uniforms.b[instr.flow_control.bool_uniform_id]) { 582 if (uniforms.b[instr.flow_control.bool_uniform_id]) {
559 call(state, 583 call(state,
560 state.program_counter + 1, 584 program_counter + 1,
561 instr.flow_control.dest_offset - state.program_counter - 1, 585 instr.flow_control.dest_offset - program_counter - 1,
562 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); 586 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
563 } else { 587 } else {
564 call(state, 588 call(state,
@@ -576,8 +600,8 @@ void RunInterpreter(UnitState<Debug>& state) {
576 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); 600 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code);
577 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { 601 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) {
578 call(state, 602 call(state,
579 state.program_counter + 1, 603 program_counter + 1,
580 instr.flow_control.dest_offset - state.program_counter - 1, 604 instr.flow_control.dest_offset - program_counter - 1,
581 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); 605 instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0);
582 } else { 606 } else {
583 call(state, 607 call(state,
@@ -599,8 +623,8 @@ void RunInterpreter(UnitState<Debug>& state) {
599 623
600 Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param); 624 Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param);
601 call(state, 625 call(state,
602 state.program_counter + 1, 626 program_counter + 1,
603 instr.flow_control.dest_offset - state.program_counter + 1, 627 instr.flow_control.dest_offset - program_counter + 1,
604 instr.flow_control.dest_offset + 1, 628 instr.flow_control.dest_offset + 1,
605 loop_param.x, 629 loop_param.x,
606 loop_param.z); 630 loop_param.z);
@@ -617,7 +641,7 @@ void RunInterpreter(UnitState<Debug>& state) {
617 } 641 }
618 } 642 }
619 643
620 ++state.program_counter; 644 ++program_counter;
621 ++iteration; 645 ++iteration;
622 } 646 }
623} 647}
diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h
index 294bca50e..6048cdf3a 100644
--- a/src/video_core/shader/shader_interpreter.h
+++ b/src/video_core/shader/shader_interpreter.h
@@ -4,12 +4,12 @@
4 4
5#pragma once 5#pragma once
6 6
7#include "video_core/shader/shader.h"
8
9namespace Pica { 7namespace Pica {
10 8
11namespace Shader { 9namespace Shader {
12 10
11template <bool Debug> struct UnitState;
12
13template<bool Debug> 13template<bool Debug>
14void RunInterpreter(UnitState<Debug>& state); 14void RunInterpreter(UnitState<Debug>& state);
15 15
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index b47d3beda..99f6c51eb 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -3,8 +3,15 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm> 5#include <algorithm>
6#include <smmintrin.h> 6#include <cmath>
7#include <cstdint>
8#include <xmmintrin.h>
7 9
10#include <nihstro/shader_bytecode.h>
11
12#include "common/assert.h"
13#include "common/logging/log.h"
14#include "common/vector_math.h"
8#include "common/x64/abi.h" 15#include "common/x64/abi.h"
9#include "common/x64/cpu_detect.h" 16#include "common/x64/cpu_detect.h"
10#include "common/x64/emitter.h" 17#include "common/x64/emitter.h"
@@ -13,6 +20,7 @@
13#include "shader_jit_x64.h" 20#include "shader_jit_x64.h"
14 21
15#include "video_core/pica_state.h" 22#include "video_core/pica_state.h"
23#include "video_core/pica_types.h"
16 24
17namespace Pica { 25namespace Pica {
18 26
@@ -148,7 +156,7 @@ static Instruction GetVertexShaderInstruction(size_t offset) {
148} 156}
149 157
150static void LogCritical(const char* msg) { 158static void LogCritical(const char* msg) {
151 LOG_CRITICAL(HW_GPU, msg); 159 LOG_CRITICAL(HW_GPU, "%s", msg);
152} 160}
153 161
154void JitShader::Compile_Assert(bool condition, const char* msg) { 162void JitShader::Compile_Assert(bool condition, const char* msg) {
@@ -795,6 +803,8 @@ void JitShader::FindReturnOffsets() {
795 case OpCode::Id::CALLU: 803 case OpCode::Id::CALLU:
796 return_offsets.push_back(instr.flow_control.dest_offset + instr.flow_control.num_instructions); 804 return_offsets.push_back(instr.flow_control.dest_offset + instr.flow_control.num_instructions);
797 break; 805 break;
806 default:
807 break;
798 } 808 }
799 } 809 }
800 810
@@ -854,7 +864,7 @@ void JitShader::Compile() {
854 uintptr_t size = reinterpret_cast<uintptr_t>(GetCodePtr()) - reinterpret_cast<uintptr_t>(program); 864 uintptr_t size = reinterpret_cast<uintptr_t>(GetCodePtr()) - reinterpret_cast<uintptr_t>(program);
855 ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!"); 865 ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!");
856 866
857 LOG_DEBUG(HW_GPU, "Compiled shader size=%d", size); 867 LOG_DEBUG(HW_GPU, "Compiled shader size=%lu", size);
858} 868}
859 869
860JitShader::JitShader() { 870JitShader::JitShader() {
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index cd6280ade..30aa7ff30 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -4,14 +4,17 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
8#include <cstddef>
7#include <utility> 9#include <utility>
8#include <vector> 10#include <vector>
9 11
10#include <nihstro/shader_bytecode.h> 12#include <nihstro/shader_bytecode.h>
11 13
14#include "common/bit_set.h"
15#include "common/common_types.h"
12#include "common/x64/emitter.h" 16#include "common/x64/emitter.h"
13 17
14#include "video_core/pica.h"
15#include "video_core/shader/shader.h" 18#include "video_core/shader/shader.h"
16 19
17using nihstro::Instruction; 20using nihstro::Instruction;
diff --git a/src/video_core/swrasterizer.h b/src/video_core/swrasterizer.h
index 9a9a76d7a..0a028b774 100644
--- a/src/video_core/swrasterizer.h
+++ b/src/video_core/swrasterizer.h
@@ -8,19 +8,23 @@
8 8
9#include "video_core/rasterizer_interface.h" 9#include "video_core/rasterizer_interface.h"
10 10
11namespace Pica {
12namespace Shader {
13struct OutputVertex;
14}
15}
16
11namespace VideoCore { 17namespace VideoCore {
12 18
13class SWRasterizer : public RasterizerInterface { 19class SWRasterizer : public RasterizerInterface {
14 void InitObjects() override {}
15 void Reset() override {}
16 void AddTriangle(const Pica::Shader::OutputVertex& v0, 20 void AddTriangle(const Pica::Shader::OutputVertex& v0,
17 const Pica::Shader::OutputVertex& v1, 21 const Pica::Shader::OutputVertex& v1,
18 const Pica::Shader::OutputVertex& v2) override; 22 const Pica::Shader::OutputVertex& v2) override;
19 void DrawTriangles() override {} 23 void DrawTriangles() override {}
20 void FlushFramebuffer() override {}
21 void NotifyPicaRegisterChanged(u32 id) override {} 24 void NotifyPicaRegisterChanged(u32 id) override {}
25 void FlushAll() override {}
22 void FlushRegion(PAddr addr, u32 size) override {} 26 void FlushRegion(PAddr addr, u32 size) override {}
23 void InvalidateRegion(PAddr addr, u32 size) override {} 27 void FlushAndInvalidateRegion(PAddr addr, u32 size) override {}
24}; 28};
25 29
26} 30}
diff --git a/src/video_core/utils.cpp b/src/video_core/utils.cpp
deleted file mode 100644
index 6e1ff5cf4..000000000
--- a/src/video_core/utils.cpp
+++ /dev/null
@@ -1,36 +0,0 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstdio>
6#include <cstring>
7
8#include "video_core/utils.h"
9
10namespace VideoCore {
11
12/**
13 * Dumps a texture to TGA
14 * @param filename String filename to dump texture to
15 * @param width Width of texture in pixels
16 * @param height Height of texture in pixels
17 * @param raw_data Raw RGBA8 texture data to dump
18 * @todo This should be moved to some general purpose/common code
19 */
20void DumpTGA(std::string filename, short width, short height, u8* raw_data) {
21 TGAHeader hdr = {0, 0, 2, 0, 0, 0, 0, width, height, 24, 0};
22 FILE* fout = fopen(filename.c_str(), "wb");
23
24 fwrite(&hdr, sizeof(TGAHeader), 1, fout);
25
26 for (int y = 0; y < height; y++) {
27 for (int x = 0; x < width; x++) {
28 putc(raw_data[(3 * (y * width)) + (3 * x) + 0], fout); // b
29 putc(raw_data[(3 * (y * width)) + (3 * x) + 1], fout); // g
30 putc(raw_data[(3 * (y * width)) + (3 * x) + 2], fout); // r
31 }
32 }
33
34 fclose(fout);
35}
36} // namespace
diff --git a/src/video_core/utils.h b/src/video_core/utils.h
index 4fa60a10e..7ce83a055 100644
--- a/src/video_core/utils.h
+++ b/src/video_core/utils.h
@@ -4,37 +4,10 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <string>
8
9#include "common/common_types.h" 7#include "common/common_types.h"
10 8
11namespace VideoCore { 9namespace VideoCore {
12 10
13/// Structure for the TGA texture format (for dumping)
14struct TGAHeader {
15 char idlength;
16 char colormaptype;
17 char datatypecode;
18 short int colormaporigin;
19 short int colormaplength;
20 short int x_origin;
21 short int y_origin;
22 short width;
23 short height;
24 char bitsperpixel;
25 char imagedescriptor;
26};
27
28/**
29 * Dumps a texture to TGA
30 * @param filename String filename to dump texture to
31 * @param width Width of texture in pixels
32 * @param height Height of texture in pixels
33 * @param raw_data Raw RGBA8 texture data to dump
34 * @todo This should be moved to some general purpose/common code
35 */
36void DumpTGA(std::string filename, short width, short height, u8* raw_data);
37
38/** 11/**
39 * Interleave the lower 3 bits of each coordinate to get the intra-block offsets, which are 12 * Interleave the lower 3 bits of each coordinate to get the intra-block offsets, which are
40 * arranged in a Z-order curve. More details on the bit manipulation at: 13 * arranged in a Z-order curve. More details on the bit manipulation at:
diff --git a/src/video_core/vertex_loader.cpp b/src/video_core/vertex_loader.cpp
new file mode 100644
index 000000000..83896814f
--- /dev/null
+++ b/src/video_core/vertex_loader.cpp
@@ -0,0 +1,140 @@
1#include <memory>
2
3#include <boost/range/algorithm/fill.hpp>
4
5#include "common/assert.h"
6#include "common/alignment.h"
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "common/logging/log.h"
10#include "common/vector_math.h"
11
12#include "core/memory.h"
13
14#include "video_core/debug_utils/debug_utils.h"
15#include "video_core/pica.h"
16#include "video_core/pica_state.h"
17#include "video_core/pica_types.h"
18#include "video_core/shader/shader.h"
19#include "video_core/vertex_loader.h"
20
21namespace Pica {
22
// Caches the vertex attribute layout described by `regs.vertex_attributes` so that
// LoadVertex() can fetch attributes without re-reading the register state.
//
// For each of the 12 attribute loaders, the loader's components are walked in order while a
// running byte `offset` is maintained; every attribute component (id < 12) records its source
// offset, stride, format and element count, and every padding component (id 12..15) only
// advances `offset`.
//
// @param regs Register state whose `vertex_attributes` block is read.
//
// NOTE(review): only `vertex_attribute_sources` and `vertex_attribute_is_default` are rewritten
// for all 16 slots here. The stride/format/element arrays are written only for attributes that a
// loader references, so calling Setup() twice on the same object would keep stale entries from
// the first call -- confirm callers use a fresh VertexLoader per configuration.
23void VertexLoader::Setup(const Pica::Regs& regs) {
24 const auto& attribute_config = regs.vertex_attributes;
25 num_total_attributes = attribute_config.GetNumTotalAttributes();
26
    // Every source slot starts out as 0xdeadbeef (presumably a recognizable poison value for
    // attributes that no loader provides -- TODO confirm intent).
27 boost::fill(vertex_attribute_sources, 0xdeadbeef);
28
    // Remember, per attribute slot, what IsDefaultAttribute() reports; LoadVertex() consults
    // this only when the slot has no array data.
29 for (int i = 0; i < 16; i++) {
30 vertex_attribute_is_default[i] = attribute_config.IsDefaultAttribute(i);
31 }
32
33 // Setup attribute data from loaders
34 for (int loader = 0; loader < 12; ++loader) {
35 const auto& loader_config = attribute_config.attribute_loaders[loader];
36
    // Byte position of the next component, restarted at 0 for each loader.
37 u32 offset = 0;
38
39 // TODO: What happens if a loader overwrites a previous one's data?
40 for (unsigned component = 0; component < loader_config.component_count; ++component) {
    // Components past index 11 are reported and skipped rather than read.
    // NOTE(review): `loader` is an int but is printed with %u.
41 if (component >= 12) {
42 LOG_ERROR(HW_GPU, "Overflow in the vertex attribute loader %u trying to load component %u", loader, component);
43 continue;
44 }
45
46 u32 attribute_index = loader_config.GetComponent(component);
47 if (attribute_index < 12) {
    // Align to the attribute's element size before recording where it lives, then step
    // past it by the amount GetStride() reports for that attribute.
48 offset = Common::AlignUp(offset, attribute_config.GetElementSizeInBytes(attribute_index));
49 vertex_attribute_sources[attribute_index] = loader_config.data_offset + offset;
50 vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count);
51 vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index);
52 vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index);
53 offset += attribute_config.GetStride(attribute_index);
54 } else if (attribute_index < 16) {
55 // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively
    // (index - 11) * 4 yields exactly those sizes; padding is aligned to 4 bytes first.
56 offset = Common::AlignUp(offset, 4);
57 offset += (attribute_index - 11) * 4;
58 } else {
59 UNREACHABLE(); // This is truly unreachable due to the number of bits for each component
60 }
61 }
62 }
63}
64
// Fills `input.attr[i]` for the first `num_total_attributes` attributes of one vertex, using the
// layout cached in this object's per-attribute arrays.
//
// @param base_address    Added to each attribute's cached source offset to form the read address.
// @param index           Only used in the trace log messages below.
// @param vertex          Multiplied by the attribute's stride to locate this vertex's data.
// @param input           Receives the loaded attribute values (as float24).
// @param memory_accesses Receives the address/size of each array read, but only when a debug
//                        context with a recorder is present.
//
// Per attribute, one of three things happens:
//  - element count != 0: the components are read from memory and converted to float24, and the
//    missing components are filled with (0, 0, 0, 1);
//  - otherwise, if the slot is flagged as default: `g_state.vs_default_attributes[i]` is copied;
//  - otherwise: `input.attr[i]` is left untouched.
65void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, DebugUtils::MemoryAccessTracker& memory_accesses) {
66 for (int i = 0; i < num_total_attributes; ++i) {
67 if (vertex_attribute_elements[i] != 0) {
68 // Load per-vertex data from the loader arrays
69 u32 source_addr = base_address + vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex;
70
    // Report the bytes about to be read: element count times 4 (FLOAT), 2 (SHORT) or
    // 1 (any other format).
71 if (g_debug_context && Pica::g_debug_context->recorder) {
72 memory_accesses.AddAccess(source_addr, vertex_attribute_elements[i] * (
73 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4
74 : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1));
75 }
76
    // Each case reinterprets the source bytes as the attribute's element type and converts
    // every present component to float24.
    // NOTE(review): the pointer returned by Memory::GetPhysicalPointer is dereferenced without
    // a null check -- confirm it cannot be null for the addresses reaching this point.
    // NOTE(review): there is no default case; a format outside these four would load nothing
    // and leave the first `vertex_attribute_elements[i]` components of `input.attr[i]` as-is.
77 switch (vertex_attribute_formats[i]) {
78 case Regs::VertexAttributeFormat::BYTE:
79 {
80 const s8* srcdata = reinterpret_cast<const s8*>(Memory::GetPhysicalPointer(source_addr));
81 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
82 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
83 }
84 break;
85 }
86 case Regs::VertexAttributeFormat::UBYTE:
87 {
88 const u8* srcdata = reinterpret_cast<const u8*>(Memory::GetPhysicalPointer(source_addr));
89 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
90 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
91 }
92 break;
93 }
94 case Regs::VertexAttributeFormat::SHORT:
95 {
96 const s16* srcdata = reinterpret_cast<const s16*>(Memory::GetPhysicalPointer(source_addr));
97 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
98 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
99 }
100 break;
101 }
102 case Regs::VertexAttributeFormat::FLOAT:
103 {
104 const float* srcdata = reinterpret_cast<const float*>(Memory::GetPhysicalPointer(source_addr));
105 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
106 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
107 }
108 break;
109 }
110 }
111
112 // Default attribute values set if array elements have < 4 components. This
113 // is *not* carried over from the default attribute settings even if they're
114 // enabled for this attribute.
    // Components 0..2 default to 0.0 and component 3 defaults to 1.0.
115 for (unsigned int comp = vertex_attribute_elements[i]; comp < 4; ++comp) {
116 input.attr[i][comp] = comp == 3 ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f);
117 }
118
119 LOG_TRACE(HW_GPU, "Loaded %d components of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f %f %f %f",
120 vertex_attribute_elements[i], i, vertex, index,
121 base_address,
122 vertex_attribute_sources[i],
123 vertex_attribute_strides[i] * vertex,
124 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
125 } else if (vertex_attribute_is_default[i]) {
126 // Load the default attribute if we're configured to do so
127 input.attr[i] = g_state.vs_default_attributes[i];
128 LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)",
129 i, vertex, index,
130 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),
131 input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
132 } else {
133 // TODO(yuriks): In this case, no data gets loaded and the vertex
134 // remains with the last value it had. This isn't currently maintained
135 // as global state, however, and so won't work in Citra yet.
136 }
137 }
138}
139
140} // namespace Pica
diff --git a/src/video_core/vertex_loader.h b/src/video_core/vertex_loader.h
new file mode 100644
index 000000000..becf5a403
--- /dev/null
+++ b/src/video_core/vertex_loader.h
@@ -0,0 +1,33 @@
1#pragma once
2
3#include "common/common_types.h"
4
5#include "video_core/pica.h"
6
7namespace Pica {
8
9namespace DebugUtils {
10class MemoryAccessTracker;
11}
12
13namespace Shader {
14class InputVertex;
15}
16
// Holds a per-attribute description of the vertex arrays (source offset, stride, format,
// element count, default flag) for up to 16 attribute slots, plus the total attribute count.
// Setup() fills this state from the register block; LoadVertex() consumes it.
//
// All members carry in-class initializers, so an object that has not been through Setup() yet
// reports 0 attributes and holds zeroed tables instead of indeterminate values.
17class VertexLoader {
18public:
    // Populates the tables below from `regs`.
19 void Setup(const Pica::Regs& regs);
    // Writes the attributes of one vertex into `input` using the tables below.
20 void LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, DebugUtils::MemoryAccessTracker& memory_accesses);
21
    // Number of attributes LoadVertex() iterates over; 0 until Setup() has stored a value.
22 int GetNumTotalAttributes() const { return num_total_attributes; }
23
24private:
    // The arrays are indexed by attribute slot (0..15).
25 u32 vertex_attribute_sources[16] = {};
26 u32 vertex_attribute_strides[16] = {};
27 Regs::VertexAttributeFormat vertex_attribute_formats[16] = {};
28 u32 vertex_attribute_elements[16] = {};
29 bool vertex_attribute_is_default[16] = {};
30 int num_total_attributes = 0;
31};
32
33} // namespace Pica
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 256899c89..c9975876d 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -4,12 +4,8 @@
4 4
5#include <memory> 5#include <memory>
6 6
7#include "common/emu_window.h"
8#include "common/logging/log.h" 7#include "common/logging/log.h"
9 8
10#include "core/core.h"
11#include "core/settings.h"
12
13#include "video_core/pica.h" 9#include "video_core/pica.h"
14#include "video_core/renderer_base.h" 10#include "video_core/renderer_base.h"
15#include "video_core/video_core.h" 11#include "video_core/video_core.h"
@@ -25,6 +21,7 @@ std::unique_ptr<RendererBase> g_renderer; ///< Renderer plugin
25 21
26std::atomic<bool> g_hw_renderer_enabled; 22std::atomic<bool> g_hw_renderer_enabled;
27std::atomic<bool> g_shader_jit_enabled; 23std::atomic<bool> g_shader_jit_enabled;
24std::atomic<bool> g_scaled_resolution_enabled;
28 25
29/// Initialize the video core 26/// Initialize the video core
30bool Init(EmuWindow* emu_window) { 27bool Init(EmuWindow* emu_window) {
diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h
index bca67fb8c..30267489e 100644
--- a/src/video_core/video_core.h
+++ b/src/video_core/video_core.h
@@ -36,6 +36,7 @@ extern EmuWindow* g_emu_window; ///< Emu window
36// TODO: Wrap these in a user settings struct along with any other graphics settings (often set from qt ui) 36// TODO: Wrap these in a user settings struct along with any other graphics settings (often set from qt ui)
37extern std::atomic<bool> g_hw_renderer_enabled; 37extern std::atomic<bool> g_hw_renderer_enabled;
38extern std::atomic<bool> g_shader_jit_enabled; 38extern std::atomic<bool> g_shader_jit_enabled;
39extern std::atomic<bool> g_scaled_resolution_enabled;
39 40
40/// Start the video core 41/// Start the video core
41void Start(); 42void Start();