summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/audio_core/CMakeLists.txt7
-rw-r--r--src/audio_core/audio_core.cpp13
-rw-r--r--src/audio_core/audio_core.h2
-rw-r--r--src/audio_core/hle/common.h9
-rw-r--r--src/audio_core/hle/dsp.cpp44
-rw-r--r--src/audio_core/hle/dsp.h21
-rw-r--r--src/audio_core/hle/pipe.cpp32
-rw-r--r--src/audio_core/hle/pipe.h4
-rw-r--r--src/audio_core/interpolate.cpp85
-rw-r--r--src/audio_core/interpolate.h41
-rw-r--r--src/citra/CMakeLists.txt2
-rw-r--r--src/citra/config.cpp1
-rw-r--r--src/citra/default_ini.h4
-rw-r--r--src/citra_qt/CMakeLists.txt2
-rw-r--r--src/citra_qt/bootmanager.cpp2
-rw-r--r--src/citra_qt/config.cpp2
-rw-r--r--src/citra_qt/configure_general.cpp2
-rw-r--r--src/citra_qt/configure_general.ui7
-rw-r--r--src/citra_qt/debugger/graphics_breakpoints.cpp4
-rw-r--r--src/citra_qt/debugger/graphics_framebuffer.cpp6
-rw-r--r--src/citra_qt/debugger/profiler.cpp39
-rw-r--r--src/citra_qt/debugger/profiler.h3
-rw-r--r--src/citra_qt/main.cpp9
-rw-r--r--src/common/CMakeLists.txt1
-rw-r--r--src/common/assert.h2
-rw-r--r--src/common/file_util.h4
-rw-r--r--src/common/microprofile.h4
-rw-r--r--src/common/microprofileui.h3
-rw-r--r--src/common/profiler.cpp82
-rw-r--r--src/common/profiler.h152
-rw-r--r--src/common/profiler_reporting.h27
-rw-r--r--src/core/arm/dyncom/arm_dyncom_interpreter.cpp7
-rw-r--r--src/core/gdbstub/gdbstub.cpp4
-rw-r--r--src/core/hle/result.h1
-rw-r--r--src/core/hle/service/am/am.cpp2
-rw-r--r--src/core/hle/service/dsp_dsp.cpp191
-rw-r--r--src/core/hle/service/dsp_dsp.h19
-rw-r--r--src/core/hle/service/fs/archive.cpp1
-rw-r--r--src/core/hle/service/fs/fs_user.cpp2
-rw-r--r--src/core/hle/service/gsp_gpu.cpp70
-rw-r--r--src/core/hle/service/y2r_u.cpp490
-rw-r--r--src/core/hle/service/y2r_u.h20
-rw-r--r--src/core/hle/svc.cpp4
-rw-r--r--src/core/hw/gpu.cpp327
-rw-r--r--src/core/hw/gpu.h4
-rw-r--r--src/core/loader/ncch.cpp2
-rw-r--r--src/core/memory.cpp140
-rw-r--r--src/core/memory.h16
-rw-r--r--src/core/settings.cpp2
-rw-r--r--src/core/settings.h1
-rw-r--r--src/video_core/CMakeLists.txt2
-rw-r--r--src/video_core/command_processor.cpp133
-rw-r--r--src/video_core/debug_utils/debug_utils.cpp9
-rw-r--r--src/video_core/debug_utils/debug_utils.h46
-rw-r--r--src/video_core/pica.h9
-rw-r--r--src/video_core/rasterizer.cpp3
-rw-r--r--src/video_core/rasterizer_interface.h31
-rw-r--r--src/video_core/renderer_base.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp842
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h94
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp699
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h209
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp3
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp63
-rw-r--r--src/video_core/renderer_opengl/gl_state.h27
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp128
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h44
-rw-r--r--src/video_core/shader/shader.cpp3
-rw-r--r--src/video_core/shader/shader.h2
-rw-r--r--src/video_core/shader/shader_jit_x64.cpp6
-rw-r--r--src/video_core/swrasterizer.h6
-rw-r--r--src/video_core/vertex_loader.cpp140
-rw-r--r--src/video_core/vertex_loader.h28
-rw-r--r--src/video_core/video_core.cpp1
-rw-r--r--src/video_core/video_core.h1
75 files changed, 2868 insertions, 1582 deletions
diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt
index 869da5e83..a965af291 100644
--- a/src/audio_core/CMakeLists.txt
+++ b/src/audio_core/CMakeLists.txt
@@ -4,6 +4,7 @@ set(SRCS
4 hle/dsp.cpp 4 hle/dsp.cpp
5 hle/filter.cpp 5 hle/filter.cpp
6 hle/pipe.cpp 6 hle/pipe.cpp
7 interpolate.cpp
7 ) 8 )
8 9
9set(HEADERS 10set(HEADERS
@@ -13,9 +14,13 @@ set(HEADERS
13 hle/dsp.h 14 hle/dsp.h
14 hle/filter.h 15 hle/filter.h
15 hle/pipe.h 16 hle/pipe.h
17 interpolate.h
16 sink.h 18 sink.h
17 ) 19 )
18 20
21include_directories(../../externals/soundtouch/include)
22
19create_directory_groups(${SRCS} ${HEADERS}) 23create_directory_groups(${SRCS} ${HEADERS})
20 24
21add_library(audio_core STATIC ${SRCS} ${HEADERS}) \ No newline at end of file 25add_library(audio_core STATIC ${SRCS} ${HEADERS})
26target_link_libraries(audio_core SoundTouch)
diff --git a/src/audio_core/audio_core.cpp b/src/audio_core/audio_core.cpp
index 894f46990..cbe869a04 100644
--- a/src/audio_core/audio_core.cpp
+++ b/src/audio_core/audio_core.cpp
@@ -4,6 +4,7 @@
4 4
5#include "audio_core/audio_core.h" 5#include "audio_core/audio_core.h"
6#include "audio_core/hle/dsp.h" 6#include "audio_core/hle/dsp.h"
7#include "audio_core/hle/pipe.h"
7 8
8#include "core/core_timing.h" 9#include "core/core_timing.h"
9#include "core/hle/kernel/vm_manager.h" 10#include "core/hle/kernel/vm_manager.h"
@@ -17,10 +18,10 @@ static constexpr u64 audio_frame_ticks = 1310252ull; ///< Units: ARM11 cycles
17 18
18static void AudioTickCallback(u64 /*userdata*/, int cycles_late) { 19static void AudioTickCallback(u64 /*userdata*/, int cycles_late) {
19 if (DSP::HLE::Tick()) { 20 if (DSP::HLE::Tick()) {
20 // HACK: We're not signaling the interrups when they should be, but just firing them all off together. 21 // TODO(merry): Signal all the other interrupts as appropriate.
21 // It should be only (interrupt_id = 2, channel_id = 2) that's signalled here. 22 DSP_DSP::SignalPipeInterrupt(DSP::HLE::DspPipe::Audio);
22 // TODO(merry): Understand when the other interrupts are fired. 23 // HACK(merry): Added to prevent regressions. Will remove soon.
23 DSP_DSP::SignalAllInterrupts(); 24 DSP_DSP::SignalPipeInterrupt(DSP::HLE::DspPipe::Binary);
24 } 25 }
25 26
26 // Reschedule recurrent event 27 // Reschedule recurrent event
@@ -37,10 +38,10 @@ void Init() {
37 38
38/// Add DSP address spaces to Process's address space. 39/// Add DSP address spaces to Process's address space.
39void AddAddressSpace(Kernel::VMManager& address_space) { 40void AddAddressSpace(Kernel::VMManager& address_space) {
40 auto r0_vma = address_space.MapBackingMemory(DSP::HLE::region0_base, reinterpret_cast<u8*>(&DSP::HLE::g_region0), sizeof(DSP::HLE::SharedMemory), Kernel::MemoryState::IO).MoveFrom(); 41 auto r0_vma = address_space.MapBackingMemory(DSP::HLE::region0_base, reinterpret_cast<u8*>(&DSP::HLE::g_regions[0]), sizeof(DSP::HLE::SharedMemory), Kernel::MemoryState::IO).MoveFrom();
41 address_space.Reprotect(r0_vma, Kernel::VMAPermission::ReadWrite); 42 address_space.Reprotect(r0_vma, Kernel::VMAPermission::ReadWrite);
42 43
43 auto r1_vma = address_space.MapBackingMemory(DSP::HLE::region1_base, reinterpret_cast<u8*>(&DSP::HLE::g_region1), sizeof(DSP::HLE::SharedMemory), Kernel::MemoryState::IO).MoveFrom(); 44 auto r1_vma = address_space.MapBackingMemory(DSP::HLE::region1_base, reinterpret_cast<u8*>(&DSP::HLE::g_regions[1]), sizeof(DSP::HLE::SharedMemory), Kernel::MemoryState::IO).MoveFrom();
44 address_space.Reprotect(r1_vma, Kernel::VMAPermission::ReadWrite); 45 address_space.Reprotect(r1_vma, Kernel::VMAPermission::ReadWrite);
45} 46}
46 47
diff --git a/src/audio_core/audio_core.h b/src/audio_core/audio_core.h
index 64c330914..b349895ea 100644
--- a/src/audio_core/audio_core.h
+++ b/src/audio_core/audio_core.h
@@ -10,8 +10,6 @@ class VMManager;
10 10
11namespace AudioCore { 11namespace AudioCore {
12 12
13constexpr int num_sources = 24;
14constexpr int samples_per_frame = 160; ///< Samples per audio frame at native sample rate
15constexpr int native_sample_rate = 32728; ///< 32kHz 13constexpr int native_sample_rate = 32728; ///< 32kHz
16 14
17/// Initialise Audio Core 15/// Initialise Audio Core
diff --git a/src/audio_core/hle/common.h b/src/audio_core/hle/common.h
index 37d441eb2..7910f42ae 100644
--- a/src/audio_core/hle/common.h
+++ b/src/audio_core/hle/common.h
@@ -7,18 +7,19 @@
7#include <algorithm> 7#include <algorithm>
8#include <array> 8#include <array>
9 9
10#include "audio_core/audio_core.h"
11
12#include "common/common_types.h" 10#include "common/common_types.h"
13 11
14namespace DSP { 12namespace DSP {
15namespace HLE { 13namespace HLE {
16 14
15constexpr int num_sources = 24;
16constexpr int samples_per_frame = 160; ///< Samples per audio frame at native sample rate
17
17/// The final output to the speakers is stereo. Preprocessing output in Source is also stereo. 18/// The final output to the speakers is stereo. Preprocessing output in Source is also stereo.
18using StereoFrame16 = std::array<std::array<s16, 2>, AudioCore::samples_per_frame>; 19using StereoFrame16 = std::array<std::array<s16, 2>, samples_per_frame>;
19 20
20/// The DSP is quadraphonic internally. 21/// The DSP is quadraphonic internally.
21using QuadFrame32 = std::array<std::array<s32, 4>, AudioCore::samples_per_frame>; 22using QuadFrame32 = std::array<std::array<s32, 4>, samples_per_frame>;
22 23
23/** 24/**
24 * This performs the filter operation defined by FilterT::ProcessSample on the frame in-place. 25 * This performs the filter operation defined by FilterT::ProcessSample on the frame in-place.
diff --git a/src/audio_core/hle/dsp.cpp b/src/audio_core/hle/dsp.cpp
index c89356edc..5759a5b9e 100644
--- a/src/audio_core/hle/dsp.cpp
+++ b/src/audio_core/hle/dsp.cpp
@@ -8,8 +8,32 @@
8namespace DSP { 8namespace DSP {
9namespace HLE { 9namespace HLE {
10 10
11SharedMemory g_region0; 11std::array<SharedMemory, 2> g_regions;
12SharedMemory g_region1; 12
13static size_t CurrentRegionIndex() {
14 // The region with the higher frame counter is chosen unless there is wraparound.
15 // This function only returns a 0 or 1.
16
17 if (g_regions[0].frame_counter == 0xFFFFu && g_regions[1].frame_counter != 0xFFFEu) {
18 // Wraparound has occurred.
19 return 1;
20 }
21
22 if (g_regions[1].frame_counter == 0xFFFFu && g_regions[0].frame_counter != 0xFFFEu) {
23 // Wraparound has occurred.
24 return 0;
25 }
26
27 return (g_regions[0].frame_counter > g_regions[1].frame_counter) ? 0 : 1;
28}
29
30static SharedMemory& ReadRegion() {
31 return g_regions[CurrentRegionIndex()];
32}
33
34static SharedMemory& WriteRegion() {
35 return g_regions[1 - CurrentRegionIndex()];
36}
13 37
14void Init() { 38void Init() {
15 DSP::HLE::ResetPipes(); 39 DSP::HLE::ResetPipes();
@@ -22,21 +46,5 @@ bool Tick() {
22 return true; 46 return true;
23} 47}
24 48
25SharedMemory& CurrentRegion() {
26 // The region with the higher frame counter is chosen unless there is wraparound.
27
28 if (g_region0.frame_counter == 0xFFFFu && g_region1.frame_counter != 0xFFFEu) {
29 // Wraparound has occured.
30 return g_region1;
31 }
32
33 if (g_region1.frame_counter == 0xFFFFu && g_region0.frame_counter != 0xFFFEu) {
34 // Wraparound has occured.
35 return g_region0;
36 }
37
38 return (g_region0.frame_counter > g_region1.frame_counter) ? g_region0 : g_region1;
39}
40
41} // namespace HLE 49} // namespace HLE
42} // namespace DSP 50} // namespace DSP
diff --git a/src/audio_core/hle/dsp.h b/src/audio_core/hle/dsp.h
index c15ef0b7a..f0f125284 100644
--- a/src/audio_core/hle/dsp.h
+++ b/src/audio_core/hle/dsp.h
@@ -4,10 +4,11 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
7#include <cstddef> 8#include <cstddef>
8#include <type_traits> 9#include <type_traits>
9 10
10#include "audio_core/audio_core.h" 11#include "audio_core/hle/common.h"
11 12
12#include "common/bit_field.h" 13#include "common/bit_field.h"
13#include "common/common_funcs.h" 14#include "common/common_funcs.h"
@@ -30,10 +31,9 @@ namespace HLE {
30struct SharedMemory; 31struct SharedMemory;
31 32
32constexpr VAddr region0_base = 0x1FF50000; 33constexpr VAddr region0_base = 0x1FF50000;
33extern SharedMemory g_region0;
34
35constexpr VAddr region1_base = 0x1FF70000; 34constexpr VAddr region1_base = 0x1FF70000;
36extern SharedMemory g_region1; 35
36extern std::array<SharedMemory, 2> g_regions;
37 37
38/** 38/**
39 * The DSP is native 16-bit. The DSP also appears to be big-endian. When reading 32-bit numbers from 39 * The DSP is native 16-bit. The DSP also appears to be big-endian. When reading 32-bit numbers from
@@ -305,7 +305,7 @@ struct SourceConfiguration {
305 u16_le buffer_id; 305 u16_le buffer_id;
306 }; 306 };
307 307
308 Configuration config[AudioCore::num_sources]; 308 Configuration config[num_sources];
309}; 309};
310ASSERT_DSP_STRUCT(SourceConfiguration::Configuration, 192); 310ASSERT_DSP_STRUCT(SourceConfiguration::Configuration, 192);
311ASSERT_DSP_STRUCT(SourceConfiguration::Configuration::Buffer, 20); 311ASSERT_DSP_STRUCT(SourceConfiguration::Configuration::Buffer, 20);
@@ -320,7 +320,7 @@ struct SourceStatus {
320 INSERT_PADDING_DSPWORDS(1); 320 INSERT_PADDING_DSPWORDS(1);
321 }; 321 };
322 322
323 Status status[AudioCore::num_sources]; 323 Status status[num_sources];
324}; 324};
325ASSERT_DSP_STRUCT(SourceStatus::Status, 12); 325ASSERT_DSP_STRUCT(SourceStatus::Status, 12);
326 326
@@ -413,7 +413,7 @@ ASSERT_DSP_STRUCT(DspConfiguration::ReverbEffect, 52);
413struct AdpcmCoefficients { 413struct AdpcmCoefficients {
414 /// Coefficients are signed fixed point with 11 fractional bits. 414 /// Coefficients are signed fixed point with 11 fractional bits.
415 /// Each source has 16 coefficients associated with it. 415 /// Each source has 16 coefficients associated with it.
416 s16_le coeff[AudioCore::num_sources][16]; 416 s16_le coeff[num_sources][16];
417}; 417};
418ASSERT_DSP_STRUCT(AdpcmCoefficients, 768); 418ASSERT_DSP_STRUCT(AdpcmCoefficients, 768);
419 419
@@ -427,7 +427,7 @@ ASSERT_DSP_STRUCT(DspStatus, 32);
427/// Final mixed output in PCM16 stereo format, what you hear out of the speakers. 427/// Final mixed output in PCM16 stereo format, what you hear out of the speakers.
428/// When the application writes to this region it has no effect. 428/// When the application writes to this region it has no effect.
429struct FinalMixSamples { 429struct FinalMixSamples {
430 s16_le pcm16[2 * AudioCore::samples_per_frame]; 430 s16_le pcm16[2 * samples_per_frame];
431}; 431};
432ASSERT_DSP_STRUCT(FinalMixSamples, 640); 432ASSERT_DSP_STRUCT(FinalMixSamples, 640);
433 433
@@ -437,7 +437,7 @@ ASSERT_DSP_STRUCT(FinalMixSamples, 640);
437/// Values that exceed s16 range will be clipped by the DSP after further processing. 437/// Values that exceed s16 range will be clipped by the DSP after further processing.
438struct IntermediateMixSamples { 438struct IntermediateMixSamples {
439 struct Samples { 439 struct Samples {
440 s32_le pcm32[4][AudioCore::samples_per_frame]; ///< Little-endian as opposed to DSP middle-endian. 440 s32_le pcm32[4][samples_per_frame]; ///< Little-endian as opposed to DSP middle-endian.
441 }; 441 };
442 442
443 Samples mix1; 443 Samples mix1;
@@ -535,8 +535,5 @@ void Shutdown();
535 */ 535 */
536bool Tick(); 536bool Tick();
537 537
538/// Returns a mutable reference to the current region. Current region is selected based on the frame counter.
539SharedMemory& CurrentRegion();
540
541} // namespace HLE 538} // namespace HLE
542} // namespace DSP 539} // namespace DSP
diff --git a/src/audio_core/hle/pipe.cpp b/src/audio_core/hle/pipe.cpp
index 9381883b4..03280780f 100644
--- a/src/audio_core/hle/pipe.cpp
+++ b/src/audio_core/hle/pipe.cpp
@@ -12,12 +12,14 @@
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "common/logging/log.h" 13#include "common/logging/log.h"
14 14
15#include "core/hle/service/dsp_dsp.h"
16
15namespace DSP { 17namespace DSP {
16namespace HLE { 18namespace HLE {
17 19
18static DspState dsp_state = DspState::Off; 20static DspState dsp_state = DspState::Off;
19 21
20static std::array<std::vector<u8>, static_cast<size_t>(DspPipe::DspPipe_MAX)> pipe_data; 22static std::array<std::vector<u8>, NUM_DSP_PIPE> pipe_data;
21 23
22void ResetPipes() { 24void ResetPipes() {
23 for (auto& data : pipe_data) { 25 for (auto& data : pipe_data) {
@@ -27,16 +29,18 @@ void ResetPipes() {
27} 29}
28 30
29std::vector<u8> PipeRead(DspPipe pipe_number, u32 length) { 31std::vector<u8> PipeRead(DspPipe pipe_number, u32 length) {
30 if (pipe_number >= DspPipe::DspPipe_MAX) { 32 const size_t pipe_index = static_cast<size_t>(pipe_number);
31 LOG_ERROR(Audio_DSP, "pipe_number = %u invalid", pipe_number); 33
34 if (pipe_index >= NUM_DSP_PIPE) {
35 LOG_ERROR(Audio_DSP, "pipe_number = %zu invalid", pipe_index);
32 return {}; 36 return {};
33 } 37 }
34 38
35 std::vector<u8>& data = pipe_data[static_cast<size_t>(pipe_number)]; 39 std::vector<u8>& data = pipe_data[pipe_index];
36 40
37 if (length > data.size()) { 41 if (length > data.size()) {
38 LOG_WARNING(Audio_DSP, "pipe_number = %u is out of data, application requested read of %u but %zu remain", 42 LOG_WARNING(Audio_DSP, "pipe_number = %zu is out of data, application requested read of %u but %zu remain",
39 pipe_number, length, data.size()); 43 pipe_index, length, data.size());
40 length = data.size(); 44 length = data.size();
41 } 45 }
42 46
@@ -49,16 +53,20 @@ std::vector<u8> PipeRead(DspPipe pipe_number, u32 length) {
49} 53}
50 54
51size_t GetPipeReadableSize(DspPipe pipe_number) { 55size_t GetPipeReadableSize(DspPipe pipe_number) {
52 if (pipe_number >= DspPipe::DspPipe_MAX) { 56 const size_t pipe_index = static_cast<size_t>(pipe_number);
53 LOG_ERROR(Audio_DSP, "pipe_number = %u invalid", pipe_number); 57
58 if (pipe_index >= NUM_DSP_PIPE) {
59 LOG_ERROR(Audio_DSP, "pipe_number = %zu invalid", pipe_index);
54 return 0; 60 return 0;
55 } 61 }
56 62
57 return pipe_data[static_cast<size_t>(pipe_number)].size(); 63 return pipe_data[pipe_index].size();
58} 64}
59 65
60static void WriteU16(DspPipe pipe_number, u16 value) { 66static void WriteU16(DspPipe pipe_number, u16 value) {
61 std::vector<u8>& data = pipe_data[static_cast<size_t>(pipe_number)]; 67 const size_t pipe_index = static_cast<size_t>(pipe_number);
68
69 std::vector<u8>& data = pipe_data.at(pipe_index);
62 // Little endian 70 // Little endian
63 data.emplace_back(value & 0xFF); 71 data.emplace_back(value & 0xFF);
64 data.emplace_back(value >> 8); 72 data.emplace_back(value >> 8);
@@ -91,6 +99,8 @@ static void AudioPipeWriteStructAddresses() {
91 for (u16 addr : struct_addresses) { 99 for (u16 addr : struct_addresses) {
92 WriteU16(DspPipe::Audio, addr); 100 WriteU16(DspPipe::Audio, addr);
93 } 101 }
102 // Signal that we have data on this pipe.
103 DSP_DSP::SignalPipeInterrupt(DspPipe::Audio);
94} 104}
95 105
96void PipeWrite(DspPipe pipe_number, const std::vector<u8>& buffer) { 106void PipeWrite(DspPipe pipe_number, const std::vector<u8>& buffer) {
@@ -145,7 +155,7 @@ void PipeWrite(DspPipe pipe_number, const std::vector<u8>& buffer) {
145 return; 155 return;
146 } 156 }
147 default: 157 default:
148 LOG_CRITICAL(Audio_DSP, "pipe_number = %u unimplemented", pipe_number); 158 LOG_CRITICAL(Audio_DSP, "pipe_number = %zu unimplemented", static_cast<size_t>(pipe_number));
149 UNIMPLEMENTED(); 159 UNIMPLEMENTED();
150 return; 160 return;
151 } 161 }
diff --git a/src/audio_core/hle/pipe.h b/src/audio_core/hle/pipe.h
index 382d35e87..64d97f8ba 100644
--- a/src/audio_core/hle/pipe.h
+++ b/src/audio_core/hle/pipe.h
@@ -19,9 +19,9 @@ enum class DspPipe {
19 Debug = 0, 19 Debug = 0,
20 Dma = 1, 20 Dma = 1,
21 Audio = 2, 21 Audio = 2,
22 Binary = 3, 22 Binary = 3
23 DspPipe_MAX
24}; 23};
24constexpr size_t NUM_DSP_PIPE = 8;
25 25
26/** 26/**
27 * Read a DSP pipe. 27 * Read a DSP pipe.
diff --git a/src/audio_core/interpolate.cpp b/src/audio_core/interpolate.cpp
new file mode 100644
index 000000000..fcd3aa066
--- /dev/null
+++ b/src/audio_core/interpolate.cpp
@@ -0,0 +1,85 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "audio_core/interpolate.h"
6
7#include "common/assert.h"
8#include "common/math_util.h"
9
10namespace AudioInterp {
11
12// Calculations are done in fixed point with 24 fractional bits.
13// (This is not verified. This was chosen for minimal error.)
14constexpr u64 scale_factor = 1 << 24;
15constexpr u64 scale_mask = scale_factor - 1;
16
17/// Here we step over the input in steps of rate_multiplier, until we consume all of the input.
18/// Three adjacent samples are passed to fn each step.
19template <typename Function>
20static StereoBuffer16 StepOverSamples(State& state, const StereoBuffer16& input, float rate_multiplier, Function fn) {
21 ASSERT(rate_multiplier > 0);
22
23 if (input.size() < 2)
24 return {};
25
26 StereoBuffer16 output;
27 output.reserve(static_cast<size_t>(input.size() / rate_multiplier));
28
29 u64 step_size = static_cast<u64>(rate_multiplier * scale_factor);
30
31 u64 fposition = 0;
32 const u64 max_fposition = input.size() * scale_factor;
33
34 while (fposition < 1 * scale_factor) {
35 u64 fraction = fposition & scale_mask;
36
37 output.push_back(fn(fraction, state.xn2, state.xn1, input[0]));
38
39 fposition += step_size;
40 }
41
42 while (fposition < 2 * scale_factor) {
43 u64 fraction = fposition & scale_mask;
44
45 output.push_back(fn(fraction, state.xn1, input[0], input[1]));
46
47 fposition += step_size;
48 }
49
50 while (fposition < max_fposition) {
51 u64 fraction = fposition & scale_mask;
52
53 size_t index = static_cast<size_t>(fposition / scale_factor);
54 output.push_back(fn(fraction, input[index - 2], input[index - 1], input[index]));
55
56 fposition += step_size;
57 }
58
59 state.xn2 = input[input.size() - 2];
60 state.xn1 = input[input.size() - 1];
61
62 return output;
63}
64
65StereoBuffer16 None(State& state, const StereoBuffer16& input, float rate_multiplier) {
66 return StepOverSamples(state, input, rate_multiplier, [](u64 fraction, const auto& x0, const auto& x1, const auto& x2) {
67 return x0;
68 });
69}
70
71StereoBuffer16 Linear(State& state, const StereoBuffer16& input, float rate_multiplier) {
72 // Note on accuracy: Some values that this produces are +/- 1 from the actual firmware.
73 return StepOverSamples(state, input, rate_multiplier, [](u64 fraction, const auto& x0, const auto& x1, const auto& x2) {
74 // This is a saturated subtraction. (Verified by black-box fuzzing.)
75 s64 delta0 = MathUtil::Clamp<s64>(x1[0] - x0[0], -32768, 32767);
76 s64 delta1 = MathUtil::Clamp<s64>(x1[1] - x0[1], -32768, 32767);
77
78 return std::array<s16, 2> {
79 static_cast<s16>(x0[0] + fraction * delta0 / scale_factor),
80 static_cast<s16>(x0[1] + fraction * delta1 / scale_factor)
81 };
82 });
83}
84
85} // namespace AudioInterp
diff --git a/src/audio_core/interpolate.h b/src/audio_core/interpolate.h
new file mode 100644
index 000000000..a4c0a453d
--- /dev/null
+++ b/src/audio_core/interpolate.h
@@ -0,0 +1,41 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <vector>
9
10#include "common/common_types.h"
11
12namespace AudioInterp {
13
14/// A variable length buffer of signed PCM16 stereo samples.
15using StereoBuffer16 = std::vector<std::array<s16, 2>>;
16
17struct State {
18 // Two historical samples.
19 std::array<s16, 2> xn1 = {}; ///< x[n-1]
20 std::array<s16, 2> xn2 = {}; ///< x[n-2]
21};
22
23/**
24 * No interpolation. This is equivalent to a zero-order hold. There is a two-sample predelay.
25 * @param input Input buffer.
26 * @param rate_multiplier Stretch factor. Must be a positive non-zero value.
27 * rate_multiplier > 1.0 performs decimation and rate_multiplier < 1.0 performs upsampling.
28 * @return The resampled audio buffer.
29 */
30StereoBuffer16 None(State& state, const StereoBuffer16& input, float rate_multiplier);
31
32/**
33 * Linear interpolation. This is equivalent to a first-order hold. There is a two-sample predelay.
34 * @param input Input buffer.
35 * @param rate_multiplier Stretch factor. Must be a positive non-zero value.
36 * rate_multiplier > 1.0 performs decimation and rate_multiplier < 1.0 performs upsampling.
37 * @return The resampled audio buffer.
38 */
39StereoBuffer16 Linear(State& state, const StereoBuffer16& input, float rate_multiplier);
40
41} // namespace AudioInterp
diff --git a/src/citra/CMakeLists.txt b/src/citra/CMakeLists.txt
index fa615deb9..43fa06b4e 100644
--- a/src/citra/CMakeLists.txt
+++ b/src/citra/CMakeLists.txt
@@ -21,7 +21,7 @@ target_link_libraries(citra ${SDL2_LIBRARY} ${OPENGL_gl_LIBRARY} inih glad)
21if (MSVC) 21if (MSVC)
22 target_link_libraries(citra getopt) 22 target_link_libraries(citra getopt)
23endif() 23endif()
24target_link_libraries(citra ${PLATFORM_LIBRARIES}) 24target_link_libraries(citra ${PLATFORM_LIBRARIES} Threads::Threads)
25 25
26if(${CMAKE_SYSTEM_NAME} MATCHES "Linux|FreeBSD|OpenBSD|NetBSD") 26if(${CMAKE_SYSTEM_NAME} MATCHES "Linux|FreeBSD|OpenBSD|NetBSD")
27 install(TARGETS citra RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/bin") 27 install(TARGETS citra RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/bin")
diff --git a/src/citra/config.cpp b/src/citra/config.cpp
index 6b6617352..9e2ecd307 100644
--- a/src/citra/config.cpp
+++ b/src/citra/config.cpp
@@ -65,6 +65,7 @@ void Config::ReadValues() {
65 // Renderer 65 // Renderer
66 Settings::values.use_hw_renderer = sdl2_config->GetBoolean("Renderer", "use_hw_renderer", false); 66 Settings::values.use_hw_renderer = sdl2_config->GetBoolean("Renderer", "use_hw_renderer", false);
67 Settings::values.use_shader_jit = sdl2_config->GetBoolean("Renderer", "use_shader_jit", true); 67 Settings::values.use_shader_jit = sdl2_config->GetBoolean("Renderer", "use_shader_jit", true);
68 Settings::values.use_scaled_resolution = sdl2_config->GetBoolean("Renderer", "use_scaled_resolution", false);
68 69
69 Settings::values.bg_red = (float)sdl2_config->GetReal("Renderer", "bg_red", 1.0); 70 Settings::values.bg_red = (float)sdl2_config->GetReal("Renderer", "bg_red", 1.0);
70 Settings::values.bg_green = (float)sdl2_config->GetReal("Renderer", "bg_green", 1.0); 71 Settings::values.bg_green = (float)sdl2_config->GetReal("Renderer", "bg_green", 1.0);
diff --git a/src/citra/default_ini.h b/src/citra/default_ini.h
index c9b490a00..1f1aa716b 100644
--- a/src/citra/default_ini.h
+++ b/src/citra/default_ini.h
@@ -46,6 +46,10 @@ use_hw_renderer =
46# 0 : Interpreter (slow), 1 (default): JIT (fast) 46# 0 : Interpreter (slow), 1 (default): JIT (fast)
47use_shader_jit = 47use_shader_jit =
48 48
49# Whether to use native 3DS screen resolution or to scale rendering resolution to the displayed screen size.
50# 0 (default): Native, 1: Scaled
51use_scaled_resolution =
52
49# The clear color for the renderer. What shows up on the sides of the bottom screen. 53# The clear color for the renderer. What shows up on the sides of the bottom screen.
50# Must be in range of 0.0-1.0. Defaults to 1.0 for all. 54# Must be in range of 0.0-1.0. Defaults to 1.0 for all.
51bg_red = 55bg_red =
diff --git a/src/citra_qt/CMakeLists.txt b/src/citra_qt/CMakeLists.txt
index 6660d9879..cc9e0c624 100644
--- a/src/citra_qt/CMakeLists.txt
+++ b/src/citra_qt/CMakeLists.txt
@@ -92,7 +92,7 @@ else()
92endif() 92endif()
93target_link_libraries(citra-qt core video_core audio_core common qhexedit) 93target_link_libraries(citra-qt core video_core audio_core common qhexedit)
94target_link_libraries(citra-qt ${OPENGL_gl_LIBRARY} ${CITRA_QT_LIBS}) 94target_link_libraries(citra-qt ${OPENGL_gl_LIBRARY} ${CITRA_QT_LIBS})
95target_link_libraries(citra-qt ${PLATFORM_LIBRARIES}) 95target_link_libraries(citra-qt ${PLATFORM_LIBRARIES} Threads::Threads)
96 96
97if(${CMAKE_SYSTEM_NAME} MATCHES "Linux|FreeBSD|OpenBSD|NetBSD") 97if(${CMAKE_SYSTEM_NAME} MATCHES "Linux|FreeBSD|OpenBSD|NetBSD")
98 install(TARGETS citra-qt RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/bin") 98 install(TARGETS citra-qt RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/bin")
diff --git a/src/citra_qt/bootmanager.cpp b/src/citra_qt/bootmanager.cpp
index 8e60b9cad..01b81c11c 100644
--- a/src/citra_qt/bootmanager.cpp
+++ b/src/citra_qt/bootmanager.cpp
@@ -71,7 +71,9 @@ void EmuThread::run() {
71 // Shutdown the core emulation 71 // Shutdown the core emulation
72 System::Shutdown(); 72 System::Shutdown();
73 73
74#if MICROPROFILE_ENABLED
74 MicroProfileOnThreadExit(); 75 MicroProfileOnThreadExit();
76#endif
75 77
76 render_window->moveContext(); 78 render_window->moveContext();
77} 79}
diff --git a/src/citra_qt/config.cpp b/src/citra_qt/config.cpp
index e363be38a..7dc61fe40 100644
--- a/src/citra_qt/config.cpp
+++ b/src/citra_qt/config.cpp
@@ -45,6 +45,7 @@ void Config::ReadValues() {
45 qt_config->beginGroup("Renderer"); 45 qt_config->beginGroup("Renderer");
46 Settings::values.use_hw_renderer = qt_config->value("use_hw_renderer", false).toBool(); 46 Settings::values.use_hw_renderer = qt_config->value("use_hw_renderer", false).toBool();
47 Settings::values.use_shader_jit = qt_config->value("use_shader_jit", true).toBool(); 47 Settings::values.use_shader_jit = qt_config->value("use_shader_jit", true).toBool();
48 Settings::values.use_scaled_resolution = qt_config->value("use_scaled_resolution", false).toBool();
48 49
49 Settings::values.bg_red = qt_config->value("bg_red", 1.0).toFloat(); 50 Settings::values.bg_red = qt_config->value("bg_red", 1.0).toFloat();
50 Settings::values.bg_green = qt_config->value("bg_green", 1.0).toFloat(); 51 Settings::values.bg_green = qt_config->value("bg_green", 1.0).toFloat();
@@ -129,6 +130,7 @@ void Config::SaveValues() {
129 qt_config->beginGroup("Renderer"); 130 qt_config->beginGroup("Renderer");
130 qt_config->setValue("use_hw_renderer", Settings::values.use_hw_renderer); 131 qt_config->setValue("use_hw_renderer", Settings::values.use_hw_renderer);
131 qt_config->setValue("use_shader_jit", Settings::values.use_shader_jit); 132 qt_config->setValue("use_shader_jit", Settings::values.use_shader_jit);
133 qt_config->setValue("use_scaled_resolution", Settings::values.use_scaled_resolution);
132 134
133 // Cast to double because Qt's written float values are not human-readable 135 // Cast to double because Qt's written float values are not human-readable
134 qt_config->setValue("bg_red", (double)Settings::values.bg_red); 136 qt_config->setValue("bg_red", (double)Settings::values.bg_red);
diff --git a/src/citra_qt/configure_general.cpp b/src/citra_qt/configure_general.cpp
index a27d0d26c..62648e665 100644
--- a/src/citra_qt/configure_general.cpp
+++ b/src/citra_qt/configure_general.cpp
@@ -25,6 +25,7 @@ void ConfigureGeneral::setConfiguration() {
25 ui->region_combobox->setCurrentIndex(Settings::values.region_value); 25 ui->region_combobox->setCurrentIndex(Settings::values.region_value);
26 ui->toogle_hw_renderer->setChecked(Settings::values.use_hw_renderer); 26 ui->toogle_hw_renderer->setChecked(Settings::values.use_hw_renderer);
27 ui->toogle_shader_jit->setChecked(Settings::values.use_shader_jit); 27 ui->toogle_shader_jit->setChecked(Settings::values.use_shader_jit);
28 ui->toogle_scaled_resolution->setChecked(Settings::values.use_scaled_resolution);
28} 29}
29 30
30void ConfigureGeneral::applyConfiguration() { 31void ConfigureGeneral::applyConfiguration() {
@@ -33,5 +34,6 @@ void ConfigureGeneral::applyConfiguration() {
33 Settings::values.region_value = ui->region_combobox->currentIndex(); 34 Settings::values.region_value = ui->region_combobox->currentIndex();
34 Settings::values.use_hw_renderer = ui->toogle_hw_renderer->isChecked(); 35 Settings::values.use_hw_renderer = ui->toogle_hw_renderer->isChecked();
35 Settings::values.use_shader_jit = ui->toogle_shader_jit->isChecked(); 36 Settings::values.use_shader_jit = ui->toogle_shader_jit->isChecked();
37 Settings::values.use_scaled_resolution = ui->toogle_scaled_resolution->isChecked();
36 Settings::Apply(); 38 Settings::Apply();
37} 39}
diff --git a/src/citra_qt/configure_general.ui b/src/citra_qt/configure_general.ui
index 47184c5c6..5eb309793 100644
--- a/src/citra_qt/configure_general.ui
+++ b/src/citra_qt/configure_general.ui
@@ -128,6 +128,13 @@
128 </property> 128 </property>
129 </widget> 129 </widget>
130 </item> 130 </item>
131 <item>
132 <widget class="QCheckBox" name="toogle_scaled_resolution">
133 <property name="text">
134 <string>Enable scaled resolution</string>
135 </property>
136 </widget>
137 </item>
131 </layout> 138 </layout>
132 </item> 139 </item>
133 </layout> 140 </layout>
diff --git a/src/citra_qt/debugger/graphics_breakpoints.cpp b/src/citra_qt/debugger/graphics_breakpoints.cpp
index 819ec7707..c8510128a 100644
--- a/src/citra_qt/debugger/graphics_breakpoints.cpp
+++ b/src/citra_qt/debugger/graphics_breakpoints.cpp
@@ -75,7 +75,7 @@ QVariant BreakPointModel::data(const QModelIndex& index, int role) const
75 case Role_IsEnabled: 75 case Role_IsEnabled:
76 { 76 {
77 auto context = context_weak.lock(); 77 auto context = context_weak.lock();
78 return context && context->breakpoints[event].enabled; 78 return context && context->breakpoints[(int)event].enabled;
79 } 79 }
80 80
81 default: 81 default:
@@ -110,7 +110,7 @@ bool BreakPointModel::setData(const QModelIndex& index, const QVariant& value, i
110 if (!context) 110 if (!context)
111 return false; 111 return false;
112 112
113 context->breakpoints[event].enabled = value == Qt::Checked; 113 context->breakpoints[(int)event].enabled = value == Qt::Checked;
114 QModelIndex changed_index = createIndex(index.row(), 0); 114 QModelIndex changed_index = createIndex(index.row(), 0);
115 emit dataChanged(changed_index, changed_index); 115 emit dataChanged(changed_index, changed_index);
116 return true; 116 return true;
diff --git a/src/citra_qt/debugger/graphics_framebuffer.cpp b/src/citra_qt/debugger/graphics_framebuffer.cpp
index c30e75933..68cff78b2 100644
--- a/src/citra_qt/debugger/graphics_framebuffer.cpp
+++ b/src/citra_qt/debugger/graphics_framebuffer.cpp
@@ -346,5 +346,11 @@ u32 GraphicsFramebufferWidget::BytesPerPixel(GraphicsFramebufferWidget::Format f
346 case Format::RGBA4: 346 case Format::RGBA4:
347 case Format::D16: 347 case Format::D16:
348 return 2; 348 return 2;
349 default:
350 UNREACHABLE_MSG("GraphicsFramebufferWidget::BytesPerPixel: this "
351 "should not be reached as this function should "
352 "be given a format which is in "
353 "GraphicsFramebufferWidget::Format. Instead got %i",
354 static_cast<int>(format));
349 } 355 }
350} 356}
diff --git a/src/citra_qt/debugger/profiler.cpp b/src/citra_qt/debugger/profiler.cpp
index 4f6ba0e1f..7bb010f77 100644
--- a/src/citra_qt/debugger/profiler.cpp
+++ b/src/citra_qt/debugger/profiler.cpp
@@ -9,13 +9,16 @@
9#include "citra_qt/debugger/profiler.h" 9#include "citra_qt/debugger/profiler.h"
10#include "citra_qt/util/util.h" 10#include "citra_qt/util/util.h"
11 11
12#include "common/common_types.h"
12#include "common/microprofile.h" 13#include "common/microprofile.h"
13#include "common/profiler_reporting.h" 14#include "common/profiler_reporting.h"
14 15
15// Include the implementation of the UI in this file. This isn't in microprofile.cpp because the 16// Include the implementation of the UI in this file. This isn't in microprofile.cpp because the
16// non-Qt frontends don't need it (and don't implement the UI drawing hooks either). 17// non-Qt frontends don't need it (and don't implement the UI drawing hooks either).
18#if MICROPROFILE_ENABLED
17#define MICROPROFILEUI_IMPL 1 19#define MICROPROFILEUI_IMPL 1
18#include "common/microprofileui.h" 20#include "common/microprofileui.h"
21#endif
19 22
20using namespace Common::Profiling; 23using namespace Common::Profiling;
21 24
@@ -34,21 +37,9 @@ static QVariant GetDataForColumn(int col, const AggregatedDuration& duration)
34 } 37 }
35} 38}
36 39
37static const TimingCategoryInfo* GetCategoryInfo(int id)
38{
39 const auto& categories = GetProfilingManager().GetTimingCategoriesInfo();
40 if ((size_t)id >= categories.size()) {
41 return nullptr;
42 } else {
43 return &categories[id];
44 }
45}
46
47ProfilerModel::ProfilerModel(QObject* parent) : QAbstractItemModel(parent) 40ProfilerModel::ProfilerModel(QObject* parent) : QAbstractItemModel(parent)
48{ 41{
49 updateProfilingInfo(); 42 updateProfilingInfo();
50 const auto& categories = GetProfilingManager().GetTimingCategoriesInfo();
51 results.time_per_category.resize(categories.size());
52} 43}
53 44
54QVariant ProfilerModel::headerData(int section, Qt::Orientation orientation, int role) const 45QVariant ProfilerModel::headerData(int section, Qt::Orientation orientation, int role) const
@@ -85,7 +76,7 @@ int ProfilerModel::rowCount(const QModelIndex& parent) const
85 if (parent.isValid()) { 76 if (parent.isValid()) {
86 return 0; 77 return 0;
87 } else { 78 } else {
88 return static_cast<int>(results.time_per_category.size() + 2); 79 return 2;
89 } 80 }
90} 81}
91 82
@@ -104,17 +95,6 @@ QVariant ProfilerModel::data(const QModelIndex& index, int role) const
104 } else { 95 } else {
105 return GetDataForColumn(index.column(), results.interframe_time); 96 return GetDataForColumn(index.column(), results.interframe_time);
106 } 97 }
107 } else {
108 if (index.column() == 0) {
109 const TimingCategoryInfo* info = GetCategoryInfo(index.row() - 2);
110 return info != nullptr ? QString(info->name) : QVariant();
111 } else {
112 if (index.row() - 2 < (int)results.time_per_category.size()) {
113 return GetDataForColumn(index.column(), results.time_per_category[index.row() - 2]);
114 } else {
115 return QVariant();
116 }
117 }
118 } 98 }
119 } 99 }
120 100
@@ -148,6 +128,8 @@ void ProfilerWidget::setProfilingInfoUpdateEnabled(bool enable)
148 } 128 }
149} 129}
150 130
131#if MICROPROFILE_ENABLED
132
151class MicroProfileWidget : public QWidget { 133class MicroProfileWidget : public QWidget {
152public: 134public:
153 MicroProfileWidget(QWidget* parent = nullptr); 135 MicroProfileWidget(QWidget* parent = nullptr);
@@ -171,6 +153,8 @@ private:
171 QTimer update_timer; 153 QTimer update_timer;
172}; 154};
173 155
156#endif
157
174MicroProfileDialog::MicroProfileDialog(QWidget* parent) 158MicroProfileDialog::MicroProfileDialog(QWidget* parent)
175 : QWidget(parent, Qt::Dialog) 159 : QWidget(parent, Qt::Dialog)
176{ 160{
@@ -180,6 +164,8 @@ MicroProfileDialog::MicroProfileDialog(QWidget* parent)
180 // Remove the "?" button from the titlebar and enable the maximize button 164 // Remove the "?" button from the titlebar and enable the maximize button
181 setWindowFlags(windowFlags() & ~Qt::WindowContextHelpButtonHint | Qt::WindowMaximizeButtonHint); 165 setWindowFlags(windowFlags() & ~Qt::WindowContextHelpButtonHint | Qt::WindowMaximizeButtonHint);
182 166
167#if MICROPROFILE_ENABLED
168
183 MicroProfileWidget* widget = new MicroProfileWidget(this); 169 MicroProfileWidget* widget = new MicroProfileWidget(this);
184 170
185 QLayout* layout = new QVBoxLayout(this); 171 QLayout* layout = new QVBoxLayout(this);
@@ -191,6 +177,7 @@ MicroProfileDialog::MicroProfileDialog(QWidget* parent)
191 setFocusProxy(widget); 177 setFocusProxy(widget);
192 widget->setFocusPolicy(Qt::StrongFocus); 178 widget->setFocusPolicy(Qt::StrongFocus);
193 widget->setFocus(); 179 widget->setFocus();
180#endif
194} 181}
195 182
196QAction* MicroProfileDialog::toggleViewAction() { 183QAction* MicroProfileDialog::toggleViewAction() {
@@ -218,6 +205,9 @@ void MicroProfileDialog::hideEvent(QHideEvent* ev) {
218 QWidget::hideEvent(ev); 205 QWidget::hideEvent(ev);
219} 206}
220 207
208
209#if MICROPROFILE_ENABLED
210
221/// There's no way to pass a user pointer to MicroProfile, so this variable is used to make the 211/// There's no way to pass a user pointer to MicroProfile, so this variable is used to make the
222/// QPainter available inside the drawing callbacks. 212/// QPainter available inside the drawing callbacks.
223static QPainter* mp_painter = nullptr; 213static QPainter* mp_painter = nullptr;
@@ -337,3 +327,4 @@ void MicroProfileDrawLine2D(u32 vertices_length, float* vertices, u32 hex_color)
337 mp_painter->drawPolyline(point_buf.data(), vertices_length); 327 mp_painter->drawPolyline(point_buf.data(), vertices_length);
338 point_buf.clear(); 328 point_buf.clear();
339} 329}
330#endif
diff --git a/src/citra_qt/debugger/profiler.h b/src/citra_qt/debugger/profiler.h
index 036054740..3b38ed8ec 100644
--- a/src/citra_qt/debugger/profiler.h
+++ b/src/citra_qt/debugger/profiler.h
@@ -7,8 +7,10 @@
7#include <QAbstractItemModel> 7#include <QAbstractItemModel>
8#include <QDockWidget> 8#include <QDockWidget>
9#include <QTimer> 9#include <QTimer>
10
10#include "ui_profiler.h" 11#include "ui_profiler.h"
11 12
13#include "common/microprofile.h"
12#include "common/profiler_reporting.h" 14#include "common/profiler_reporting.h"
13 15
14class ProfilerModel : public QAbstractItemModel 16class ProfilerModel : public QAbstractItemModel
@@ -49,6 +51,7 @@ private:
49 QTimer update_timer; 51 QTimer update_timer;
50}; 52};
51 53
54
52class MicroProfileDialog : public QWidget { 55class MicroProfileDialog : public QWidget {
53 Q_OBJECT 56 Q_OBJECT
54 57
diff --git a/src/citra_qt/main.cpp b/src/citra_qt/main.cpp
index 2ca1e51f6..f1ab29755 100644
--- a/src/citra_qt/main.cpp
+++ b/src/citra_qt/main.cpp
@@ -69,8 +69,10 @@ GMainWindow::GMainWindow() : config(new Config()), emu_thread(nullptr)
69 addDockWidget(Qt::BottomDockWidgetArea, profilerWidget); 69 addDockWidget(Qt::BottomDockWidgetArea, profilerWidget);
70 profilerWidget->hide(); 70 profilerWidget->hide();
71 71
72#if MICROPROFILE_ENABLED
72 microProfileDialog = new MicroProfileDialog(this); 73 microProfileDialog = new MicroProfileDialog(this);
73 microProfileDialog->hide(); 74 microProfileDialog->hide();
75#endif
74 76
75 disasmWidget = new DisassemblerWidget(this, emu_thread.get()); 77 disasmWidget = new DisassemblerWidget(this, emu_thread.get());
76 addDockWidget(Qt::BottomDockWidgetArea, disasmWidget); 78 addDockWidget(Qt::BottomDockWidgetArea, disasmWidget);
@@ -110,7 +112,9 @@ GMainWindow::GMainWindow() : config(new Config()), emu_thread(nullptr)
110 112
111 QMenu* debug_menu = ui.menu_View->addMenu(tr("Debugging")); 113 QMenu* debug_menu = ui.menu_View->addMenu(tr("Debugging"));
112 debug_menu->addAction(profilerWidget->toggleViewAction()); 114 debug_menu->addAction(profilerWidget->toggleViewAction());
115#if MICROPROFILE_ENABLED
113 debug_menu->addAction(microProfileDialog->toggleViewAction()); 116 debug_menu->addAction(microProfileDialog->toggleViewAction());
117#endif
114 debug_menu->addAction(disasmWidget->toggleViewAction()); 118 debug_menu->addAction(disasmWidget->toggleViewAction());
115 debug_menu->addAction(registersWidget->toggleViewAction()); 119 debug_menu->addAction(registersWidget->toggleViewAction());
116 debug_menu->addAction(callstackWidget->toggleViewAction()); 120 debug_menu->addAction(callstackWidget->toggleViewAction());
@@ -136,8 +140,10 @@ GMainWindow::GMainWindow() : config(new Config()), emu_thread(nullptr)
136 restoreGeometry(UISettings::values.geometry); 140 restoreGeometry(UISettings::values.geometry);
137 restoreState(UISettings::values.state); 141 restoreState(UISettings::values.state);
138 render_window->restoreGeometry(UISettings::values.renderwindow_geometry); 142 render_window->restoreGeometry(UISettings::values.renderwindow_geometry);
143#if MICROPROFILE_ENABLED
139 microProfileDialog->restoreGeometry(UISettings::values.microprofile_geometry); 144 microProfileDialog->restoreGeometry(UISettings::values.microprofile_geometry);
140 microProfileDialog->setVisible(UISettings::values.microprofile_visible); 145 microProfileDialog->setVisible(UISettings::values.microprofile_visible);
146#endif
141 147
142 game_list->LoadInterfaceLayout(); 148 game_list->LoadInterfaceLayout();
143 149
@@ -511,9 +517,10 @@ void GMainWindow::closeEvent(QCloseEvent* event) {
511 UISettings::values.geometry = saveGeometry(); 517 UISettings::values.geometry = saveGeometry();
512 UISettings::values.state = saveState(); 518 UISettings::values.state = saveState();
513 UISettings::values.renderwindow_geometry = render_window->saveGeometry(); 519 UISettings::values.renderwindow_geometry = render_window->saveGeometry();
520#if MICROPROFILE_ENABLED
514 UISettings::values.microprofile_geometry = microProfileDialog->saveGeometry(); 521 UISettings::values.microprofile_geometry = microProfileDialog->saveGeometry();
515 UISettings::values.microprofile_visible = microProfileDialog->isVisible(); 522 UISettings::values.microprofile_visible = microProfileDialog->isVisible();
516 523#endif
517 UISettings::values.single_window_mode = ui.action_Single_Window_Mode->isChecked(); 524 UISettings::values.single_window_mode = ui.action_Single_Window_Mode->isChecked();
518 UISettings::values.display_titlebar = ui.actionDisplay_widget_title_bars->isChecked(); 525 UISettings::values.display_titlebar = ui.actionDisplay_widget_title_bars->isChecked();
519 UISettings::values.first_start = false; 526 UISettings::values.first_start = false;
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index c839ce173..aa6eee2a3 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -47,7 +47,6 @@ set(HEADERS
47 microprofile.h 47 microprofile.h
48 microprofileui.h 48 microprofileui.h
49 platform.h 49 platform.h
50 profiler.h
51 profiler_reporting.h 50 profiler_reporting.h
52 scm_rev.h 51 scm_rev.h
53 scope_exit.h 52 scope_exit.h
diff --git a/src/common/assert.h b/src/common/assert.h
index 6849778b7..cd9b819a9 100644
--- a/src/common/assert.h
+++ b/src/common/assert.h
@@ -39,6 +39,7 @@ static void assert_noinline_call(const Fn& fn) {
39 }); } while (0) 39 }); } while (0)
40 40
41#define UNREACHABLE() ASSERT_MSG(false, "Unreachable code!") 41#define UNREACHABLE() ASSERT_MSG(false, "Unreachable code!")
42#define UNREACHABLE_MSG(...) ASSERT_MSG(false, __VA_ARGS__)
42 43
43#ifdef _DEBUG 44#ifdef _DEBUG
44#define DEBUG_ASSERT(_a_) ASSERT(_a_) 45#define DEBUG_ASSERT(_a_) ASSERT(_a_)
@@ -49,3 +50,4 @@ static void assert_noinline_call(const Fn& fn) {
49#endif 50#endif
50 51
51#define UNIMPLEMENTED() DEBUG_ASSERT_MSG(false, "Unimplemented code!") 52#define UNIMPLEMENTED() DEBUG_ASSERT_MSG(false, "Unimplemented code!")
53#define UNIMPLEMENTED_MSG(_a_, ...) ASSERT_MSG(false, _a_, __VA_ARGS__) \ No newline at end of file
diff --git a/src/common/file_util.h b/src/common/file_util.h
index b54a9fb72..3aac4fa46 100644
--- a/src/common/file_util.h
+++ b/src/common/file_util.h
@@ -192,7 +192,9 @@ public:
192 size_t ReadArray(T* data, size_t length) 192 size_t ReadArray(T* data, size_t length)
193 { 193 {
194 static_assert(std::is_standard_layout<T>(), "Given array does not consist of standard layout objects"); 194 static_assert(std::is_standard_layout<T>(), "Given array does not consist of standard layout objects");
195#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER)
195 static_assert(std::is_trivially_copyable<T>(), "Given array does not consist of trivially copyable objects"); 196 static_assert(std::is_trivially_copyable<T>(), "Given array does not consist of trivially copyable objects");
197#endif
196 198
197 if (!IsOpen()) { 199 if (!IsOpen()) {
198 m_good = false; 200 m_good = false;
@@ -210,7 +212,9 @@ public:
210 size_t WriteArray(const T* data, size_t length) 212 size_t WriteArray(const T* data, size_t length)
211 { 213 {
212 static_assert(std::is_standard_layout<T>(), "Given array does not consist of standard layout objects"); 214 static_assert(std::is_standard_layout<T>(), "Given array does not consist of standard layout objects");
215#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER)
213 static_assert(std::is_trivially_copyable<T>(), "Given array does not consist of trivially copyable objects"); 216 static_assert(std::is_trivially_copyable<T>(), "Given array does not consist of trivially copyable objects");
217#endif
214 218
215 if (!IsOpen()) { 219 if (!IsOpen()) {
216 m_good = false; 220 m_good = false;
diff --git a/src/common/microprofile.h b/src/common/microprofile.h
index d3b6cb97c..ef312c6e1 100644
--- a/src/common/microprofile.h
+++ b/src/common/microprofile.h
@@ -4,6 +4,10 @@
4 4
5#pragma once 5#pragma once
6 6
7// Uncomment this to disable microprofile. This will get you cleaner profiles when using
8// external sampling profilers like "Very Sleepy", and will improve performance somewhat.
9// #define MICROPROFILE_ENABLED 0
10
7// Customized Citra settings. 11// Customized Citra settings.
8// This file wraps the MicroProfile header so that these are consistent everywhere. 12// This file wraps the MicroProfile header so that these are consistent everywhere.
9#define MICROPROFILE_WEBSERVER 0 13#define MICROPROFILE_WEBSERVER 0
diff --git a/src/common/microprofileui.h b/src/common/microprofileui.h
index 97c369bd9..41abe6b75 100644
--- a/src/common/microprofileui.h
+++ b/src/common/microprofileui.h
@@ -13,4 +13,7 @@
13#define MICROPROFILE_HELP_ALT "Right-Click" 13#define MICROPROFILE_HELP_ALT "Right-Click"
14#define MICROPROFILE_HELP_MOD "Ctrl" 14#define MICROPROFILE_HELP_MOD "Ctrl"
15 15
16// This isn't included by microprofileui.h :(
17#include <cstdlib> // For std::abs
18
16#include <microprofileui.h> 19#include <microprofileui.h>
diff --git a/src/common/profiler.cpp b/src/common/profiler.cpp
index 7792edd2f..49eb3f40c 100644
--- a/src/common/profiler.cpp
+++ b/src/common/profiler.cpp
@@ -7,71 +7,16 @@
7#include <vector> 7#include <vector>
8 8
9#include "common/assert.h" 9#include "common/assert.h"
10#include "common/profiler.h"
11#include "common/profiler_reporting.h" 10#include "common/profiler_reporting.h"
12#include "common/synchronized_wrapper.h" 11#include "common/synchronized_wrapper.h"
13 12
14#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013.
15 #define WIN32_LEAN_AND_MEAN
16 #include <Windows.h> // For QueryPerformanceCounter/Frequency
17#endif
18
19namespace Common { 13namespace Common {
20namespace Profiling { 14namespace Profiling {
21 15
22#if ENABLE_PROFILING
23thread_local Timer* Timer::current_timer = nullptr;
24#endif
25
26#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013
27QPCClock::time_point QPCClock::now() {
28 static LARGE_INTEGER freq;
29 // Use this dummy local static to ensure this gets initialized once.
30 static BOOL dummy = QueryPerformanceFrequency(&freq);
31
32 LARGE_INTEGER ticks;
33 QueryPerformanceCounter(&ticks);
34
35 // This is prone to overflow when multiplying, which is why I'm using micro instead of nano. The
36 // correct way to approach this would be to just return ticks as a time_point and then subtract
37 // and do this conversion when creating a duration from two time_points, however, as far as I
38 // could tell the C++ requirements for these types are incompatible with this approach.
39 return time_point(duration(ticks.QuadPart * std::micro::den / freq.QuadPart));
40}
41#endif
42
43TimingCategory::TimingCategory(const char* name, TimingCategory* parent)
44 : accumulated_duration(0) {
45
46 ProfilingManager& manager = GetProfilingManager();
47 category_id = manager.RegisterTimingCategory(this, name);
48 if (parent != nullptr)
49 manager.SetTimingCategoryParent(category_id, parent->category_id);
50}
51
52ProfilingManager::ProfilingManager() 16ProfilingManager::ProfilingManager()
53 : last_frame_end(Clock::now()), this_frame_start(Clock::now()) { 17 : last_frame_end(Clock::now()), this_frame_start(Clock::now()) {
54} 18}
55 19
56unsigned int ProfilingManager::RegisterTimingCategory(TimingCategory* category, const char* name) {
57 TimingCategoryInfo info;
58 info.category = category;
59 info.name = name;
60 info.parent = TimingCategoryInfo::NO_PARENT;
61
62 unsigned int id = (unsigned int)timing_categories.size();
63 timing_categories.push_back(std::move(info));
64
65 return id;
66}
67
68void ProfilingManager::SetTimingCategoryParent(unsigned int category, unsigned int parent) {
69 ASSERT(category < timing_categories.size());
70 ASSERT(parent < timing_categories.size());
71
72 timing_categories[category].parent = parent;
73}
74
75void ProfilingManager::BeginFrame() { 20void ProfilingManager::BeginFrame() {
76 this_frame_start = Clock::now(); 21 this_frame_start = Clock::now();
77} 22}
@@ -82,11 +27,6 @@ void ProfilingManager::FinishFrame() {
82 results.interframe_time = now - last_frame_end; 27 results.interframe_time = now - last_frame_end;
83 results.frame_time = now - this_frame_start; 28 results.frame_time = now - this_frame_start;
84 29
85 results.time_per_category.resize(timing_categories.size());
86 for (size_t i = 0; i < timing_categories.size(); ++i) {
87 results.time_per_category[i] = timing_categories[i].category->GetAccumulatedTime();
88 }
89
90 last_frame_end = now; 30 last_frame_end = now;
91} 31}
92 32
@@ -100,26 +40,9 @@ void TimingResultsAggregator::Clear() {
100 window_size = cursor = 0; 40 window_size = cursor = 0;
101} 41}
102 42
103void TimingResultsAggregator::SetNumberOfCategories(size_t n) {
104 size_t old_size = times_per_category.size();
105 if (n == old_size)
106 return;
107
108 times_per_category.resize(n);
109
110 for (size_t i = old_size; i < n; ++i) {
111 times_per_category[i].resize(max_window_size, Duration::zero());
112 }
113}
114
115void TimingResultsAggregator::AddFrame(const ProfilingFrameResult& frame_result) { 43void TimingResultsAggregator::AddFrame(const ProfilingFrameResult& frame_result) {
116 SetNumberOfCategories(frame_result.time_per_category.size());
117
118 interframe_times[cursor] = frame_result.interframe_time; 44 interframe_times[cursor] = frame_result.interframe_time;
119 frame_times[cursor] = frame_result.frame_time; 45 frame_times[cursor] = frame_result.frame_time;
120 for (size_t i = 0; i < frame_result.time_per_category.size(); ++i) {
121 times_per_category[i][cursor] = frame_result.time_per_category[i];
122 }
123 46
124 ++cursor; 47 ++cursor;
125 if (cursor == max_window_size) 48 if (cursor == max_window_size)
@@ -162,11 +85,6 @@ AggregatedFrameResult TimingResultsAggregator::GetAggregatedResults() const {
162 result.fps = 0.0f; 85 result.fps = 0.0f;
163 } 86 }
164 87
165 result.time_per_category.resize(times_per_category.size());
166 for (size_t i = 0; i < times_per_category.size(); ++i) {
167 result.time_per_category[i] = AggregateField(times_per_category[i], window_size);
168 }
169
170 return result; 88 return result;
171} 89}
172 90
diff --git a/src/common/profiler.h b/src/common/profiler.h
deleted file mode 100644
index 3e967b4bc..000000000
--- a/src/common/profiler.h
+++ /dev/null
@@ -1,152 +0,0 @@
1// Copyright 2015 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <atomic>
8#include <chrono>
9
10#include "common/assert.h"
11#include "common/thread.h"
12
13namespace Common {
14namespace Profiling {
15
16// If this is defined to 0, it turns all Timers into no-ops.
17#ifndef ENABLE_PROFILING
18#define ENABLE_PROFILING 1
19#endif
20
21#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013
22// MSVC up to 2013 doesn't use QueryPerformanceCounter for high_resolution_clock, so it has bad
23// precision. We manually implement a clock based on QPC to get good results.
24
25struct QPCClock {
26 using duration = std::chrono::microseconds;
27 using time_point = std::chrono::time_point<QPCClock>;
28 using rep = duration::rep;
29 using period = duration::period;
30 static const bool is_steady = false;
31
32 static time_point now();
33};
34
35using Clock = QPCClock;
36#else
37using Clock = std::chrono::high_resolution_clock;
38#endif
39
40using Duration = Clock::duration;
41
42/**
43 * Represents a timing category that measured time can be accounted towards. Should be declared as a
44 * global variable and passed to Timers.
45 */
46class TimingCategory final {
47public:
48 TimingCategory(const char* name, TimingCategory* parent = nullptr);
49
50 unsigned int GetCategoryId() const {
51 return category_id;
52 }
53
54 /// Adds some time to this category. Can safely be called from multiple threads at the same time.
55 void AddTime(Duration amount) {
56 std::atomic_fetch_add_explicit(
57 &accumulated_duration, amount.count(),
58 std::memory_order_relaxed);
59 }
60
61 /**
62 * Atomically retrieves the accumulated measured time for this category and resets the counter
63 * to zero. Can be safely called concurrently with AddTime.
64 */
65 Duration GetAccumulatedTime() {
66 return Duration(std::atomic_exchange_explicit(
67 &accumulated_duration, (Duration::rep)0,
68 std::memory_order_relaxed));
69 }
70
71private:
72 unsigned int category_id;
73 std::atomic<Duration::rep> accumulated_duration;
74};
75
76/**
77 * Measures time elapsed between a call to Start and a call to Stop and attributes it to the given
78 * TimingCategory. Start/Stop can be called multiple times on the same timer, but each call must be
79 * appropriately paired.
80 *
81 * When a Timer is started, it automatically pauses a previously running timer on the same thread,
82 * which is resumed when it is stopped. As such, no special action needs to be taken to avoid
83 * double-accounting of time on two categories.
84 */
85class Timer {
86public:
87 Timer(TimingCategory& category) : category(category) {
88 }
89
90 void Start() {
91#if ENABLE_PROFILING
92 ASSERT(!running);
93 previous_timer = current_timer;
94 current_timer = this;
95 if (previous_timer != nullptr)
96 previous_timer->StopTiming();
97
98 StartTiming();
99#endif
100 }
101
102 void Stop() {
103#if ENABLE_PROFILING
104 ASSERT(running);
105 StopTiming();
106
107 if (previous_timer != nullptr)
108 previous_timer->StartTiming();
109 current_timer = previous_timer;
110#endif
111 }
112
113private:
114#if ENABLE_PROFILING
115 void StartTiming() {
116 start = Clock::now();
117 running = true;
118 }
119
120 void StopTiming() {
121 auto duration = Clock::now() - start;
122 running = false;
123 category.AddTime(std::chrono::duration_cast<Duration>(duration));
124 }
125
126 Clock::time_point start;
127 bool running = false;
128
129 Timer* previous_timer;
130 static thread_local Timer* current_timer;
131#endif
132
133 TimingCategory& category;
134};
135
136/**
137 * A Timer that automatically starts timing when created and stops at the end of the scope. Should
138 * be used in the majority of cases.
139 */
140class ScopeTimer : public Timer {
141public:
142 ScopeTimer(TimingCategory& category) : Timer(category) {
143 Start();
144 }
145
146 ~ScopeTimer() {
147 Stop();
148 }
149};
150
151} // namespace Profiling
152} // namespace Common
diff --git a/src/common/profiler_reporting.h b/src/common/profiler_reporting.h
index df98e05b7..fa1ac883f 100644
--- a/src/common/profiler_reporting.h
+++ b/src/common/profiler_reporting.h
@@ -4,22 +4,17 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <chrono>
7#include <cstddef> 8#include <cstddef>
8#include <vector> 9#include <vector>
9 10
10#include "common/profiler.h"
11#include "common/synchronized_wrapper.h" 11#include "common/synchronized_wrapper.h"
12 12
13namespace Common { 13namespace Common {
14namespace Profiling { 14namespace Profiling {
15 15
16struct TimingCategoryInfo { 16using Clock = std::chrono::high_resolution_clock;
17 static const unsigned int NO_PARENT = -1; 17using Duration = Clock::duration;
18
19 TimingCategory* category;
20 const char* name;
21 unsigned int parent;
22};
23 18
24struct ProfilingFrameResult { 19struct ProfilingFrameResult {
25 /// Time since the last delivered frame 20 /// Time since the last delivered frame
@@ -27,22 +22,12 @@ struct ProfilingFrameResult {
27 22
28 /// Time spent processing a frame, excluding VSync 23 /// Time spent processing a frame, excluding VSync
29 Duration frame_time; 24 Duration frame_time;
30
31 /// Total amount of time spent inside each category in this frame. Indexed by the category id
32 std::vector<Duration> time_per_category;
33}; 25};
34 26
35class ProfilingManager final { 27class ProfilingManager final {
36public: 28public:
37 ProfilingManager(); 29 ProfilingManager();
38 30
39 unsigned int RegisterTimingCategory(TimingCategory* category, const char* name);
40 void SetTimingCategoryParent(unsigned int category, unsigned int parent);
41
42 const std::vector<TimingCategoryInfo>& GetTimingCategoriesInfo() const {
43 return timing_categories;
44 }
45
46 /// This should be called after swapping screen buffers. 31 /// This should be called after swapping screen buffers.
47 void BeginFrame(); 32 void BeginFrame();
48 /// This should be called before swapping screen buffers. 33 /// This should be called before swapping screen buffers.
@@ -54,7 +39,6 @@ public:
54 } 39 }
55 40
56private: 41private:
57 std::vector<TimingCategoryInfo> timing_categories;
58 Clock::time_point last_frame_end; 42 Clock::time_point last_frame_end;
59 Clock::time_point this_frame_start; 43 Clock::time_point this_frame_start;
60 44
@@ -73,9 +57,6 @@ struct AggregatedFrameResult {
73 AggregatedDuration frame_time; 57 AggregatedDuration frame_time;
74 58
75 float fps; 59 float fps;
76
77 /// Total amount of time spent inside each category in this frame. Indexed by the category id
78 std::vector<AggregatedDuration> time_per_category;
79}; 60};
80 61
81class TimingResultsAggregator final { 62class TimingResultsAggregator final {
@@ -83,7 +64,6 @@ public:
83 TimingResultsAggregator(size_t window_size); 64 TimingResultsAggregator(size_t window_size);
84 65
85 void Clear(); 66 void Clear();
86 void SetNumberOfCategories(size_t n);
87 67
88 void AddFrame(const ProfilingFrameResult& frame_result); 68 void AddFrame(const ProfilingFrameResult& frame_result);
89 69
@@ -95,7 +75,6 @@ public:
95 75
96 std::vector<Duration> interframe_times; 76 std::vector<Duration> interframe_times;
97 std::vector<Duration> frame_times; 77 std::vector<Duration> frame_times;
98 std::vector<std::vector<Duration>> times_per_category;
99}; 78};
100 79
101ProfilingManager& GetProfilingManager(); 80ProfilingManager& GetProfilingManager();
diff --git a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
index 647784208..8d4b26815 100644
--- a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
+++ b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
@@ -10,7 +10,6 @@
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "common/logging/log.h" 11#include "common/logging/log.h"
12#include "common/microprofile.h" 12#include "common/microprofile.h"
13#include "common/profiler.h"
14 13
15#include "core/memory.h" 14#include "core/memory.h"
16#include "core/hle/svc.h" 15#include "core/hle/svc.h"
@@ -25,9 +24,6 @@
25 24
26#include "core/gdbstub/gdbstub.h" 25#include "core/gdbstub/gdbstub.h"
27 26
28Common::Profiling::TimingCategory profile_execute("DynCom::Execute");
29Common::Profiling::TimingCategory profile_decode("DynCom::Decode");
30
31enum { 27enum {
32 COND = (1 << 0), 28 COND = (1 << 0),
33 NON_BRANCH = (1 << 1), 29 NON_BRANCH = (1 << 1),
@@ -3496,7 +3492,6 @@ static unsigned int InterpreterTranslateInstruction(const ARMul_State* cpu, cons
3496} 3492}
3497 3493
3498static int InterpreterTranslateBlock(ARMul_State* cpu, int& bb_start, u32 addr) { 3494static int InterpreterTranslateBlock(ARMul_State* cpu, int& bb_start, u32 addr) {
3499 Common::Profiling::ScopeTimer timer_decode(profile_decode);
3500 MICROPROFILE_SCOPE(DynCom_Decode); 3495 MICROPROFILE_SCOPE(DynCom_Decode);
3501 3496
3502 // Decode instruction, get index 3497 // Decode instruction, get index
@@ -3530,7 +3525,6 @@ static int InterpreterTranslateBlock(ARMul_State* cpu, int& bb_start, u32 addr)
3530} 3525}
3531 3526
3532static int InterpreterTranslateSingle(ARMul_State* cpu, int& bb_start, u32 addr) { 3527static int InterpreterTranslateSingle(ARMul_State* cpu, int& bb_start, u32 addr) {
3533 Common::Profiling::ScopeTimer timer_decode(profile_decode);
3534 MICROPROFILE_SCOPE(DynCom_Decode); 3528 MICROPROFILE_SCOPE(DynCom_Decode);
3535 3529
3536 ARM_INST_PTR inst_base = nullptr; 3530 ARM_INST_PTR inst_base = nullptr;
@@ -3565,7 +3559,6 @@ static int clz(unsigned int x) {
3565MICROPROFILE_DEFINE(DynCom_Execute, "DynCom", "Execute", MP_RGB(255, 0, 0)); 3559MICROPROFILE_DEFINE(DynCom_Execute, "DynCom", "Execute", MP_RGB(255, 0, 0));
3566 3560
3567unsigned InterpreterMainLoop(ARMul_State* cpu) { 3561unsigned InterpreterMainLoop(ARMul_State* cpu) {
3568 Common::Profiling::ScopeTimer timer_execute(profile_execute);
3569 MICROPROFILE_SCOPE(DynCom_Execute); 3562 MICROPROFILE_SCOPE(DynCom_Execute);
3570 3563
3571 GDBStub::BreakpointAddress breakpoint_data; 3564 GDBStub::BreakpointAddress breakpoint_data;
diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp
index c1a7ec5bf..ae0c116ef 100644
--- a/src/core/gdbstub/gdbstub.cpp
+++ b/src/core/gdbstub/gdbstub.cpp
@@ -529,7 +529,7 @@ static void ReadRegister() {
529 id |= HexCharToValue(command_buffer[2]); 529 id |= HexCharToValue(command_buffer[2]);
530 } 530 }
531 531
532 if (id >= R0_REGISTER && id <= R15_REGISTER) { 532 if (id <= R15_REGISTER) {
533 IntToGdbHex(reply, Core::g_app_core->GetReg(id)); 533 IntToGdbHex(reply, Core::g_app_core->GetReg(id));
534 } else if (id == CPSR_REGISTER) { 534 } else if (id == CPSR_REGISTER) {
535 IntToGdbHex(reply, Core::g_app_core->GetCPSR()); 535 IntToGdbHex(reply, Core::g_app_core->GetCPSR());
@@ -584,7 +584,7 @@ static void WriteRegister() {
584 id |= HexCharToValue(command_buffer[2]); 584 id |= HexCharToValue(command_buffer[2]);
585 } 585 }
586 586
587 if (id >= R0_REGISTER && id <= R15_REGISTER) { 587 if (id <= R15_REGISTER) {
588 Core::g_app_core->SetReg(id, GdbHexToInt(buffer_ptr)); 588 Core::g_app_core->SetReg(id, GdbHexToInt(buffer_ptr));
589 } else if (id == CPSR_REGISTER) { 589 } else if (id == CPSR_REGISTER) {
590 Core::g_app_core->SetCPSR(GdbHexToInt(buffer_ptr)); 590 Core::g_app_core->SetCPSR(GdbHexToInt(buffer_ptr));
diff --git a/src/core/hle/result.h b/src/core/hle/result.h
index 2d22652d9..53931a106 100644
--- a/src/core/hle/result.h
+++ b/src/core/hle/result.h
@@ -18,6 +18,7 @@
18/// Detailed description of the error. This listing is likely incomplete. 18/// Detailed description of the error. This listing is likely incomplete.
19enum class ErrorDescription : u32 { 19enum class ErrorDescription : u32 {
20 Success = 0, 20 Success = 0,
21 OS_InvalidBufferDescriptor = 48,
21 WrongAddress = 53, 22 WrongAddress = 53,
22 FS_NotFound = 120, 23 FS_NotFound = 120,
23 FS_AlreadyExists = 190, 24 FS_AlreadyExists = 190,
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index 9591522e5..3f71e7f2b 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -43,7 +43,7 @@ void FindContentInfos(Service::Interface* self) {
43 am_content_count[media_type] = cmd_buff[4]; 43 am_content_count[media_type] = cmd_buff[4];
44 44
45 cmd_buff[1] = RESULT_SUCCESS.raw; 45 cmd_buff[1] = RESULT_SUCCESS.raw;
46 LOG_WARNING(Service_AM, "(STUBBED) media_type=%u, title_id=0x%016lx, content_cound=%u, content_ids_pointer=0x%08x, content_info_pointer=0x%08x", 46 LOG_WARNING(Service_AM, "(STUBBED) media_type=%u, title_id=0x%016llx, content_cound=%u, content_ids_pointer=0x%08x, content_info_pointer=0x%08x",
47 media_type, title_id, am_content_count[media_type], content_ids_pointer, content_info_pointer); 47 media_type, title_id, am_content_count[media_type], content_ids_pointer, content_info_pointer);
48} 48}
49 49
diff --git a/src/core/hle/service/dsp_dsp.cpp b/src/core/hle/service/dsp_dsp.cpp
index 08e437125..995bee3f9 100644
--- a/src/core/hle/service/dsp_dsp.cpp
+++ b/src/core/hle/service/dsp_dsp.cpp
@@ -2,6 +2,7 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
5#include <cinttypes> 6#include <cinttypes>
6 7
7#include "audio_core/hle/pipe.h" 8#include "audio_core/hle/pipe.h"
@@ -12,37 +13,80 @@
12#include "core/hle/kernel/event.h" 13#include "core/hle/kernel/event.h"
13#include "core/hle/service/dsp_dsp.h" 14#include "core/hle/service/dsp_dsp.h"
14 15
16using DspPipe = DSP::HLE::DspPipe;
17
15//////////////////////////////////////////////////////////////////////////////////////////////////// 18////////////////////////////////////////////////////////////////////////////////////////////////////
16// Namespace DSP_DSP 19// Namespace DSP_DSP
17 20
18namespace DSP_DSP { 21namespace DSP_DSP {
19 22
20static u32 read_pipe_count;
21static Kernel::SharedPtr<Kernel::Event> semaphore_event; 23static Kernel::SharedPtr<Kernel::Event> semaphore_event;
22 24
23struct PairHash { 25/// There are three types of interrupts
24 template <typename T, typename U> 26enum class InterruptType {
25 std::size_t operator()(const std::pair<T, U> &x) const { 27 Zero, One, Pipe
26 // TODO(yuriks): Replace with better hash combining function. 28};
27 return std::hash<T>()(x.first) ^ std::hash<U>()(x.second); 29constexpr size_t NUM_INTERRUPT_TYPE = 3;
30
31class InterruptEvents final {
32public:
33 void Signal(InterruptType type, DspPipe pipe) {
34 Kernel::SharedPtr<Kernel::Event>& event = Get(type, pipe);
35 if (event) {
36 event->Signal();
37 }
28 } 38 }
39
40 Kernel::SharedPtr<Kernel::Event>& Get(InterruptType type, DspPipe dsp_pipe) {
41 switch (type) {
42 case InterruptType::Zero:
43 return zero;
44 case InterruptType::One:
45 return one;
46 case InterruptType::Pipe: {
47 const size_t pipe_index = static_cast<size_t>(dsp_pipe);
48 ASSERT(pipe_index < DSP::HLE::NUM_DSP_PIPE);
49 return pipe[pipe_index];
50 }
51 }
52
53 UNREACHABLE_MSG("Invalid interrupt type = %zu", static_cast<size_t>(type));
54 }
55
56 bool HasTooManyEventsRegistered() const {
57 // Actual service implementation only has 6 'slots' for interrupts.
58 constexpr size_t max_number_of_interrupt_events = 6;
59
60 size_t number = std::count_if(pipe.begin(), pipe.end(), [](const auto& evt) {
61 return evt != nullptr;
62 });
63
64 if (zero != nullptr)
65 number++;
66 if (one != nullptr)
67 number++;
68
69 return number >= max_number_of_interrupt_events;
70 }
71
72private:
73 /// Currently unknown purpose
74 Kernel::SharedPtr<Kernel::Event> zero = nullptr;
75 /// Currently unknown purpose
76 Kernel::SharedPtr<Kernel::Event> one = nullptr;
77 /// Each DSP pipe has an associated interrupt
78 std::array<Kernel::SharedPtr<Kernel::Event>, DSP::HLE::NUM_DSP_PIPE> pipe = {{}};
29}; 79};
30 80
31/// Map of (audio interrupt number, channel number) to Kernel::Events. See: RegisterInterruptEvents 81static InterruptEvents interrupt_events;
32static std::unordered_map<std::pair<u32, u32>, Kernel::SharedPtr<Kernel::Event>, PairHash> interrupt_events;
33 82
34// DSP Interrupts: 83// DSP Interrupts:
35// Interrupt #2 occurs every frame tick. Userland programs normally have a thread that's waiting 84// The audio-pipe interrupt occurs every frame tick. Userland programs normally have a thread
36// for an interrupt event. Immediately after this interrupt event, userland normally updates the 85// that's waiting for an interrupt event. Immediately after this interrupt event, userland
37// state in the next region and increments the relevant frame counter by two. 86// normally updates the state in the next region and increments the relevant frame counter by
38void SignalAllInterrupts() { 87// two.
39 // HACK: The other interrupts have currently unknown purpose, we trigger them each tick in any case. 88void SignalPipeInterrupt(DspPipe pipe) {
40 for (auto& interrupt_event : interrupt_events) 89 interrupt_events.Signal(InterruptType::Pipe, pipe);
41 interrupt_event.second->Signal();
42}
43
44void SignalInterrupt(u32 interrupt, u32 channel) {
45 interrupt_events[std::make_pair(interrupt, channel)]->Signal();
46} 90}
47 91
48/** 92/**
@@ -58,7 +102,10 @@ static void ConvertProcessAddressFromDspDram(Service::Interface* self) {
58 102
59 u32 addr = cmd_buff[1]; 103 u32 addr = cmd_buff[1];
60 104
105 cmd_buff[0] = IPC::MakeHeader(0xC, 2, 0);
61 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 106 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
107
108 // TODO(merry): There is a per-region offset missing in this calculation (that seems to be always zero).
62 cmd_buff[2] = (addr << 1) + (Memory::DSP_RAM_VADDR + 0x40000); 109 cmd_buff[2] = (addr << 1) + (Memory::DSP_RAM_VADDR + 0x40000);
63 110
64 LOG_DEBUG(Service_DSP, "addr=0x%08X", addr); 111 LOG_DEBUG(Service_DSP, "addr=0x%08X", addr);
@@ -113,7 +160,9 @@ static void LoadComponent(Service::Interface* self) {
113static void GetSemaphoreEventHandle(Service::Interface* self) { 160static void GetSemaphoreEventHandle(Service::Interface* self) {
114 u32* cmd_buff = Kernel::GetCommandBuffer(); 161 u32* cmd_buff = Kernel::GetCommandBuffer();
115 162
163 cmd_buff[0] = IPC::MakeHeader(0x16, 1, 2);
116 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 164 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
165 // cmd_buff[2] not set
117 cmd_buff[3] = Kernel::g_handle_table.Create(semaphore_event).MoveFrom(); // Event handle 166 cmd_buff[3] = Kernel::g_handle_table.Create(semaphore_event).MoveFrom(); // Event handle
118 167
119 LOG_WARNING(Service_DSP, "(STUBBED) called"); 168 LOG_WARNING(Service_DSP, "(STUBBED) called");
@@ -138,8 +187,7 @@ static void FlushDataCache(Service::Interface* self) {
138 u32 size = cmd_buff[2]; 187 u32 size = cmd_buff[2];
139 u32 process = cmd_buff[4]; 188 u32 process = cmd_buff[4];
140 189
141 // TODO(purpasmart96): Verify return header on HW 190 cmd_buff[0] = IPC::MakeHeader(0x13, 1, 0);
142
143 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 191 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
144 192
145 LOG_TRACE(Service_DSP, "called address=0x%08X, size=0x%X, process=0x%08X", address, size, process); 193 LOG_TRACE(Service_DSP, "called address=0x%08X, size=0x%X, process=0x%08X", address, size, process);
@@ -148,8 +196,8 @@ static void FlushDataCache(Service::Interface* self) {
148/** 196/**
149 * DSP_DSP::RegisterInterruptEvents service function 197 * DSP_DSP::RegisterInterruptEvents service function
150 * Inputs: 198 * Inputs:
151 * 1 : Interrupt Number 199 * 1 : Interrupt Type
152 * 2 : Channel Number 200 * 2 : Pipe Number
153 * 4 : Interrupt event handle 201 * 4 : Interrupt event handle
154 * Outputs: 202 * Outputs:
155 * 1 : Result of function, 0 on success, otherwise error code 203 * 1 : Result of function, 0 on success, otherwise error code
@@ -157,23 +205,40 @@ static void FlushDataCache(Service::Interface* self) {
157static void RegisterInterruptEvents(Service::Interface* self) { 205static void RegisterInterruptEvents(Service::Interface* self) {
158 u32* cmd_buff = Kernel::GetCommandBuffer(); 206 u32* cmd_buff = Kernel::GetCommandBuffer();
159 207
160 u32 interrupt = cmd_buff[1]; 208 u32 type_index = cmd_buff[1];
161 u32 channel = cmd_buff[2]; 209 u32 pipe_index = cmd_buff[2];
162 u32 event_handle = cmd_buff[4]; 210 u32 event_handle = cmd_buff[4];
163 211
212 ASSERT_MSG(type_index < NUM_INTERRUPT_TYPE && pipe_index < DSP::HLE::NUM_DSP_PIPE,
213 "Invalid type or pipe: type = %u, pipe = %u", type_index, pipe_index);
214
215 InterruptType type = static_cast<InterruptType>(cmd_buff[1]);
216 DspPipe pipe = static_cast<DspPipe>(cmd_buff[2]);
217
218 cmd_buff[0] = IPC::MakeHeader(0x15, 1, 0);
219
164 if (event_handle) { 220 if (event_handle) {
165 auto evt = Kernel::g_handle_table.Get<Kernel::Event>(cmd_buff[4]); 221 auto evt = Kernel::g_handle_table.Get<Kernel::Event>(cmd_buff[4]);
166 if (evt) { 222
167 interrupt_events[std::make_pair(interrupt, channel)] = evt; 223 if (!evt) {
168 cmd_buff[1] = RESULT_SUCCESS.raw; 224 LOG_INFO(Service_DSP, "Invalid event handle! type=%u, pipe=%u, event_handle=0x%08X", type_index, pipe_index, event_handle);
169 LOG_INFO(Service_DSP, "Registered interrupt=%u, channel=%u, event_handle=0x%08X", interrupt, channel, event_handle); 225 ASSERT(false); // TODO: This should really be handled at an IPC translation layer.
170 } else { 226 }
171 LOG_CRITICAL(Service_DSP, "Invalid event handle! interrupt=%u, channel=%u, event_handle=0x%08X", interrupt, channel, event_handle); 227
172 ASSERT(false); // This should really be handled at a IPC translation layer. 228 if (interrupt_events.HasTooManyEventsRegistered()) {
229 LOG_INFO(Service_DSP, "Ran out of space to register interrupts (Attempted to register type=%u, pipe=%u, event_handle=0x%08X)",
230 type_index, pipe_index, event_handle);
231 cmd_buff[1] = ResultCode(ErrorDescription::InvalidResultValue, ErrorModule::DSP, ErrorSummary::OutOfResource, ErrorLevel::Status).raw;
232 return;
173 } 233 }
234
235 interrupt_events.Get(type, pipe) = evt;
236 LOG_INFO(Service_DSP, "Registered type=%u, pipe=%u, event_handle=0x%08X", type_index, pipe_index, event_handle);
237 cmd_buff[1] = RESULT_SUCCESS.raw;
174 } else { 238 } else {
175 interrupt_events.erase(std::make_pair(interrupt, channel)); 239 interrupt_events.Get(type, pipe) = nullptr;
176 LOG_INFO(Service_DSP, "Unregistered interrupt=%u, channel=%u, event_handle=0x%08X", interrupt, channel, event_handle); 240 LOG_INFO(Service_DSP, "Unregistered interrupt=%u, channel=%u, event_handle=0x%08X", type_index, pipe_index, event_handle);
241 cmd_buff[1] = RESULT_SUCCESS.raw;
177 } 242 }
178} 243}
179 244
@@ -187,6 +252,7 @@ static void RegisterInterruptEvents(Service::Interface* self) {
187static void SetSemaphore(Service::Interface* self) { 252static void SetSemaphore(Service::Interface* self) {
188 u32* cmd_buff = Kernel::GetCommandBuffer(); 253 u32* cmd_buff = Kernel::GetCommandBuffer();
189 254
255 cmd_buff[0] = IPC::MakeHeader(0x7, 1, 0);
190 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 256 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
191 257
192 LOG_WARNING(Service_DSP, "(STUBBED) called"); 258 LOG_WARNING(Service_DSP, "(STUBBED) called");
@@ -195,7 +261,7 @@ static void SetSemaphore(Service::Interface* self) {
195/** 261/**
196 * DSP_DSP::WriteProcessPipe service function 262 * DSP_DSP::WriteProcessPipe service function
197 * Inputs: 263 * Inputs:
198 * 1 : Channel 264 * 1 : Pipe Number
199 * 2 : Size 265 * 2 : Size
200 * 3 : (size << 14) | 0x402 266 * 3 : (size << 14) | 0x402
201 * 4 : Buffer 267 * 4 : Buffer
@@ -206,24 +272,32 @@ static void SetSemaphore(Service::Interface* self) {
206static void WriteProcessPipe(Service::Interface* self) { 272static void WriteProcessPipe(Service::Interface* self) {
207 u32* cmd_buff = Kernel::GetCommandBuffer(); 273 u32* cmd_buff = Kernel::GetCommandBuffer();
208 274
209 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(cmd_buff[1]); 275 u32 pipe_index = cmd_buff[1];
210 u32 size = cmd_buff[2]; 276 u32 size = cmd_buff[2];
211 u32 buffer = cmd_buff[4]; 277 u32 buffer = cmd_buff[4];
212 278
213 ASSERT_MSG(IPC::StaticBufferDesc(size, 1) == cmd_buff[3], "IPC static buffer descriptor failed validation (0x%X). pipe=%u, size=0x%X, buffer=0x%08X", cmd_buff[3], pipe, size, buffer); 279 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(pipe_index);
214 ASSERT_MSG(Memory::GetPointer(buffer) != nullptr, "Invalid Buffer: pipe=%u, size=0x%X, buffer=0x%08X", pipe, size, buffer);
215 280
216 std::vector<u8> message(size); 281 if (IPC::StaticBufferDesc(size, 1) != cmd_buff[3]) {
282 LOG_ERROR(Service_DSP, "IPC static buffer descriptor failed validation (0x%X). pipe=%u, size=0x%X, buffer=0x%08X", cmd_buff[3], pipe_index, size, buffer);
283 cmd_buff[0] = IPC::MakeHeader(0, 1, 0);
284 cmd_buff[1] = ResultCode(ErrorDescription::OS_InvalidBufferDescriptor, ErrorModule::OS, ErrorSummary::WrongArgument, ErrorLevel::Permanent).raw;
285 return;
286 }
287
288 ASSERT_MSG(Memory::GetPointer(buffer) != nullptr, "Invalid Buffer: pipe=%u, size=0x%X, buffer=0x%08X", pipe_index, size, buffer);
217 289
290 std::vector<u8> message(size);
218 for (size_t i = 0; i < size; i++) { 291 for (size_t i = 0; i < size; i++) {
219 message[i] = Memory::Read8(buffer + i); 292 message[i] = Memory::Read8(buffer + i);
220 } 293 }
221 294
222 DSP::HLE::PipeWrite(pipe, message); 295 DSP::HLE::PipeWrite(pipe, message);
223 296
297 cmd_buff[0] = IPC::MakeHeader(0xD, 1, 0);
224 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 298 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
225 299
226 LOG_DEBUG(Service_DSP, "pipe=%u, size=0x%X, buffer=0x%08X", pipe, size, buffer); 300 LOG_DEBUG(Service_DSP, "pipe=%u, size=0x%X, buffer=0x%08X", pipe_index, size, buffer);
227} 301}
228 302
229/** 303/**
@@ -243,13 +317,16 @@ static void WriteProcessPipe(Service::Interface* self) {
243static void ReadPipeIfPossible(Service::Interface* self) { 317static void ReadPipeIfPossible(Service::Interface* self) {
244 u32* cmd_buff = Kernel::GetCommandBuffer(); 318 u32* cmd_buff = Kernel::GetCommandBuffer();
245 319
246 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(cmd_buff[1]); 320 u32 pipe_index = cmd_buff[1];
247 u32 unknown = cmd_buff[2]; 321 u32 unknown = cmd_buff[2];
248 u32 size = cmd_buff[3] & 0xFFFF; // Lower 16 bits are size 322 u32 size = cmd_buff[3] & 0xFFFF; // Lower 16 bits are size
249 VAddr addr = cmd_buff[0x41]; 323 VAddr addr = cmd_buff[0x41];
250 324
251 ASSERT_MSG(Memory::GetPointer(addr) != nullptr, "Invalid addr: pipe=0x%08X, unknown=0x%08X, size=0x%X, buffer=0x%08X", pipe, unknown, size, addr); 325 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(pipe_index);
326
327 ASSERT_MSG(Memory::GetPointer(addr) != nullptr, "Invalid addr: pipe=%u, unknown=0x%08X, size=0x%X, buffer=0x%08X", pipe_index, unknown, size, addr);
252 328
329 cmd_buff[0] = IPC::MakeHeader(0x10, 1, 2);
253 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 330 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
254 if (DSP::HLE::GetPipeReadableSize(pipe) >= size) { 331 if (DSP::HLE::GetPipeReadableSize(pipe) >= size) {
255 std::vector<u8> response = DSP::HLE::PipeRead(pipe, size); 332 std::vector<u8> response = DSP::HLE::PipeRead(pipe, size);
@@ -260,8 +337,10 @@ static void ReadPipeIfPossible(Service::Interface* self) {
260 } else { 337 } else {
261 cmd_buff[2] = 0; // Return no data 338 cmd_buff[2] = 0; // Return no data
262 } 339 }
340 cmd_buff[3] = IPC::StaticBufferDesc(size, 0);
341 cmd_buff[4] = addr;
263 342
264 LOG_DEBUG(Service_DSP, "pipe=0x%08X, unknown=0x%08X, size=0x%X, buffer=0x%08X, return cmd_buff[2]=0x%08X", pipe, unknown, size, addr, cmd_buff[2]); 343 LOG_DEBUG(Service_DSP, "pipe=%u, unknown=0x%08X, size=0x%X, buffer=0x%08X, return cmd_buff[2]=0x%08X", pipe_index, unknown, size, addr, cmd_buff[2]);
265} 344}
266 345
267/** 346/**
@@ -278,26 +357,31 @@ static void ReadPipeIfPossible(Service::Interface* self) {
278static void ReadPipe(Service::Interface* self) { 357static void ReadPipe(Service::Interface* self) {
279 u32* cmd_buff = Kernel::GetCommandBuffer(); 358 u32* cmd_buff = Kernel::GetCommandBuffer();
280 359
281 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(cmd_buff[1]); 360 u32 pipe_index = cmd_buff[1];
282 u32 unknown = cmd_buff[2]; 361 u32 unknown = cmd_buff[2];
283 u32 size = cmd_buff[3] & 0xFFFF; // Lower 16 bits are size 362 u32 size = cmd_buff[3] & 0xFFFF; // Lower 16 bits are size
284 VAddr addr = cmd_buff[0x41]; 363 VAddr addr = cmd_buff[0x41];
285 364
286 ASSERT_MSG(Memory::GetPointer(addr) != nullptr, "Invalid addr: pipe=0x%08X, unknown=0x%08X, size=0x%X, buffer=0x%08X", pipe, unknown, size, addr); 365 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(pipe_index);
366
367 ASSERT_MSG(Memory::GetPointer(addr) != nullptr, "Invalid addr: pipe=%u, unknown=0x%08X, size=0x%X, buffer=0x%08X", pipe_index, unknown, size, addr);
287 368
288 if (DSP::HLE::GetPipeReadableSize(pipe) >= size) { 369 if (DSP::HLE::GetPipeReadableSize(pipe) >= size) {
289 std::vector<u8> response = DSP::HLE::PipeRead(pipe, size); 370 std::vector<u8> response = DSP::HLE::PipeRead(pipe, size);
290 371
291 Memory::WriteBlock(addr, response.data(), response.size()); 372 Memory::WriteBlock(addr, response.data(), response.size());
292 373
374 cmd_buff[0] = IPC::MakeHeader(0xE, 2, 2);
293 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 375 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
294 cmd_buff[2] = static_cast<u32>(response.size()); 376 cmd_buff[2] = static_cast<u32>(response.size());
377 cmd_buff[3] = IPC::StaticBufferDesc(size, 0);
378 cmd_buff[4] = addr;
295 } else { 379 } else {
296 // No more data is in pipe. Hardware hangs in this case; this should never happen. 380 // No more data is in pipe. Hardware hangs in this case; this should never happen.
297 UNREACHABLE(); 381 UNREACHABLE();
298 } 382 }
299 383
300 LOG_DEBUG(Service_DSP, "pipe=0x%08X, unknown=0x%08X, size=0x%X, buffer=0x%08X, return cmd_buff[2]=0x%08X", pipe, unknown, size, addr, cmd_buff[2]); 384 LOG_DEBUG(Service_DSP, "pipe=%u, unknown=0x%08X, size=0x%X, buffer=0x%08X, return cmd_buff[2]=0x%08X", pipe_index, unknown, size, addr, cmd_buff[2]);
301} 385}
302 386
303/** 387/**
@@ -312,13 +396,16 @@ static void ReadPipe(Service::Interface* self) {
312static void GetPipeReadableSize(Service::Interface* self) { 396static void GetPipeReadableSize(Service::Interface* self) {
313 u32* cmd_buff = Kernel::GetCommandBuffer(); 397 u32* cmd_buff = Kernel::GetCommandBuffer();
314 398
315 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(cmd_buff[1]); 399 u32 pipe_index = cmd_buff[1];
316 u32 unknown = cmd_buff[2]; 400 u32 unknown = cmd_buff[2];
317 401
402 DSP::HLE::DspPipe pipe = static_cast<DSP::HLE::DspPipe>(pipe_index);
403
404 cmd_buff[0] = IPC::MakeHeader(0xF, 2, 0);
318 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 405 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
319 cmd_buff[2] = DSP::HLE::GetPipeReadableSize(pipe); 406 cmd_buff[2] = DSP::HLE::GetPipeReadableSize(pipe);
320 407
321 LOG_DEBUG(Service_DSP, "pipe=0x%08X, unknown=0x%08X, return cmd_buff[2]=0x%08X", pipe, unknown, cmd_buff[2]); 408 LOG_DEBUG(Service_DSP, "pipe=%u, unknown=0x%08X, return cmd_buff[2]=0x%08X", pipe_index, unknown, cmd_buff[2]);
322} 409}
323 410
324/** 411/**
@@ -333,6 +420,7 @@ static void SetSemaphoreMask(Service::Interface* self) {
333 420
334 u32 mask = cmd_buff[1]; 421 u32 mask = cmd_buff[1];
335 422
423 cmd_buff[0] = IPC::MakeHeader(0x17, 1, 0);
336 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 424 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
337 425
338 LOG_WARNING(Service_DSP, "(STUBBED) called mask=0x%08X", mask); 426 LOG_WARNING(Service_DSP, "(STUBBED) called mask=0x%08X", mask);
@@ -350,6 +438,7 @@ static void SetSemaphoreMask(Service::Interface* self) {
350static void GetHeadphoneStatus(Service::Interface* self) { 438static void GetHeadphoneStatus(Service::Interface* self) {
351 u32* cmd_buff = Kernel::GetCommandBuffer(); 439 u32* cmd_buff = Kernel::GetCommandBuffer();
352 440
441 cmd_buff[0] = IPC::MakeHeader(0x1F, 2, 0);
353 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 442 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
354 cmd_buff[2] = 0; // Not using headphones? 443 cmd_buff[2] = 0; // Not using headphones?
355 444
@@ -376,6 +465,7 @@ static void RecvData(Service::Interface* self) {
376 465
377 // Application reads this after requesting DSP shutdown, to verify the DSP has indeed shutdown or slept. 466 // Application reads this after requesting DSP shutdown, to verify the DSP has indeed shutdown or slept.
378 467
468 cmd_buff[0] = IPC::MakeHeader(0x1, 2, 0);
379 cmd_buff[1] = RESULT_SUCCESS.raw; 469 cmd_buff[1] = RESULT_SUCCESS.raw;
380 switch (DSP::HLE::GetDspState()) { 470 switch (DSP::HLE::GetDspState()) {
381 case DSP::HLE::DspState::On: 471 case DSP::HLE::DspState::On:
@@ -411,6 +501,7 @@ static void RecvDataIsReady(Service::Interface* self) {
411 501
412 ASSERT_MSG(register_number == 0, "Unknown register_number %u", register_number); 502 ASSERT_MSG(register_number == 0, "Unknown register_number %u", register_number);
413 503
504 cmd_buff[0] = IPC::MakeHeader(0x2, 2, 0);
414 cmd_buff[1] = RESULT_SUCCESS.raw; 505 cmd_buff[1] = RESULT_SUCCESS.raw;
415 cmd_buff[2] = 1; // Ready to read 506 cmd_buff[2] = 1; // Ready to read
416 507
@@ -458,14 +549,14 @@ const Interface::FunctionInfo FunctionTable[] = {
458 549
459Interface::Interface() { 550Interface::Interface() {
460 semaphore_event = Kernel::Event::Create(Kernel::ResetType::OneShot, "DSP_DSP::semaphore_event"); 551 semaphore_event = Kernel::Event::Create(Kernel::ResetType::OneShot, "DSP_DSP::semaphore_event");
461 read_pipe_count = 0; 552 interrupt_events = {};
462 553
463 Register(FunctionTable); 554 Register(FunctionTable);
464} 555}
465 556
466Interface::~Interface() { 557Interface::~Interface() {
467 semaphore_event = nullptr; 558 semaphore_event = nullptr;
468 interrupt_events.clear(); 559 interrupt_events = {};
469} 560}
470 561
471} // namespace 562} // namespace
diff --git a/src/core/hle/service/dsp_dsp.h b/src/core/hle/service/dsp_dsp.h
index 32b89e9bb..22f6687cc 100644
--- a/src/core/hle/service/dsp_dsp.h
+++ b/src/core/hle/service/dsp_dsp.h
@@ -8,6 +8,12 @@
8 8
9#include "core/hle/service/service.h" 9#include "core/hle/service/service.h"
10 10
11namespace DSP {
12namespace HLE {
13enum class DspPipe;
14}
15}
16
11//////////////////////////////////////////////////////////////////////////////////////////////////// 17////////////////////////////////////////////////////////////////////////////////////////////////////
12// Namespace DSP_DSP 18// Namespace DSP_DSP
13 19
@@ -23,15 +29,10 @@ public:
23 } 29 }
24}; 30};
25 31
26/// Signal all audio related interrupts.
27void SignalAllInterrupts();
28
29/** 32/**
30 * Signal a specific audio related interrupt based on interrupt id and channel id. 33 * Signal a specific DSP related interrupt of type == InterruptType::Pipe, pipe == pipe.
31 * @param interrupt_id The interrupt id 34 * @param pipe The DSP pipe for which to signal an interrupt for.
32 * @param channel_id The channel id
33 * The significance of various values of interrupt_id and channel_id is not yet known.
34 */ 35 */
35void SignalInterrupt(u32 interrupt_id, u32 channel_id); 36void SignalPipeInterrupt(DSP::HLE::DspPipe pipe);
36 37
37} // namespace 38} // namespace DSP_DSP
diff --git a/src/core/hle/service/fs/archive.cpp b/src/core/hle/service/fs/archive.cpp
index e9588cb72..cc51ede0c 100644
--- a/src/core/hle/service/fs/archive.cpp
+++ b/src/core/hle/service/fs/archive.cpp
@@ -114,6 +114,7 @@ ResultVal<bool> File::SyncRequest() {
114 return read.Code(); 114 return read.Code();
115 } 115 }
116 cmd_buff[2] = static_cast<u32>(*read); 116 cmd_buff[2] = static_cast<u32>(*read);
117 Memory::RasterizerFlushAndInvalidateRegion(Memory::VirtualToPhysicalAddress(address), length);
117 break; 118 break;
118 } 119 }
119 120
diff --git a/src/core/hle/service/fs/fs_user.cpp b/src/core/hle/service/fs/fs_user.cpp
index 3ec7ceb30..7df7da5a4 100644
--- a/src/core/hle/service/fs/fs_user.cpp
+++ b/src/core/hle/service/fs/fs_user.cpp
@@ -250,7 +250,7 @@ static void CreateFile(Service::Interface* self) {
250 250
251 FileSys::Path file_path(filename_type, filename_size, filename_ptr); 251 FileSys::Path file_path(filename_type, filename_size, filename_ptr);
252 252
253 LOG_DEBUG(Service_FS, "type=%d size=%llu data=%s", filename_type, filename_size, file_path.DebugStr().c_str()); 253 LOG_DEBUG(Service_FS, "type=%d size=%llu data=%s", filename_type, file_size, file_path.DebugStr().c_str());
254 254
255 cmd_buff[1] = CreateFileInArchive(archive_handle, file_path, file_size).raw; 255 cmd_buff[1] = CreateFileInArchive(archive_handle, file_path, file_size).raw;
256} 256}
diff --git a/src/core/hle/service/gsp_gpu.cpp b/src/core/hle/service/gsp_gpu.cpp
index 0c655395e..b4c146e08 100644
--- a/src/core/hle/service/gsp_gpu.cpp
+++ b/src/core/hle/service/gsp_gpu.cpp
@@ -15,8 +15,6 @@
15 15
16#include "video_core/gpu_debugger.h" 16#include "video_core/gpu_debugger.h"
17#include "video_core/debug_utils/debug_utils.h" 17#include "video_core/debug_utils/debug_utils.h"
18#include "video_core/renderer_base.h"
19#include "video_core/video_core.h"
20 18
21#include "gsp_gpu.h" 19#include "gsp_gpu.h"
22 20
@@ -45,6 +43,8 @@ Kernel::SharedPtr<Kernel::SharedMemory> g_shared_memory;
45/// Thread index into interrupt relay queue 43/// Thread index into interrupt relay queue
46u32 g_thread_id = 0; 44u32 g_thread_id = 0;
47 45
46static bool gpu_right_acquired = false;
47
48/// Gets a pointer to a thread command buffer in GSP shared memory 48/// Gets a pointer to a thread command buffer in GSP shared memory
49static inline u8* GetCommandBuffer(u32 thread_id) { 49static inline u8* GetCommandBuffer(u32 thread_id) {
50 return g_shared_memory->GetPointer(0x800 + (thread_id * sizeof(CommandBuffer))); 50 return g_shared_memory->GetPointer(0x800 + (thread_id * sizeof(CommandBuffer)));
@@ -291,8 +291,6 @@ static void FlushDataCache(Service::Interface* self) {
291 u32 size = cmd_buff[2]; 291 u32 size = cmd_buff[2];
292 u32 process = cmd_buff[4]; 292 u32 process = cmd_buff[4];
293 293
294 VideoCore::g_renderer->Rasterizer()->InvalidateRegion(Memory::VirtualToPhysicalAddress(address), size);
295
296 // TODO(purpasmart96): Verify return header on HW 294 // TODO(purpasmart96): Verify return header on HW
297 295
298 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 296 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
@@ -374,6 +372,9 @@ static void UnregisterInterruptRelayQueue(Service::Interface* self) {
374 * @todo This probably does not belong in the GSP module, instead move to video_core 372 * @todo This probably does not belong in the GSP module, instead move to video_core
375 */ 373 */
376void SignalInterrupt(InterruptId interrupt_id) { 374void SignalInterrupt(InterruptId interrupt_id) {
375 if (!gpu_right_acquired) {
376 return;
377 }
377 if (nullptr == g_interrupt_event) { 378 if (nullptr == g_interrupt_event) {
378 LOG_WARNING(Service_GSP, "cannot synchronize until GSP event has been created!"); 379 LOG_WARNING(Service_GSP, "cannot synchronize until GSP event has been created!");
379 return; 380 return;
@@ -408,6 +409,8 @@ void SignalInterrupt(InterruptId interrupt_id) {
408 g_interrupt_event->Signal(); 409 g_interrupt_event->Signal();
409} 410}
410 411
412MICROPROFILE_DEFINE(GPU_GSP_DMA, "GPU", "GSP DMA", MP_RGB(100, 0, 255));
413
411/// Executes the next GSP command 414/// Executes the next GSP command
412static void ExecuteCommand(const Command& command, u32 thread_id) { 415static void ExecuteCommand(const Command& command, u32 thread_id) {
413 // Utility function to convert register ID to address 416 // Utility function to convert register ID to address
@@ -419,18 +422,21 @@ static void ExecuteCommand(const Command& command, u32 thread_id) {
419 422
420 // GX request DMA - typically used for copying memory from GSP heap to VRAM 423 // GX request DMA - typically used for copying memory from GSP heap to VRAM
421 case CommandId::REQUEST_DMA: 424 case CommandId::REQUEST_DMA:
422 VideoCore::g_renderer->Rasterizer()->FlushRegion(Memory::VirtualToPhysicalAddress(command.dma_request.source_address), 425 {
423 command.dma_request.size); 426 MICROPROFILE_SCOPE(GPU_GSP_DMA);
427
428 // TODO: Consider attempting rasterizer-accelerated surface blit if that usage is ever possible/likely
429 Memory::RasterizerFlushRegion(Memory::VirtualToPhysicalAddress(command.dma_request.source_address),
430 command.dma_request.size);
431 Memory::RasterizerFlushAndInvalidateRegion(Memory::VirtualToPhysicalAddress(command.dma_request.dest_address),
432 command.dma_request.size);
424 433
425 memcpy(Memory::GetPointer(command.dma_request.dest_address), 434 memcpy(Memory::GetPointer(command.dma_request.dest_address),
426 Memory::GetPointer(command.dma_request.source_address), 435 Memory::GetPointer(command.dma_request.source_address),
427 command.dma_request.size); 436 command.dma_request.size);
428 SignalInterrupt(InterruptId::DMA); 437 SignalInterrupt(InterruptId::DMA);
429
430 VideoCore::g_renderer->Rasterizer()->InvalidateRegion(Memory::VirtualToPhysicalAddress(command.dma_request.dest_address),
431 command.dma_request.size);
432 break; 438 break;
433 439 }
434 // TODO: This will need some rework in the future. (why?) 440 // TODO: This will need some rework in the future. (why?)
435 case CommandId::SUBMIT_GPU_CMDLIST: 441 case CommandId::SUBMIT_GPU_CMDLIST:
436 { 442 {
@@ -517,13 +523,8 @@ static void ExecuteCommand(const Command& command, u32 thread_id) {
517 523
518 case CommandId::CACHE_FLUSH: 524 case CommandId::CACHE_FLUSH:
519 { 525 {
520 for (auto& region : command.cache_flush.regions) { 526 // NOTE: Rasterizer flushing handled elsewhere in CPU read/write and other GPU handlers
521 if (region.size == 0) 527 // Use command.cache_flush.regions to implement this handler
522 break;
523
524 VideoCore::g_renderer->Rasterizer()->InvalidateRegion(
525 Memory::VirtualToPhysicalAddress(region.address), region.size);
526 }
527 break; 528 break;
528 } 529 }
529 530
@@ -628,6 +629,35 @@ static void ImportDisplayCaptureInfo(Service::Interface* self) {
628 LOG_WARNING(Service_GSP, "called"); 629 LOG_WARNING(Service_GSP, "called");
629} 630}
630 631
632/**
633 * GSP_GPU::AcquireRight service function
634 * Outputs:
635 * 1: Result code
636 */
637static void AcquireRight(Service::Interface* self) {
638 u32* cmd_buff = Kernel::GetCommandBuffer();
639
640 gpu_right_acquired = true;
641
642 cmd_buff[1] = RESULT_SUCCESS.raw;
643
644 LOG_WARNING(Service_GSP, "called");
645}
646
647/**
648 * GSP_GPU::ReleaseRight service function
649 * Outputs:
650 * 1: Result code
651 */
652static void ReleaseRight(Service::Interface* self) {
653 u32* cmd_buff = Kernel::GetCommandBuffer();
654
655 gpu_right_acquired = false;
656
657 cmd_buff[1] = RESULT_SUCCESS.raw;
658
659 LOG_WARNING(Service_GSP, "called");
660}
631 661
632const Interface::FunctionInfo FunctionTable[] = { 662const Interface::FunctionInfo FunctionTable[] = {
633 {0x00010082, WriteHWRegs, "WriteHWRegs"}, 663 {0x00010082, WriteHWRegs, "WriteHWRegs"},
@@ -651,8 +681,8 @@ const Interface::FunctionInfo FunctionTable[] = {
651 {0x00130042, RegisterInterruptRelayQueue, "RegisterInterruptRelayQueue"}, 681 {0x00130042, RegisterInterruptRelayQueue, "RegisterInterruptRelayQueue"},
652 {0x00140000, UnregisterInterruptRelayQueue, "UnregisterInterruptRelayQueue"}, 682 {0x00140000, UnregisterInterruptRelayQueue, "UnregisterInterruptRelayQueue"},
653 {0x00150002, nullptr, "TryAcquireRight"}, 683 {0x00150002, nullptr, "TryAcquireRight"},
654 {0x00160042, nullptr, "AcquireRight"}, 684 {0x00160042, AcquireRight, "AcquireRight"},
655 {0x00170000, nullptr, "ReleaseRight"}, 685 {0x00170000, ReleaseRight, "ReleaseRight"},
656 {0x00180000, ImportDisplayCaptureInfo, "ImportDisplayCaptureInfo"}, 686 {0x00180000, ImportDisplayCaptureInfo, "ImportDisplayCaptureInfo"},
657 {0x00190000, nullptr, "SaveVramSysArea"}, 687 {0x00190000, nullptr, "SaveVramSysArea"},
658 {0x001A0000, nullptr, "RestoreVramSysArea"}, 688 {0x001A0000, nullptr, "RestoreVramSysArea"},
@@ -673,11 +703,13 @@ Interface::Interface() {
673 g_shared_memory = nullptr; 703 g_shared_memory = nullptr;
674 704
675 g_thread_id = 0; 705 g_thread_id = 0;
706 gpu_right_acquired = false;
676} 707}
677 708
678Interface::~Interface() { 709Interface::~Interface() {
679 g_interrupt_event = nullptr; 710 g_interrupt_event = nullptr;
680 g_shared_memory = nullptr; 711 g_shared_memory = nullptr;
712 gpu_right_acquired = false;
681} 713}
682 714
683} // namespace 715} // namespace
diff --git a/src/core/hle/service/y2r_u.cpp b/src/core/hle/service/y2r_u.cpp
index 22f373adf..d16578f87 100644
--- a/src/core/hle/service/y2r_u.cpp
+++ b/src/core/hle/service/y2r_u.cpp
@@ -4,6 +4,7 @@
4 4
5#include <cstring> 5#include <cstring>
6 6
7#include "common/common_funcs.h"
7#include "common/common_types.h" 8#include "common/common_types.h"
8#include "common/logging/log.h" 9#include "common/logging/log.h"
9 10
@@ -12,9 +13,6 @@
12#include "core/hle/service/y2r_u.h" 13#include "core/hle/service/y2r_u.h"
13#include "core/hw/y2r.h" 14#include "core/hw/y2r.h"
14 15
15#include "video_core/renderer_base.h"
16#include "video_core/video_core.h"
17
18//////////////////////////////////////////////////////////////////////////////////////////////////// 16////////////////////////////////////////////////////////////////////////////////////////////////////
19// Namespace Y2R_U 17// Namespace Y2R_U
20 18
@@ -28,13 +26,17 @@ struct ConversionParameters {
28 u16 input_line_width; 26 u16 input_line_width;
29 u16 input_lines; 27 u16 input_lines;
30 StandardCoefficient standard_coefficient; 28 StandardCoefficient standard_coefficient;
31 u8 reserved; 29 u8 padding;
32 u16 alpha; 30 u16 alpha;
33}; 31};
34static_assert(sizeof(ConversionParameters) == 12, "ConversionParameters struct has incorrect size"); 32static_assert(sizeof(ConversionParameters) == 12, "ConversionParameters struct has incorrect size");
35 33
36static Kernel::SharedPtr<Kernel::Event> completion_event; 34static Kernel::SharedPtr<Kernel::Event> completion_event;
37static ConversionConfiguration conversion; 35static ConversionConfiguration conversion;
36static DitheringWeightParams dithering_weight_params;
37static u32 temporal_dithering_enabled = 0;
38static u32 transfer_end_interrupt_enabled = 0;
39static u32 spacial_dithering_enabled = 0;
38 40
39static const CoefficientSet standard_coefficients[4] = { 41static const CoefficientSet standard_coefficients[4] = {
40 {{ 0x100, 0x166, 0xB6, 0x58, 0x1C5, -0x166F, 0x10EE, -0x1C5B }}, // ITU_Rec601 42 {{ 0x100, 0x166, 0xB6, 0x58, 0x1C5, -0x166F, 0x10EE, -0x1C5B }}, // ITU_Rec601
@@ -73,7 +75,7 @@ ResultCode ConversionConfiguration::SetInputLines(u16 lines) {
73 75
74ResultCode ConversionConfiguration::SetStandardCoefficient(StandardCoefficient standard_coefficient) { 76ResultCode ConversionConfiguration::SetStandardCoefficient(StandardCoefficient standard_coefficient) {
75 size_t index = static_cast<size_t>(standard_coefficient); 77 size_t index = static_cast<size_t>(standard_coefficient);
76 if (index >= 4) { 78 if (index >= ARRAY_SIZE(standard_coefficients)) {
77 return ResultCode(ErrorDescription::InvalidEnumValue, ErrorModule::CAM, 79 return ResultCode(ErrorDescription::InvalidEnumValue, ErrorModule::CAM,
78 ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E053ED 80 ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E053ED
79 } 81 }
@@ -86,44 +88,183 @@ static void SetInputFormat(Service::Interface* self) {
86 u32* cmd_buff = Kernel::GetCommandBuffer(); 88 u32* cmd_buff = Kernel::GetCommandBuffer();
87 89
88 conversion.input_format = static_cast<InputFormat>(cmd_buff[1]); 90 conversion.input_format = static_cast<InputFormat>(cmd_buff[1]);
91
92 cmd_buff[0] = IPC::MakeHeader(0x1, 1, 0);
93 cmd_buff[1] = RESULT_SUCCESS.raw;
94
89 LOG_DEBUG(Service_Y2R, "called input_format=%hhu", conversion.input_format); 95 LOG_DEBUG(Service_Y2R, "called input_format=%hhu", conversion.input_format);
96}
97
98static void GetInputFormat(Service::Interface* self) {
99 u32* cmd_buff = Kernel::GetCommandBuffer();
90 100
101 cmd_buff[0] = IPC::MakeHeader(0x2, 2, 0);
91 cmd_buff[1] = RESULT_SUCCESS.raw; 102 cmd_buff[1] = RESULT_SUCCESS.raw;
103 cmd_buff[2] = static_cast<u32>(conversion.input_format);
104
105 LOG_DEBUG(Service_Y2R, "called input_format=%hhu", conversion.input_format);
92} 106}
93 107
94static void SetOutputFormat(Service::Interface* self) { 108static void SetOutputFormat(Service::Interface* self) {
95 u32* cmd_buff = Kernel::GetCommandBuffer(); 109 u32* cmd_buff = Kernel::GetCommandBuffer();
96 110
97 conversion.output_format = static_cast<OutputFormat>(cmd_buff[1]); 111 conversion.output_format = static_cast<OutputFormat>(cmd_buff[1]);
112
113 cmd_buff[0] = IPC::MakeHeader(0x3, 1, 0);
114 cmd_buff[1] = RESULT_SUCCESS.raw;
115
98 LOG_DEBUG(Service_Y2R, "called output_format=%hhu", conversion.output_format); 116 LOG_DEBUG(Service_Y2R, "called output_format=%hhu", conversion.output_format);
117}
118
119static void GetOutputFormat(Service::Interface* self) {
120 u32* cmd_buff = Kernel::GetCommandBuffer();
99 121
122 cmd_buff[0] = IPC::MakeHeader(0x4, 2, 0);
100 cmd_buff[1] = RESULT_SUCCESS.raw; 123 cmd_buff[1] = RESULT_SUCCESS.raw;
124 cmd_buff[2] = static_cast<u32>(conversion.output_format);
125
126 LOG_DEBUG(Service_Y2R, "called output_format=%hhu", conversion.output_format);
101} 127}
102 128
103static void SetRotation(Service::Interface* self) { 129static void SetRotation(Service::Interface* self) {
104 u32* cmd_buff = Kernel::GetCommandBuffer(); 130 u32* cmd_buff = Kernel::GetCommandBuffer();
105 131
106 conversion.rotation = static_cast<Rotation>(cmd_buff[1]); 132 conversion.rotation = static_cast<Rotation>(cmd_buff[1]);
133
134 cmd_buff[0] = IPC::MakeHeader(0x5, 1, 0);
135 cmd_buff[1] = RESULT_SUCCESS.raw;
136
107 LOG_DEBUG(Service_Y2R, "called rotation=%hhu", conversion.rotation); 137 LOG_DEBUG(Service_Y2R, "called rotation=%hhu", conversion.rotation);
138}
139
140static void GetRotation(Service::Interface* self) {
141 u32* cmd_buff = Kernel::GetCommandBuffer();
108 142
143 cmd_buff[0] = IPC::MakeHeader(0x6, 2, 0);
109 cmd_buff[1] = RESULT_SUCCESS.raw; 144 cmd_buff[1] = RESULT_SUCCESS.raw;
145 cmd_buff[2] = static_cast<u32>(conversion.rotation);
146
147 LOG_DEBUG(Service_Y2R, "called rotation=%hhu", conversion.rotation);
110} 148}
111 149
112static void SetBlockAlignment(Service::Interface* self) { 150static void SetBlockAlignment(Service::Interface* self) {
113 u32* cmd_buff = Kernel::GetCommandBuffer(); 151 u32* cmd_buff = Kernel::GetCommandBuffer();
114 152
115 conversion.block_alignment = static_cast<BlockAlignment>(cmd_buff[1]); 153 conversion.block_alignment = static_cast<BlockAlignment>(cmd_buff[1]);
116 LOG_DEBUG(Service_Y2R, "called alignment=%hhu", conversion.block_alignment);
117 154
155 cmd_buff[0] = IPC::MakeHeader(0x7, 1, 0);
156 cmd_buff[1] = RESULT_SUCCESS.raw;
157
158 LOG_DEBUG(Service_Y2R, "called block_alignment=%hhu", conversion.block_alignment);
159}
160
161static void GetBlockAlignment(Service::Interface* self) {
162 u32* cmd_buff = Kernel::GetCommandBuffer();
163
164 cmd_buff[0] = IPC::MakeHeader(0x8, 2, 0);
165 cmd_buff[1] = RESULT_SUCCESS.raw;
166 cmd_buff[2] = static_cast<u32>(conversion.block_alignment);
167
168 LOG_DEBUG(Service_Y2R, "called block_alignment=%hhu", conversion.block_alignment);
169}
170
171/**
172 * Y2R_U::SetSpacialDithering service function
173 * Inputs:
174 * 1 : u8, 0 = Disabled, 1 = Enabled
175 * Outputs:
176 * 1 : Result of function, 0 on success, otherwise error code
177 */
178static void SetSpacialDithering(Service::Interface* self) {
179 u32* cmd_buff = Kernel::GetCommandBuffer();
180 spacial_dithering_enabled = cmd_buff[1] & 0xF;
181
182 cmd_buff[0] = IPC::MakeHeader(0x9, 1, 0);
183 cmd_buff[1] = RESULT_SUCCESS.raw;
184
185 LOG_WARNING(Service_Y2R, "(STUBBED) called");
186}
187
188/**
189 * Y2R_U::GetSpacialDithering service function
190 * Outputs:
191 * 1 : Result of function, 0 on success, otherwise error code
192 * 2 : u8, 0 = Disabled, 1 = Enabled
193 */
194static void GetSpacialDithering(Service::Interface* self) {
195 u32* cmd_buff = Kernel::GetCommandBuffer();
196
197 cmd_buff[0] = IPC::MakeHeader(0xA, 2, 0);
198 cmd_buff[1] = RESULT_SUCCESS.raw;
199 cmd_buff[2] = spacial_dithering_enabled;
200
201 LOG_WARNING(Service_Y2R, "(STUBBED) called");
202}
203
204/**
205 * Y2R_U::SetTemporalDithering service function
206 * Inputs:
207 * 1 : u8, 0 = Disabled, 1 = Enabled
208 * Outputs:
209 * 1 : Result of function, 0 on success, otherwise error code
210 */
211static void SetTemporalDithering(Service::Interface* self) {
212 u32* cmd_buff = Kernel::GetCommandBuffer();
213 temporal_dithering_enabled = cmd_buff[1] & 0xF;
214
215 cmd_buff[0] = IPC::MakeHeader(0xB, 1, 0);
118 cmd_buff[1] = RESULT_SUCCESS.raw; 216 cmd_buff[1] = RESULT_SUCCESS.raw;
217
218 LOG_WARNING(Service_Y2R, "(STUBBED) called");
119} 219}
120 220
221/**
222 * Y2R_U::GetTemporalDithering service function
223 * Outputs:
224 * 1 : Result of function, 0 on success, otherwise error code
225 * 2 : u8, 0 = Disabled, 1 = Enabled
226 */
227static void GetTemporalDithering(Service::Interface* self) {
228 u32* cmd_buff = Kernel::GetCommandBuffer();
229
230 cmd_buff[0] = IPC::MakeHeader(0xC, 2, 0);
231 cmd_buff[1] = RESULT_SUCCESS.raw;
232 cmd_buff[2] = temporal_dithering_enabled;
233
234 LOG_WARNING(Service_Y2R, "(STUBBED) called");
235}
236
237/**
238 * Y2R_U::SetTransferEndInterrupt service function
239 * Inputs:
240 * 1 : u8, 0 = Disabled, 1 = Enabled
241 * Outputs:
242 * 1 : Result of function, 0 on success, otherwise error code
243 */
121static void SetTransferEndInterrupt(Service::Interface* self) { 244static void SetTransferEndInterrupt(Service::Interface* self) {
122 u32* cmd_buff = Kernel::GetCommandBuffer(); 245 u32* cmd_buff = Kernel::GetCommandBuffer();
246 transfer_end_interrupt_enabled = cmd_buff[1] & 0xf;
123 247
124 cmd_buff[0] = IPC::MakeHeader(0xD, 1, 0); 248 cmd_buff[0] = IPC::MakeHeader(0xD, 1, 0);
125 cmd_buff[1] = RESULT_SUCCESS.raw; 249 cmd_buff[1] = RESULT_SUCCESS.raw;
126 LOG_DEBUG(Service_Y2R, "(STUBBED) called"); 250
251 LOG_WARNING(Service_Y2R, "(STUBBED) called");
252}
253
254/**
255 * Y2R_U::GetTransferEndInterrupt service function
256 * Outputs:
257 * 1 : Result of function, 0 on success, otherwise error code
258 * 2 : u8, 0 = Disabled, 1 = Enabled
259 */
260static void GetTransferEndInterrupt(Service::Interface* self) {
261 u32* cmd_buff = Kernel::GetCommandBuffer();
262
263 cmd_buff[0] = IPC::MakeHeader(0xE, 2, 0);
264 cmd_buff[1] = RESULT_SUCCESS.raw;
265 cmd_buff[2] = transfer_end_interrupt_enabled;
266
267 LOG_WARNING(Service_Y2R, "(STUBBED) called");
127} 268}
128 269
129/** 270/**
@@ -135,8 +276,10 @@ static void SetTransferEndInterrupt(Service::Interface* self) {
135static void GetTransferEndEvent(Service::Interface* self) { 276static void GetTransferEndEvent(Service::Interface* self) {
136 u32* cmd_buff = Kernel::GetCommandBuffer(); 277 u32* cmd_buff = Kernel::GetCommandBuffer();
137 278
279 cmd_buff[0] = IPC::MakeHeader(0xF, 2, 0);
138 cmd_buff[1] = RESULT_SUCCESS.raw; 280 cmd_buff[1] = RESULT_SUCCESS.raw;
139 cmd_buff[3] = Kernel::g_handle_table.Create(completion_event).MoveFrom(); 281 cmd_buff[3] = Kernel::g_handle_table.Create(completion_event).MoveFrom();
282
140 LOG_DEBUG(Service_Y2R, "called"); 283 LOG_DEBUG(Service_Y2R, "called");
141} 284}
142 285
@@ -147,12 +290,12 @@ static void SetSendingY(Service::Interface* self) {
147 conversion.src_Y.image_size = cmd_buff[2]; 290 conversion.src_Y.image_size = cmd_buff[2];
148 conversion.src_Y.transfer_unit = cmd_buff[3]; 291 conversion.src_Y.transfer_unit = cmd_buff[3];
149 conversion.src_Y.gap = cmd_buff[4]; 292 conversion.src_Y.gap = cmd_buff[4];
150 u32 src_process_handle = cmd_buff[6];
151 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
152 "src_process_handle=0x%08X", conversion.src_Y.image_size,
153 conversion.src_Y.transfer_unit, conversion.src_Y.gap, src_process_handle);
154 293
294 cmd_buff[0] = IPC::MakeHeader(0x10, 1, 0);
155 cmd_buff[1] = RESULT_SUCCESS.raw; 295 cmd_buff[1] = RESULT_SUCCESS.raw;
296
297 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, src_process_handle=0x%08X",
298 conversion.src_Y.image_size, conversion.src_Y.transfer_unit, conversion.src_Y.gap, cmd_buff[6]);
156} 299}
157 300
158static void SetSendingU(Service::Interface* self) { 301static void SetSendingU(Service::Interface* self) {
@@ -162,12 +305,12 @@ static void SetSendingU(Service::Interface* self) {
162 conversion.src_U.image_size = cmd_buff[2]; 305 conversion.src_U.image_size = cmd_buff[2];
163 conversion.src_U.transfer_unit = cmd_buff[3]; 306 conversion.src_U.transfer_unit = cmd_buff[3];
164 conversion.src_U.gap = cmd_buff[4]; 307 conversion.src_U.gap = cmd_buff[4];
165 u32 src_process_handle = cmd_buff[6];
166 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
167 "src_process_handle=0x%08X", conversion.src_U.image_size,
168 conversion.src_U.transfer_unit, conversion.src_U.gap, src_process_handle);
169 308
309 cmd_buff[0] = IPC::MakeHeader(0x11, 1, 0);
170 cmd_buff[1] = RESULT_SUCCESS.raw; 310 cmd_buff[1] = RESULT_SUCCESS.raw;
311
312 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, src_process_handle=0x%08X",
313 conversion.src_U.image_size, conversion.src_U.transfer_unit, conversion.src_U.gap, cmd_buff[6]);
171} 314}
172 315
173static void SetSendingV(Service::Interface* self) { 316static void SetSendingV(Service::Interface* self) {
@@ -177,12 +320,12 @@ static void SetSendingV(Service::Interface* self) {
177 conversion.src_V.image_size = cmd_buff[2]; 320 conversion.src_V.image_size = cmd_buff[2];
178 conversion.src_V.transfer_unit = cmd_buff[3]; 321 conversion.src_V.transfer_unit = cmd_buff[3];
179 conversion.src_V.gap = cmd_buff[4]; 322 conversion.src_V.gap = cmd_buff[4];
180 u32 src_process_handle = cmd_buff[6];
181 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
182 "src_process_handle=0x%08X", conversion.src_V.image_size,
183 conversion.src_V.transfer_unit, conversion.src_V.gap, src_process_handle);
184 323
324 cmd_buff[0] = IPC::MakeHeader(0x12, 1, 0);
185 cmd_buff[1] = RESULT_SUCCESS.raw; 325 cmd_buff[1] = RESULT_SUCCESS.raw;
326
327 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, src_process_handle=0x%08X",
328 conversion.src_V.image_size, conversion.src_V.transfer_unit, conversion.src_V.gap, cmd_buff[6]);
186} 329}
187 330
188static void SetSendingYUYV(Service::Interface* self) { 331static void SetSendingYUYV(Service::Interface* self) {
@@ -192,12 +335,76 @@ static void SetSendingYUYV(Service::Interface* self) {
192 conversion.src_YUYV.image_size = cmd_buff[2]; 335 conversion.src_YUYV.image_size = cmd_buff[2];
193 conversion.src_YUYV.transfer_unit = cmd_buff[3]; 336 conversion.src_YUYV.transfer_unit = cmd_buff[3];
194 conversion.src_YUYV.gap = cmd_buff[4]; 337 conversion.src_YUYV.gap = cmd_buff[4];
195 u32 src_process_handle = cmd_buff[6];
196 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
197 "src_process_handle=0x%08X", conversion.src_YUYV.image_size,
198 conversion.src_YUYV.transfer_unit, conversion.src_YUYV.gap, src_process_handle);
199 338
339 cmd_buff[0] = IPC::MakeHeader(0x13, 1, 0);
340 cmd_buff[1] = RESULT_SUCCESS.raw;
341
342 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, src_process_handle=0x%08X",
343 conversion.src_YUYV.image_size, conversion.src_YUYV.transfer_unit, conversion.src_YUYV.gap, cmd_buff[6]);
344}
345
346/**
347 * Y2R::IsFinishedSendingYuv service function
348 * Output:
349 * 1 : Result of the function, 0 on success, otherwise error code
350 * 2 : u8, 0 = Not Finished, 1 = Finished
351 */
352static void IsFinishedSendingYuv(Service::Interface* self) {
353 u32* cmd_buff = Kernel::GetCommandBuffer();
354
355 cmd_buff[0] = IPC::MakeHeader(0x14, 2, 0);
356 cmd_buff[1] = RESULT_SUCCESS.raw;
357 cmd_buff[2] = 1;
358
359 LOG_WARNING(Service_Y2R, "(STUBBED) called");
360}
361
362/**
363 * Y2R::IsFinishedSendingY service function
364 * Output:
365 * 1 : Result of the function, 0 on success, otherwise error code
366 * 2 : u8, 0 = Not Finished, 1 = Finished
367 */
368static void IsFinishedSendingY(Service::Interface* self) {
369 u32* cmd_buff = Kernel::GetCommandBuffer();
370
371 cmd_buff[0] = IPC::MakeHeader(0x15, 2, 0);
200 cmd_buff[1] = RESULT_SUCCESS.raw; 372 cmd_buff[1] = RESULT_SUCCESS.raw;
373 cmd_buff[2] = 1;
374
375 LOG_WARNING(Service_Y2R, "(STUBBED) called");
376}
377
378/**
379 * Y2R::IsFinishedSendingU service function
380 * Output:
381 * 1 : Result of the function, 0 on success, otherwise error code
382 * 2 : u8, 0 = Not Finished, 1 = Finished
383 */
384static void IsFinishedSendingU(Service::Interface* self) {
385 u32* cmd_buff = Kernel::GetCommandBuffer();
386
387 cmd_buff[0] = IPC::MakeHeader(0x16, 2, 0);
388 cmd_buff[1] = RESULT_SUCCESS.raw;
389 cmd_buff[2] = 1;
390
391 LOG_WARNING(Service_Y2R, "(STUBBED) called");
392}
393
394/**
395 * Y2R::IsFinishedSendingV service function
396 * Output:
397 * 1 : Result of the function, 0 on success, otherwise error code
398 * 2 : u8, 0 = Not Finished, 1 = Finished
399 */
400static void IsFinishedSendingV(Service::Interface* self) {
401 u32* cmd_buff = Kernel::GetCommandBuffer();
402
403 cmd_buff[0] = IPC::MakeHeader(0x17, 2, 0);
404 cmd_buff[1] = RESULT_SUCCESS.raw;
405 cmd_buff[2] = 1;
406
407 LOG_WARNING(Service_Y2R, "(STUBBED) called");
201} 408}
202 409
203static void SetReceiving(Service::Interface* self) { 410static void SetReceiving(Service::Interface* self) {
@@ -207,27 +414,66 @@ static void SetReceiving(Service::Interface* self) {
207 conversion.dst.image_size = cmd_buff[2]; 414 conversion.dst.image_size = cmd_buff[2];
208 conversion.dst.transfer_unit = cmd_buff[3]; 415 conversion.dst.transfer_unit = cmd_buff[3];
209 conversion.dst.gap = cmd_buff[4]; 416 conversion.dst.gap = cmd_buff[4];
210 u32 dst_process_handle = cmd_buff[6];
211 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, "
212 "dst_process_handle=0x%08X", conversion.dst.image_size,
213 conversion.dst.transfer_unit, conversion.dst.gap,
214 dst_process_handle);
215 417
418 cmd_buff[0] = IPC::MakeHeader(0x18, 1, 0);
216 cmd_buff[1] = RESULT_SUCCESS.raw; 419 cmd_buff[1] = RESULT_SUCCESS.raw;
420
421 LOG_DEBUG(Service_Y2R, "called image_size=0x%08X, transfer_unit=%hu, transfer_stride=%hu, dst_process_handle=0x%08X",
422 conversion.dst.image_size, conversion.dst.transfer_unit, conversion.dst.gap, cmd_buff[6]);
423}
424
425/**
426 * Y2R::IsFinishedReceiving service function
427 * Output:
428 * 1 : Result of the function, 0 on success, otherwise error code
429 * 2 : u8, 0 = Not Finished, 1 = Finished
430 */
431static void IsFinishedReceiving(Service::Interface* self) {
432 u32* cmd_buff = Kernel::GetCommandBuffer();
433
434 cmd_buff[0] = IPC::MakeHeader(0x19, 2, 0);
435 cmd_buff[1] = RESULT_SUCCESS.raw;
436 cmd_buff[2] = 1;
437
438 LOG_WARNING(Service_Y2R, "(STUBBED) called");
217} 439}
218 440
219static void SetInputLineWidth(Service::Interface* self) { 441static void SetInputLineWidth(Service::Interface* self) {
220 u32* cmd_buff = Kernel::GetCommandBuffer(); 442 u32* cmd_buff = Kernel::GetCommandBuffer();
221 443
222 LOG_DEBUG(Service_Y2R, "called input_line_width=%u", cmd_buff[1]); 444 cmd_buff[0] = IPC::MakeHeader(0x1A, 1, 0);
223 cmd_buff[1] = conversion.SetInputLineWidth(cmd_buff[1]).raw; 445 cmd_buff[1] = conversion.SetInputLineWidth(cmd_buff[1]).raw;
446
447 LOG_DEBUG(Service_Y2R, "called input_line_width=%u", cmd_buff[1]);
448}
449
450static void GetInputLineWidth(Service::Interface* self) {
451 u32* cmd_buff = Kernel::GetCommandBuffer();
452
453 cmd_buff[0] = IPC::MakeHeader(0x1B, 2, 0);
454 cmd_buff[1] = RESULT_SUCCESS.raw;
455 cmd_buff[2] = conversion.input_line_width;
456
457 LOG_DEBUG(Service_Y2R, "called input_line_width=%u", conversion.input_line_width);
224} 458}
225 459
226static void SetInputLines(Service::Interface* self) { 460static void SetInputLines(Service::Interface* self) {
227 u32* cmd_buff = Kernel::GetCommandBuffer(); 461 u32* cmd_buff = Kernel::GetCommandBuffer();
228 462
229 LOG_DEBUG(Service_Y2R, "called input_line_number=%u", cmd_buff[1]); 463 cmd_buff[0] = IPC::MakeHeader(0x1C, 1, 0);
230 cmd_buff[1] = conversion.SetInputLines(cmd_buff[1]).raw; 464 cmd_buff[1] = conversion.SetInputLines(cmd_buff[1]).raw;
465
466 LOG_DEBUG(Service_Y2R, "called input_lines=%u", cmd_buff[1]);
467}
468
469static void GetInputLines(Service::Interface* self) {
470 u32* cmd_buff = Kernel::GetCommandBuffer();
471
472 cmd_buff[0] = IPC::MakeHeader(0x1D, 2, 0);
473 cmd_buff[1] = RESULT_SUCCESS.raw;
474 cmd_buff[2] = static_cast<u32>(conversion.input_lines);
475
476 LOG_DEBUG(Service_Y2R, "called input_lines=%u", conversion.input_lines);
231} 477}
232 478
233static void SetCoefficient(Service::Interface* self) { 479static void SetCoefficient(Service::Interface* self) {
@@ -235,45 +481,111 @@ static void SetCoefficient(Service::Interface* self) {
235 481
236 const u16* coefficients = reinterpret_cast<const u16*>(&cmd_buff[1]); 482 const u16* coefficients = reinterpret_cast<const u16*>(&cmd_buff[1]);
237 std::memcpy(conversion.coefficients.data(), coefficients, sizeof(CoefficientSet)); 483 std::memcpy(conversion.coefficients.data(), coefficients, sizeof(CoefficientSet));
484
485 cmd_buff[0] = IPC::MakeHeader(0x1E, 1, 0);
486 cmd_buff[1] = RESULT_SUCCESS.raw;
487
238 LOG_DEBUG(Service_Y2R, "called coefficients=[%hX, %hX, %hX, %hX, %hX, %hX, %hX, %hX]", 488 LOG_DEBUG(Service_Y2R, "called coefficients=[%hX, %hX, %hX, %hX, %hX, %hX, %hX, %hX]",
239 coefficients[0], coefficients[1], coefficients[2], coefficients[3], 489 coefficients[0], coefficients[1], coefficients[2], coefficients[3],
240 coefficients[4], coefficients[5], coefficients[6], coefficients[7]); 490 coefficients[4], coefficients[5], coefficients[6], coefficients[7]);
491}
241 492
493static void GetCoefficient(Service::Interface* self) {
494 u32* cmd_buff = Kernel::GetCommandBuffer();
495
496 cmd_buff[0] = IPC::MakeHeader(0x1F, 5, 0);
242 cmd_buff[1] = RESULT_SUCCESS.raw; 497 cmd_buff[1] = RESULT_SUCCESS.raw;
498 std::memcpy(&cmd_buff[2], conversion.coefficients.data(), sizeof(CoefficientSet));
499
500 LOG_DEBUG(Service_Y2R, "called");
243} 501}
244 502
245static void SetStandardCoefficient(Service::Interface* self) { 503static void SetStandardCoefficient(Service::Interface* self) {
246 u32* cmd_buff = Kernel::GetCommandBuffer(); 504 u32* cmd_buff = Kernel::GetCommandBuffer();
247 505
248 LOG_DEBUG(Service_Y2R, "called standard_coefficient=%u", cmd_buff[1]); 506 u32 index = cmd_buff[1];
507
508 cmd_buff[0] = IPC::MakeHeader(0x20, 1, 0);
509 cmd_buff[1] = conversion.SetStandardCoefficient((StandardCoefficient)index).raw;
510
511 LOG_DEBUG(Service_Y2R, "called standard_coefficient=%u", index);
512}
513
514static void GetStandardCoefficient(Service::Interface* self) {
515 u32* cmd_buff = Kernel::GetCommandBuffer();
516
517 u32 index = cmd_buff[1];
518
519 if (index < ARRAY_SIZE(standard_coefficients)) {
520 cmd_buff[0] = IPC::MakeHeader(0x21, 5, 0);
521 cmd_buff[1] = RESULT_SUCCESS.raw;
522 std::memcpy(&cmd_buff[2], &standard_coefficients[index], sizeof(CoefficientSet));
249 523
250 cmd_buff[1] = conversion.SetStandardCoefficient((StandardCoefficient)cmd_buff[1]).raw; 524 LOG_DEBUG(Service_Y2R, "called standard_coefficient=%u ", index);
525 } else {
526 cmd_buff[0] = IPC::MakeHeader(0x21, 1, 0);
527 cmd_buff[1] = -1; // TODO(bunnei): Identify the correct error code for this
528
529 LOG_ERROR(Service_Y2R, "called standard_coefficient=%u The argument is invalid!", index);
530 }
251} 531}
252 532
253static void SetAlpha(Service::Interface* self) { 533static void SetAlpha(Service::Interface* self) {
254 u32* cmd_buff = Kernel::GetCommandBuffer(); 534 u32* cmd_buff = Kernel::GetCommandBuffer();
255 535
256 conversion.alpha = cmd_buff[1]; 536 conversion.alpha = cmd_buff[1];
537
538 cmd_buff[0] = IPC::MakeHeader(0x22, 1, 0);
539 cmd_buff[1] = RESULT_SUCCESS.raw;
540
257 LOG_DEBUG(Service_Y2R, "called alpha=%hu", conversion.alpha); 541 LOG_DEBUG(Service_Y2R, "called alpha=%hu", conversion.alpha);
542}
543
544static void GetAlpha(Service::Interface* self) {
545 u32* cmd_buff = Kernel::GetCommandBuffer();
258 546
547 cmd_buff[0] = IPC::MakeHeader(0x23, 2, 0);
259 cmd_buff[1] = RESULT_SUCCESS.raw; 548 cmd_buff[1] = RESULT_SUCCESS.raw;
549 cmd_buff[2] = conversion.alpha;
550
551 LOG_DEBUG(Service_Y2R, "called alpha=%hu", conversion.alpha);
260} 552}
261 553
262static void StartConversion(Service::Interface* self) { 554static void SetDitheringWeightParams(Service::Interface* self) {
263 u32* cmd_buff = Kernel::GetCommandBuffer(); 555 u32* cmd_buff = Kernel::GetCommandBuffer();
556 std::memcpy(&dithering_weight_params, &cmd_buff[1], sizeof(DitheringWeightParams));
264 557
265 HW::Y2R::PerformConversion(conversion); 558 cmd_buff[0] = IPC::MakeHeader(0x24, 1, 0);
559 cmd_buff[1] = RESULT_SUCCESS.raw;
266 560
267 // dst_image_size would seem to be perfect for this, but it doesn't include the gap :( 561 LOG_DEBUG(Service_Y2R, "called");
268 u32 total_output_size = conversion.input_lines * 562}
269 (conversion.dst.transfer_unit + conversion.dst.gap); 563
270 VideoCore::g_renderer->Rasterizer()->InvalidateRegion( 564static void GetDitheringWeightParams(Service::Interface* self) {
271 Memory::VirtualToPhysicalAddress(conversion.dst.address), total_output_size); 565 u32* cmd_buff = Kernel::GetCommandBuffer();
566
567 cmd_buff[0] = IPC::MakeHeader(0x25, 9, 0);
568 cmd_buff[1] = RESULT_SUCCESS.raw;
569 std::memcpy(&cmd_buff[2], &dithering_weight_params, sizeof(DitheringWeightParams));
272 570
273 LOG_DEBUG(Service_Y2R, "called"); 571 LOG_DEBUG(Service_Y2R, "called");
572}
573
574static void StartConversion(Service::Interface* self) {
575 u32* cmd_buff = Kernel::GetCommandBuffer();
576
577 // dst_image_size would seem to be perfect for this, but it doesn't include the gap :(
578 u32 total_output_size = conversion.input_lines * (conversion.dst.transfer_unit + conversion.dst.gap);
579 Memory::RasterizerFlushAndInvalidateRegion(Memory::VirtualToPhysicalAddress(conversion.dst.address), total_output_size);
580
581 HW::Y2R::PerformConversion(conversion);
582
274 completion_event->Signal(); 583 completion_event->Signal();
275 584
585 cmd_buff[0] = IPC::MakeHeader(0x26, 1, 0);
276 cmd_buff[1] = RESULT_SUCCESS.raw; 586 cmd_buff[1] = RESULT_SUCCESS.raw;
587
588 LOG_DEBUG(Service_Y2R, "called");
277} 589}
278 590
279static void StopConversion(Service::Interface* self) { 591static void StopConversion(Service::Interface* self) {
@@ -281,6 +593,7 @@ static void StopConversion(Service::Interface* self) {
281 593
282 cmd_buff[0] = IPC::MakeHeader(0x27, 1, 0); 594 cmd_buff[0] = IPC::MakeHeader(0x27, 1, 0);
283 cmd_buff[1] = RESULT_SUCCESS.raw; 595 cmd_buff[1] = RESULT_SUCCESS.raw;
596
284 LOG_DEBUG(Service_Y2R, "called"); 597 LOG_DEBUG(Service_Y2R, "called");
285} 598}
286 599
@@ -293,50 +606,61 @@ static void StopConversion(Service::Interface* self) {
293static void IsBusyConversion(Service::Interface* self) { 606static void IsBusyConversion(Service::Interface* self) {
294 u32* cmd_buff = Kernel::GetCommandBuffer(); 607 u32* cmd_buff = Kernel::GetCommandBuffer();
295 608
609 cmd_buff[0] = IPC::MakeHeader(0x28, 2, 0);
296 cmd_buff[1] = RESULT_SUCCESS.raw; 610 cmd_buff[1] = RESULT_SUCCESS.raw;
297 cmd_buff[2] = 0; // StartConversion always finishes immediately 611 cmd_buff[2] = 0; // StartConversion always finishes immediately
612
298 LOG_DEBUG(Service_Y2R, "called"); 613 LOG_DEBUG(Service_Y2R, "called");
299} 614}
300 615
301/** 616/**
302 * Y2R_U::SetConversionParams service function 617 * Y2R_U::SetPackageParameter service function
303 */ 618 */
304static void SetConversionParams(Service::Interface* self) { 619static void SetPackageParameter(Service::Interface* self) {
305 u32* cmd_buff = Kernel::GetCommandBuffer(); 620 u32* cmd_buff = Kernel::GetCommandBuffer();
306 621
307 auto params = reinterpret_cast<const ConversionParameters*>(&cmd_buff[1]); 622 auto params = reinterpret_cast<const ConversionParameters*>(&cmd_buff[1]);
308 LOG_DEBUG(Service_Y2R,
309 "called input_format=%hhu output_format=%hhu rotation=%hhu block_alignment=%hhu "
310 "input_line_width=%hu input_lines=%hu standard_coefficient=%hhu "
311 "reserved=%hhu alpha=%hX",
312 params->input_format, params->output_format, params->rotation, params->block_alignment,
313 params->input_line_width, params->input_lines, params->standard_coefficient,
314 params->reserved, params->alpha);
315
316 ResultCode result = RESULT_SUCCESS;
317 623
318 conversion.input_format = params->input_format; 624 conversion.input_format = params->input_format;
319 conversion.output_format = params->output_format; 625 conversion.output_format = params->output_format;
320 conversion.rotation = params->rotation; 626 conversion.rotation = params->rotation;
321 conversion.block_alignment = params->block_alignment; 627 conversion.block_alignment = params->block_alignment;
322 result = conversion.SetInputLineWidth(params->input_line_width); 628
323 if (result.IsError()) goto cleanup; 629 ResultCode result = conversion.SetInputLineWidth(params->input_line_width);
630
631 if (result.IsError())
632 goto cleanup;
633
324 result = conversion.SetInputLines(params->input_lines); 634 result = conversion.SetInputLines(params->input_lines);
325 if (result.IsError()) goto cleanup; 635
636 if (result.IsError())
637 goto cleanup;
638
326 result = conversion.SetStandardCoefficient(params->standard_coefficient); 639 result = conversion.SetStandardCoefficient(params->standard_coefficient);
327 if (result.IsError()) goto cleanup; 640
641 if (result.IsError())
642 goto cleanup;
643
644 conversion.padding = params->padding;
328 conversion.alpha = params->alpha; 645 conversion.alpha = params->alpha;
329 646
330cleanup: 647cleanup:
331 cmd_buff[0] = IPC::MakeHeader(0x29, 1, 0); 648 cmd_buff[0] = IPC::MakeHeader(0x29, 1, 0);
332 cmd_buff[1] = result.raw; 649 cmd_buff[1] = result.raw;
650
651 LOG_DEBUG(Service_Y2R, "called input_format=%hhu output_format=%hhu rotation=%hhu block_alignment=%hhu "
652 "input_line_width=%hu input_lines=%hu standard_coefficient=%hhu reserved=%hhu alpha=%hX",
653 params->input_format, params->output_format, params->rotation, params->block_alignment,
654 params->input_line_width, params->input_lines, params->standard_coefficient, params->padding, params->alpha);
333} 655}
334 656
335static void PingProcess(Service::Interface* self) { 657static void PingProcess(Service::Interface* self) {
336 u32* cmd_buff = Kernel::GetCommandBuffer(); 658 u32* cmd_buff = Kernel::GetCommandBuffer();
337 659
660 cmd_buff[0] = IPC::MakeHeader(0x2A, 2, 0);
338 cmd_buff[1] = RESULT_SUCCESS.raw; 661 cmd_buff[1] = RESULT_SUCCESS.raw;
339 cmd_buff[2] = 0; 662 cmd_buff[2] = 0;
663
340 LOG_WARNING(Service_Y2R, "(STUBBED) called"); 664 LOG_WARNING(Service_Y2R, "(STUBBED) called");
341} 665}
342 666
@@ -362,6 +686,7 @@ static void DriverInitialize(Service::Interface* self) {
362 686
363 cmd_buff[0] = IPC::MakeHeader(0x2B, 1, 0); 687 cmd_buff[0] = IPC::MakeHeader(0x2B, 1, 0);
364 cmd_buff[1] = RESULT_SUCCESS.raw; 688 cmd_buff[1] = RESULT_SUCCESS.raw;
689
365 LOG_DEBUG(Service_Y2R, "called"); 690 LOG_DEBUG(Service_Y2R, "called");
366} 691}
367 692
@@ -370,54 +695,67 @@ static void DriverFinalize(Service::Interface* self) {
370 695
371 cmd_buff[0] = IPC::MakeHeader(0x2C, 1, 0); 696 cmd_buff[0] = IPC::MakeHeader(0x2C, 1, 0);
372 cmd_buff[1] = RESULT_SUCCESS.raw; 697 cmd_buff[1] = RESULT_SUCCESS.raw;
698
699 LOG_DEBUG(Service_Y2R, "called");
700}
701
702
703static void GetPackageParameter(Service::Interface* self) {
704 u32* cmd_buff = Kernel::GetCommandBuffer();
705
706 cmd_buff[0] = IPC::MakeHeader(0x2D, 4, 0);
707 cmd_buff[1] = RESULT_SUCCESS.raw;
708 std::memcpy(&cmd_buff[2], &conversion, sizeof(ConversionParameters));
709
373 LOG_DEBUG(Service_Y2R, "called"); 710 LOG_DEBUG(Service_Y2R, "called");
374} 711}
375 712
376const Interface::FunctionInfo FunctionTable[] = { 713const Interface::FunctionInfo FunctionTable[] = {
377 {0x00010040, SetInputFormat, "SetInputFormat"}, 714 {0x00010040, SetInputFormat, "SetInputFormat"},
378 {0x00020000, nullptr, "GetInputFormat"}, 715 {0x00020000, GetInputFormat, "GetInputFormat"},
379 {0x00030040, SetOutputFormat, "SetOutputFormat"}, 716 {0x00030040, SetOutputFormat, "SetOutputFormat"},
380 {0x00040000, nullptr, "GetOutputFormat"}, 717 {0x00040000, GetOutputFormat, "GetOutputFormat"},
381 {0x00050040, SetRotation, "SetRotation"}, 718 {0x00050040, SetRotation, "SetRotation"},
382 {0x00060000, nullptr, "GetRotation"}, 719 {0x00060000, GetRotation, "GetRotation"},
383 {0x00070040, SetBlockAlignment, "SetBlockAlignment"}, 720 {0x00070040, SetBlockAlignment, "SetBlockAlignment"},
384 {0x00080000, nullptr, "GetBlockAlignment"}, 721 {0x00080000, GetBlockAlignment, "GetBlockAlignment"},
385 {0x00090040, nullptr, "SetSpacialDithering"}, 722 {0x00090040, SetSpacialDithering, "SetSpacialDithering"},
386 {0x000A0000, nullptr, "GetSpacialDithering"}, 723 {0x000A0000, GetSpacialDithering, "GetSpacialDithering"},
387 {0x000B0040, nullptr, "SetTemporalDithering"}, 724 {0x000B0040, SetTemporalDithering, "SetTemporalDithering"},
388 {0x000C0000, nullptr, "GetTemporalDithering"}, 725 {0x000C0000, GetTemporalDithering, "GetTemporalDithering"},
389 {0x000D0040, SetTransferEndInterrupt, "SetTransferEndInterrupt"}, 726 {0x000D0040, SetTransferEndInterrupt, "SetTransferEndInterrupt"},
727 {0x000E0000, GetTransferEndInterrupt, "GetTransferEndInterrupt"},
390 {0x000F0000, GetTransferEndEvent, "GetTransferEndEvent"}, 728 {0x000F0000, GetTransferEndEvent, "GetTransferEndEvent"},
391 {0x00100102, SetSendingY, "SetSendingY"}, 729 {0x00100102, SetSendingY, "SetSendingY"},
392 {0x00110102, SetSendingU, "SetSendingU"}, 730 {0x00110102, SetSendingU, "SetSendingU"},
393 {0x00120102, SetSendingV, "SetSendingV"}, 731 {0x00120102, SetSendingV, "SetSendingV"},
394 {0x00130102, SetSendingYUYV, "SetSendingYUYV"}, 732 {0x00130102, SetSendingYUYV, "SetSendingYUYV"},
395 {0x00140000, nullptr, "IsFinishedSendingYuv"}, 733 {0x00140000, IsFinishedSendingYuv, "IsFinishedSendingYuv"},
396 {0x00150000, nullptr, "IsFinishedSendingY"}, 734 {0x00150000, IsFinishedSendingY, "IsFinishedSendingY"},
397 {0x00160000, nullptr, "IsFinishedSendingU"}, 735 {0x00160000, IsFinishedSendingU, "IsFinishedSendingU"},
398 {0x00170000, nullptr, "IsFinishedSendingV"}, 736 {0x00170000, IsFinishedSendingV, "IsFinishedSendingV"},
399 {0x00180102, SetReceiving, "SetReceiving"}, 737 {0x00180102, SetReceiving, "SetReceiving"},
400 {0x00190000, nullptr, "IsFinishedReceiving"}, 738 {0x00190000, IsFinishedReceiving, "IsFinishedReceiving"},
401 {0x001A0040, SetInputLineWidth, "SetInputLineWidth"}, 739 {0x001A0040, SetInputLineWidth, "SetInputLineWidth"},
402 {0x001B0000, nullptr, "GetInputLineWidth"}, 740 {0x001B0000, GetInputLineWidth, "GetInputLineWidth"},
403 {0x001C0040, SetInputLines, "SetInputLines"}, 741 {0x001C0040, SetInputLines, "SetInputLines"},
404 {0x001D0000, nullptr, "GetInputLines"}, 742 {0x001D0000, GetInputLines, "GetInputLines"},
405 {0x001E0100, SetCoefficient, "SetCoefficient"}, 743 {0x001E0100, SetCoefficient, "SetCoefficient"},
406 {0x001F0000, nullptr, "GetCoefficient"}, 744 {0x001F0000, GetCoefficient, "GetCoefficient"},
407 {0x00200040, SetStandardCoefficient, "SetStandardCoefficient"}, 745 {0x00200040, SetStandardCoefficient, "SetStandardCoefficient"},
408 {0x00210040, nullptr, "GetStandardCoefficientParams"}, 746 {0x00210040, GetStandardCoefficient, "GetStandardCoefficient"},
409 {0x00220040, SetAlpha, "SetAlpha"}, 747 {0x00220040, SetAlpha, "SetAlpha"},
410 {0x00230000, nullptr, "GetAlpha"}, 748 {0x00230000, GetAlpha, "GetAlpha"},
411 {0x00240200, nullptr, "SetDitheringWeightParams"}, 749 {0x00240200, SetDitheringWeightParams,"SetDitheringWeightParams"},
412 {0x00250000, nullptr, "GetDitheringWeightParams"}, 750 {0x00250000, GetDitheringWeightParams,"GetDitheringWeightParams"},
413 {0x00260000, StartConversion, "StartConversion"}, 751 {0x00260000, StartConversion, "StartConversion"},
414 {0x00270000, StopConversion, "StopConversion"}, 752 {0x00270000, StopConversion, "StopConversion"},
415 {0x00280000, IsBusyConversion, "IsBusyConversion"}, 753 {0x00280000, IsBusyConversion, "IsBusyConversion"},
416 {0x002901C0, SetConversionParams, "SetConversionParams"}, 754 {0x002901C0, SetPackageParameter, "SetPackageParameter"},
417 {0x002A0000, PingProcess, "PingProcess"}, 755 {0x002A0000, PingProcess, "PingProcess"},
418 {0x002B0000, DriverInitialize, "DriverInitialize"}, 756 {0x002B0000, DriverInitialize, "DriverInitialize"},
419 {0x002C0000, DriverFinalize, "DriverFinalize"}, 757 {0x002C0000, DriverFinalize, "DriverFinalize"},
420 {0x002D0000, nullptr, "GetPackageParameter"}, 758 {0x002D0000, GetPackageParameter, "GetPackageParameter"},
421}; 759};
422 760
423//////////////////////////////////////////////////////////////////////////////////////////////////// 761////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/core/hle/service/y2r_u.h b/src/core/hle/service/y2r_u.h
index 3965a5545..95fa2fdb7 100644
--- a/src/core/hle/service/y2r_u.h
+++ b/src/core/hle/service/y2r_u.h
@@ -97,6 +97,7 @@ struct ConversionConfiguration {
97 u16 input_line_width; 97 u16 input_line_width;
98 u16 input_lines; 98 u16 input_lines;
99 CoefficientSet coefficients; 99 CoefficientSet coefficients;
100 u8 padding;
100 u16 alpha; 101 u16 alpha;
101 102
102 /// Input parameters for the Y (luma) plane 103 /// Input parameters for the Y (luma) plane
@@ -109,6 +110,25 @@ struct ConversionConfiguration {
109 ResultCode SetStandardCoefficient(StandardCoefficient standard_coefficient); 110 ResultCode SetStandardCoefficient(StandardCoefficient standard_coefficient);
110}; 111};
111 112
113struct DitheringWeightParams {
114 u16 w0_xEven_yEven;
115 u16 w0_xOdd_yEven;
116 u16 w0_xEven_yOdd;
117 u16 w0_xOdd_yOdd;
118 u16 w1_xEven_yEven;
119 u16 w1_xOdd_yEven;
120 u16 w1_xEven_yOdd;
121 u16 w1_xOdd_yOdd;
122 u16 w2_xEven_yEven;
123 u16 w2_xOdd_yEven;
124 u16 w2_xEven_yOdd;
125 u16 w2_xOdd_yOdd;
126 u16 w3_xEven_yEven;
127 u16 w3_xOdd_yEven;
128 u16 w3_xEven_yOdd;
129 u16 w3_xOdd_yOdd;
130};
131
112class Interface : public Service::Interface { 132class Interface : public Service::Interface {
113public: 133public:
114 Interface(); 134 Interface();
diff --git a/src/core/hle/svc.cpp b/src/core/hle/svc.cpp
index 1112a905e..fb2aecbf2 100644
--- a/src/core/hle/svc.cpp
+++ b/src/core/hle/svc.cpp
@@ -6,7 +6,6 @@
6 6
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8#include "common/microprofile.h" 8#include "common/microprofile.h"
9#include "common/profiler.h"
10#include "common/string_util.h" 9#include "common/string_util.h"
11#include "common/symbols.h" 10#include "common/symbols.h"
12 11
@@ -1035,8 +1034,6 @@ static const FunctionDef SVC_Table[] = {
1035 {0x7D, HLE::Wrap<QueryProcessMemory>, "QueryProcessMemory"}, 1034 {0x7D, HLE::Wrap<QueryProcessMemory>, "QueryProcessMemory"},
1036}; 1035};
1037 1036
1038Common::Profiling::TimingCategory profiler_svc("SVC Calls");
1039
1040static const FunctionDef* GetSVCInfo(u32 func_num) { 1037static const FunctionDef* GetSVCInfo(u32 func_num) {
1041 if (func_num >= ARRAY_SIZE(SVC_Table)) { 1038 if (func_num >= ARRAY_SIZE(SVC_Table)) {
1042 LOG_ERROR(Kernel_SVC, "unknown svc=0x%02X", func_num); 1039 LOG_ERROR(Kernel_SVC, "unknown svc=0x%02X", func_num);
@@ -1048,7 +1045,6 @@ static const FunctionDef* GetSVCInfo(u32 func_num) {
1048MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70)); 1045MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70));
1049 1046
1050void CallSVC(u32 immediate) { 1047void CallSVC(u32 immediate) {
1051 Common::Profiling::ScopeTimer timer_svc(profiler_svc);
1052 MICROPROFILE_SCOPE(Kernel_SVC); 1048 MICROPROFILE_SCOPE(Kernel_SVC);
1053 1049
1054 const FunctionDef* info = GetSVCInfo(immediate); 1050 const FunctionDef* info = GetSVCInfo(immediate);
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index 7e2f9cdfa..2fe856293 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -115,21 +115,39 @@ inline void Write(u32 addr, const T data) {
115 u8* start = Memory::GetPhysicalPointer(config.GetStartAddress()); 115 u8* start = Memory::GetPhysicalPointer(config.GetStartAddress());
116 u8* end = Memory::GetPhysicalPointer(config.GetEndAddress()); 116 u8* end = Memory::GetPhysicalPointer(config.GetEndAddress());
117 117
118 if (config.fill_24bit) { 118 // TODO: Consider always accelerating and returning vector of
119 // fill with 24-bit values 119 // regions that the accelerated fill did not cover to
120 for (u8* ptr = start; ptr < end; ptr += 3) { 120 // reduce/eliminate the fill that the cpu has to do.
121 ptr[0] = config.value_24bit_r; 121 // This would also mean that the flush below is not needed.
122 ptr[1] = config.value_24bit_g; 122 // Fill should first flush all surfaces that touch but are
123 ptr[2] = config.value_24bit_b; 123 // not completely within the fill range.
124 // Then fill all completely covered surfaces, and return the
125 // regions that were between surfaces or within the touching
126 // ones for cpu to manually fill here.
127 if (!VideoCore::g_renderer->Rasterizer()->AccelerateFill(config)) {
128 Memory::RasterizerFlushAndInvalidateRegion(config.GetStartAddress(), config.GetEndAddress() - config.GetStartAddress());
129
130 if (config.fill_24bit) {
131 // fill with 24-bit values
132 for (u8* ptr = start; ptr < end; ptr += 3) {
133 ptr[0] = config.value_24bit_r;
134 ptr[1] = config.value_24bit_g;
135 ptr[2] = config.value_24bit_b;
136 }
137 } else if (config.fill_32bit) {
138 // fill with 32-bit values
139 if (end > start) {
140 u32 value = config.value_32bit;
141 size_t len = (end - start) / sizeof(u32);
142 for (size_t i = 0; i < len; ++i)
143 memcpy(&start[i * sizeof(u32)], &value, sizeof(u32));
144 }
145 } else {
146 // fill with 16-bit values
147 u16 value_16bit = config.value_16bit.Value();
148 for (u8* ptr = start; ptr < end; ptr += sizeof(u16))
149 memcpy(ptr, &value_16bit, sizeof(u16));
124 } 150 }
125 } else if (config.fill_32bit) {
126 // fill with 32-bit values
127 for (u32* ptr = (u32*)start; ptr < (u32*)end; ++ptr)
128 *ptr = config.value_32bit;
129 } else {
130 // fill with 16-bit values
131 for (u16* ptr = (u16*)start; ptr < (u16*)end; ++ptr)
132 *ptr = config.value_16bit;
133 } 151 }
134 152
135 LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), config.GetEndAddress()); 153 LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), config.GetEndAddress());
@@ -139,8 +157,6 @@ inline void Write(u32 addr, const T data) {
139 } else { 157 } else {
140 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC1); 158 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC1);
141 } 159 }
142
143 VideoCore::g_renderer->Rasterizer()->InvalidateRegion(config.GetStartAddress(), config.GetEndAddress() - config.GetStartAddress());
144 } 160 }
145 161
146 // Reset "trigger" flag and set the "finish" flag 162 // Reset "trigger" flag and set the "finish" flag
@@ -161,184 +177,185 @@ inline void Write(u32 addr, const T data) {
161 if (Pica::g_debug_context) 177 if (Pica::g_debug_context)
162 Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer, nullptr); 178 Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer, nullptr);
163 179
164 u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress()); 180 if (!VideoCore::g_renderer->Rasterizer()->AccelerateDisplayTransfer(config)) {
165 u8* dst_pointer = Memory::GetPhysicalPointer(config.GetPhysicalOutputAddress()); 181 u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress());
166 182 u8* dst_pointer = Memory::GetPhysicalPointer(config.GetPhysicalOutputAddress());
167 if (config.is_texture_copy) {
168 u32 input_width = config.texture_copy.input_width * 16;
169 u32 input_gap = config.texture_copy.input_gap * 16;
170 u32 output_width = config.texture_copy.output_width * 16;
171 u32 output_gap = config.texture_copy.output_gap * 16;
172
173 size_t contiguous_input_size = config.texture_copy.size / input_width * (input_width + input_gap);
174 VideoCore::g_renderer->Rasterizer()->FlushRegion(config.GetPhysicalInputAddress(), contiguous_input_size);
175
176 u32 remaining_size = config.texture_copy.size;
177 u32 remaining_input = input_width;
178 u32 remaining_output = output_width;
179 while (remaining_size > 0) {
180 u32 copy_size = std::min({ remaining_input, remaining_output, remaining_size });
181 183
182 std::memcpy(dst_pointer, src_pointer, copy_size); 184 if (config.is_texture_copy) {
183 src_pointer += copy_size; 185 u32 input_width = config.texture_copy.input_width * 16;
184 dst_pointer += copy_size; 186 u32 input_gap = config.texture_copy.input_gap * 16;
187 u32 output_width = config.texture_copy.output_width * 16;
188 u32 output_gap = config.texture_copy.output_gap * 16;
185 189
186 remaining_input -= copy_size; 190 size_t contiguous_input_size = config.texture_copy.size / input_width * (input_width + input_gap);
187 remaining_output -= copy_size; 191 Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), contiguous_input_size);
188 remaining_size -= copy_size;
189 192
190 if (remaining_input == 0) { 193 size_t contiguous_output_size = config.texture_copy.size / output_width * (output_width + output_gap);
191 remaining_input = input_width; 194 Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), contiguous_output_size);
192 src_pointer += input_gap;
193 }
194 if (remaining_output == 0) {
195 remaining_output = output_width;
196 dst_pointer += output_gap;
197 }
198 }
199 195
200 LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X", 196 u32 remaining_size = config.texture_copy.size;
201 config.texture_copy.size, 197 u32 remaining_input = input_width;
202 config.GetPhysicalInputAddress(), input_width, input_gap, 198 u32 remaining_output = output_width;
203 config.GetPhysicalOutputAddress(), output_width, output_gap, 199 while (remaining_size > 0) {
204 config.flags); 200 u32 copy_size = std::min({ remaining_input, remaining_output, remaining_size });
205 201
206 size_t contiguous_output_size = config.texture_copy.size / output_width * (output_width + output_gap); 202 std::memcpy(dst_pointer, src_pointer, copy_size);
207 VideoCore::g_renderer->Rasterizer()->InvalidateRegion(config.GetPhysicalOutputAddress(), contiguous_output_size); 203 src_pointer += copy_size;
204 dst_pointer += copy_size;
208 205
209 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); 206 remaining_input -= copy_size;
210 break; 207 remaining_output -= copy_size;
211 } 208 remaining_size -= copy_size;
212 209
213 if (config.scaling > config.ScaleXY) { 210 if (remaining_input == 0) {
214 LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", config.scaling.Value()); 211 remaining_input = input_width;
215 UNIMPLEMENTED(); 212 src_pointer += input_gap;
216 break; 213 }
217 } 214 if (remaining_output == 0) {
215 remaining_output = output_width;
216 dst_pointer += output_gap;
217 }
218 }
218 219
219 if (config.input_linear && config.scaling != config.NoScale) { 220 LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X",
220 LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input"); 221 config.texture_copy.size,
221 UNIMPLEMENTED(); 222 config.GetPhysicalInputAddress(), input_width, input_gap,
222 break; 223 config.GetPhysicalOutputAddress(), output_width, output_gap,
223 } 224 config.flags);
224 225
225 bool horizontal_scale = config.scaling != config.NoScale; 226 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF);
226 bool vertical_scale = config.scaling == config.ScaleXY; 227 break;
228 }
227 229
228 u32 output_width = config.output_width >> horizontal_scale; 230 if (config.scaling > config.ScaleXY) {
229 u32 output_height = config.output_height >> vertical_scale; 231 LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", config.scaling.Value());
232 UNIMPLEMENTED();
233 break;
234 }
230 235
231 u32 input_size = config.input_width * config.input_height * GPU::Regs::BytesPerPixel(config.input_format); 236 if (config.input_linear && config.scaling != config.NoScale) {
232 u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format); 237 LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input");
238 UNIMPLEMENTED();
239 break;
240 }
233 241
234 VideoCore::g_renderer->Rasterizer()->FlushRegion(config.GetPhysicalInputAddress(), input_size); 242 int horizontal_scale = config.scaling != config.NoScale ? 1 : 0;
243 int vertical_scale = config.scaling == config.ScaleXY ? 1 : 0;
235 244
236 for (u32 y = 0; y < output_height; ++y) { 245 u32 output_width = config.output_width >> horizontal_scale;
237 for (u32 x = 0; x < output_width; ++x) { 246 u32 output_height = config.output_height >> vertical_scale;
238 Math::Vec4<u8> src_color;
239 247
240 // Calculate the [x,y] position of the input image 248 u32 input_size = config.input_width * config.input_height * GPU::Regs::BytesPerPixel(config.input_format);
241 // based on the current output position and the scale 249 u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format);
242 u32 input_x = x << horizontal_scale;
243 u32 input_y = y << vertical_scale;
244 250
245 if (config.flip_vertically) { 251 Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), input_size);
246 // Flip the y value of the output data, 252 Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), output_size);
247 // we do this after calculating the [x,y] position of the input image
248 // to account for the scaling options.
249 y = output_height - y - 1;
250 }
251 253
252 u32 dst_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.output_format); 254 for (u32 y = 0; y < output_height; ++y) {
253 u32 src_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.input_format); 255 for (u32 x = 0; x < output_width; ++x) {
254 u32 src_offset; 256 Math::Vec4<u8> src_color;
255 u32 dst_offset;
256 257
257 if (config.input_linear) { 258 // Calculate the [x,y] position of the input image
258 if (!config.dont_swizzle) { 259 // based on the current output position and the scale
259 // Interpret the input as linear and the output as tiled 260 u32 input_x = x << horizontal_scale;
260 u32 coarse_y = y & ~7; 261 u32 input_y = y << vertical_scale;
261 u32 stride = output_width * dst_bytes_per_pixel;
262 262
263 src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel; 263 if (config.flip_vertically) {
264 dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + coarse_y * stride; 264 // Flip the y value of the output data,
265 } else { 265 // we do this after calculating the [x,y] position of the input image
266 // Both input and output are linear 266 // to account for the scaling options.
267 src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel; 267 y = output_height - y - 1;
268 dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
269 } 268 }
270 } else {
271 if (!config.dont_swizzle) {
272 // Interpret the input as tiled and the output as linear
273 u32 coarse_y = input_y & ~7;
274 u32 stride = config.input_width * src_bytes_per_pixel;
275 269
276 src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + coarse_y * stride; 270 u32 dst_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.output_format);
277 dst_offset = (x + y * output_width) * dst_bytes_per_pixel; 271 u32 src_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.input_format);
272 u32 src_offset;
273 u32 dst_offset;
274
275 if (config.input_linear) {
276 if (!config.dont_swizzle) {
277 // Interpret the input as linear and the output as tiled
278 u32 coarse_y = y & ~7;
279 u32 stride = output_width * dst_bytes_per_pixel;
280
281 src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel;
282 dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + coarse_y * stride;
283 } else {
284 // Both input and output are linear
285 src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel;
286 dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
287 }
278 } else { 288 } else {
279 // Both input and output are tiled 289 if (!config.dont_swizzle) {
280 u32 out_coarse_y = y & ~7; 290 // Interpret the input as tiled and the output as linear
281 u32 out_stride = output_width * dst_bytes_per_pixel; 291 u32 coarse_y = input_y & ~7;
282 292 u32 stride = config.input_width * src_bytes_per_pixel;
283 u32 in_coarse_y = input_y & ~7; 293
284 u32 in_stride = config.input_width * src_bytes_per_pixel; 294 src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + coarse_y * stride;
285 295 dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
286 src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + in_coarse_y * in_stride; 296 } else {
287 dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + out_coarse_y * out_stride; 297 // Both input and output are tiled
298 u32 out_coarse_y = y & ~7;
299 u32 out_stride = output_width * dst_bytes_per_pixel;
300
301 u32 in_coarse_y = input_y & ~7;
302 u32 in_stride = config.input_width * src_bytes_per_pixel;
303
304 src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + in_coarse_y * in_stride;
305 dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + out_coarse_y * out_stride;
306 }
288 } 307 }
289 }
290 308
291 const u8* src_pixel = src_pointer + src_offset; 309 const u8* src_pixel = src_pointer + src_offset;
292 src_color = DecodePixel(config.input_format, src_pixel); 310 src_color = DecodePixel(config.input_format, src_pixel);
293 if (config.scaling == config.ScaleX) { 311 if (config.scaling == config.ScaleX) {
294 Math::Vec4<u8> pixel = DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel); 312 Math::Vec4<u8> pixel = DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel);
295 src_color = ((src_color + pixel) / 2).Cast<u8>(); 313 src_color = ((src_color + pixel) / 2).Cast<u8>();
296 } else if (config.scaling == config.ScaleXY) { 314 } else if (config.scaling == config.ScaleXY) {
297 Math::Vec4<u8> pixel1 = DecodePixel(config.input_format, src_pixel + 1 * src_bytes_per_pixel); 315 Math::Vec4<u8> pixel1 = DecodePixel(config.input_format, src_pixel + 1 * src_bytes_per_pixel);
298 Math::Vec4<u8> pixel2 = DecodePixel(config.input_format, src_pixel + 2 * src_bytes_per_pixel); 316 Math::Vec4<u8> pixel2 = DecodePixel(config.input_format, src_pixel + 2 * src_bytes_per_pixel);
299 Math::Vec4<u8> pixel3 = DecodePixel(config.input_format, src_pixel + 3 * src_bytes_per_pixel); 317 Math::Vec4<u8> pixel3 = DecodePixel(config.input_format, src_pixel + 3 * src_bytes_per_pixel);
300 src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>(); 318 src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>();
301 } 319 }
302 320
303 u8* dst_pixel = dst_pointer + dst_offset; 321 u8* dst_pixel = dst_pointer + dst_offset;
304 switch (config.output_format) { 322 switch (config.output_format) {
305 case Regs::PixelFormat::RGBA8: 323 case Regs::PixelFormat::RGBA8:
306 Color::EncodeRGBA8(src_color, dst_pixel); 324 Color::EncodeRGBA8(src_color, dst_pixel);
307 break; 325 break;
308 326
309 case Regs::PixelFormat::RGB8: 327 case Regs::PixelFormat::RGB8:
310 Color::EncodeRGB8(src_color, dst_pixel); 328 Color::EncodeRGB8(src_color, dst_pixel);
311 break; 329 break;
312 330
313 case Regs::PixelFormat::RGB565: 331 case Regs::PixelFormat::RGB565:
314 Color::EncodeRGB565(src_color, dst_pixel); 332 Color::EncodeRGB565(src_color, dst_pixel);
315 break; 333 break;
316 334
317 case Regs::PixelFormat::RGB5A1: 335 case Regs::PixelFormat::RGB5A1:
318 Color::EncodeRGB5A1(src_color, dst_pixel); 336 Color::EncodeRGB5A1(src_color, dst_pixel);
319 break; 337 break;
320 338
321 case Regs::PixelFormat::RGBA4: 339 case Regs::PixelFormat::RGBA4:
322 Color::EncodeRGBA4(src_color, dst_pixel); 340 Color::EncodeRGBA4(src_color, dst_pixel);
323 break; 341 break;
324 342
325 default: 343 default:
326 LOG_ERROR(HW_GPU, "Unknown destination framebuffer format %x", config.output_format.Value()); 344 LOG_ERROR(HW_GPU, "Unknown destination framebuffer format %x", config.output_format.Value());
327 break; 345 break;
346 }
328 } 347 }
329 } 348 }
330 }
331 349
332 LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), dst format %x, flags 0x%08X", 350 LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), dst format %x, flags 0x%08X",
333 config.output_height * output_width * GPU::Regs::BytesPerPixel(config.output_format), 351 config.output_height * output_width * GPU::Regs::BytesPerPixel(config.output_format),
334 config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(), 352 config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(),
335 config.GetPhysicalOutputAddress(), output_width, output_height, 353 config.GetPhysicalOutputAddress(), output_width, output_height,
336 config.output_format.Value(), config.flags); 354 config.output_format.Value(), config.flags);
355 }
337 356
338 g_regs.display_transfer_config.trigger = 0; 357 g_regs.display_transfer_config.trigger = 0;
339 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); 358 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF);
340
341 VideoCore::g_renderer->Rasterizer()->InvalidateRegion(config.GetPhysicalOutputAddress(), output_size);
342 } 359 }
343 break; 360 break;
344 } 361 }
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h
index a00adbf53..da4c345b4 100644
--- a/src/core/hw/gpu.h
+++ b/src/core/hw/gpu.h
@@ -78,7 +78,7 @@ struct Regs {
78 78
79 INSERT_PADDING_WORDS(0x4); 79 INSERT_PADDING_WORDS(0x4);
80 80
81 struct { 81 struct MemoryFillConfig {
82 u32 address_start; 82 u32 address_start;
83 u32 address_end; 83 u32 address_end;
84 84
@@ -165,7 +165,7 @@ struct Regs {
165 165
166 INSERT_PADDING_WORDS(0x169); 166 INSERT_PADDING_WORDS(0x169);
167 167
168 struct { 168 struct DisplayTransferConfig {
169 u32 input_address; 169 u32 input_address;
170 u32 output_address; 170 u32 output_address;
171 171
diff --git a/src/core/loader/ncch.cpp b/src/core/loader/ncch.cpp
index a4b47ef8c..066e91a9e 100644
--- a/src/core/loader/ncch.cpp
+++ b/src/core/loader/ncch.cpp
@@ -255,7 +255,7 @@ ResultStatus AppLoader_NCCH::Load() {
255 resource_limit_category = exheader_header.arm11_system_local_caps.resource_limit_category; 255 resource_limit_category = exheader_header.arm11_system_local_caps.resource_limit_category;
256 256
257 LOG_INFO(Loader, "Name: %s" , exheader_header.codeset_info.name); 257 LOG_INFO(Loader, "Name: %s" , exheader_header.codeset_info.name);
258 LOG_INFO(Loader, "Program ID: %016X" , ncch_header.program_id); 258 LOG_INFO(Loader, "Program ID: %016llX" , ncch_header.program_id);
259 LOG_DEBUG(Loader, "Code compressed: %s" , is_compressed ? "yes" : "no"); 259 LOG_DEBUG(Loader, "Code compressed: %s" , is_compressed ? "yes" : "no");
260 LOG_DEBUG(Loader, "Entry point: 0x%08X", entry_point); 260 LOG_DEBUG(Loader, "Entry point: 0x%08X", entry_point);
261 LOG_DEBUG(Loader, "Code size: 0x%08X", code_size); 261 LOG_DEBUG(Loader, "Code size: 0x%08X", code_size);
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 7de5bd15d..ee9b69f81 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -15,6 +15,9 @@
15#include "core/memory_setup.h" 15#include "core/memory_setup.h"
16#include "core/mmio.h" 16#include "core/mmio.h"
17 17
18#include "video_core/renderer_base.h"
19#include "video_core/video_core.h"
20
18namespace Memory { 21namespace Memory {
19 22
20enum class PageType { 23enum class PageType {
@@ -22,8 +25,12 @@ enum class PageType {
22 Unmapped, 25 Unmapped,
23 /// Page is mapped to regular memory. This is the only type you can get pointers to. 26 /// Page is mapped to regular memory. This is the only type you can get pointers to.
24 Memory, 27 Memory,
28 /// Page is mapped to regular memory, but also needs to check for rasterizer cache flushing and invalidation
29 RasterizerCachedMemory,
25 /// Page is mapped to a I/O region. Writing and reading to this page is handled by functions. 30 /// Page is mapped to a I/O region. Writing and reading to this page is handled by functions.
26 Special, 31 Special,
32 /// Page is mapped to a I/O region, but also needs to check for rasterizer cache flushing and invalidation
33 RasterizerCachedSpecial,
27}; 34};
28 35
29struct SpecialRegion { 36struct SpecialRegion {
@@ -57,6 +64,12 @@ struct PageTable {
57 * the corresponding entry in `pointers` MUST be set to null. 64 * the corresponding entry in `pointers` MUST be set to null.
58 */ 65 */
59 std::array<PageType, NUM_ENTRIES> attributes; 66 std::array<PageType, NUM_ENTRIES> attributes;
67
68 /**
69 * Indicates the number of externally cached resources touching a page that should be
70 * flushed before the memory is accessed
71 */
72 std::array<u8, NUM_ENTRIES> cached_res_count;
60}; 73};
61 74
62/// Singular page table used for the singleton process 75/// Singular page table used for the singleton process
@@ -72,8 +85,15 @@ static void MapPages(u32 base, u32 size, u8* memory, PageType type) {
72 while (base != end) { 85 while (base != end) {
73 ASSERT_MSG(base < PageTable::NUM_ENTRIES, "out of range mapping at %08X", base); 86 ASSERT_MSG(base < PageTable::NUM_ENTRIES, "out of range mapping at %08X", base);
74 87
88 // Since pages are unmapped on shutdown after video core is shutdown, the renderer may be null here
89 if (current_page_table->attributes[base] == PageType::RasterizerCachedMemory ||
90 current_page_table->attributes[base] == PageType::RasterizerCachedSpecial) {
91 RasterizerFlushAndInvalidateRegion(VirtualToPhysicalAddress(base << PAGE_BITS), PAGE_SIZE);
92 }
93
75 current_page_table->attributes[base] = type; 94 current_page_table->attributes[base] = type;
76 current_page_table->pointers[base] = memory; 95 current_page_table->pointers[base] = memory;
96 current_page_table->cached_res_count[base] = 0;
77 97
78 base += 1; 98 base += 1;
79 if (memory != nullptr) 99 if (memory != nullptr)
@@ -84,6 +104,7 @@ static void MapPages(u32 base, u32 size, u8* memory, PageType type) {
84void InitMemoryMap() { 104void InitMemoryMap() {
85 main_page_table.pointers.fill(nullptr); 105 main_page_table.pointers.fill(nullptr);
86 main_page_table.attributes.fill(PageType::Unmapped); 106 main_page_table.attributes.fill(PageType::Unmapped);
107 main_page_table.cached_res_count.fill(0);
87} 108}
88 109
89void MapMemoryRegion(VAddr base, u32 size, u8* target) { 110void MapMemoryRegion(VAddr base, u32 size, u8* target) {
@@ -107,6 +128,28 @@ void UnmapRegion(VAddr base, u32 size) {
107} 128}
108 129
109/** 130/**
131 * Gets a pointer to the exact memory at the virtual address (i.e. not page aligned)
132 * using a VMA from the current process
133 */
134static u8* GetPointerFromVMA(VAddr vaddr) {
135 u8* direct_pointer = nullptr;
136
137 auto& vma = Kernel::g_current_process->vm_manager.FindVMA(vaddr)->second;
138 switch (vma.type) {
139 case Kernel::VMAType::AllocatedMemoryBlock:
140 direct_pointer = vma.backing_block->data() + vma.offset;
141 break;
142 case Kernel::VMAType::BackingMemory:
143 direct_pointer = vma.backing_memory;
144 break;
145 default:
146 UNREACHABLE();
147 }
148
149 return direct_pointer + (vaddr - vma.base);
150}
151
152/**
110 * This function should only be called for virtual addresses with attribute `PageType::Special`. 153 * This function should only be called for virtual addresses with attribute `PageType::Special`.
111 */ 154 */
112static MMIORegionPointer GetMMIOHandler(VAddr vaddr) { 155static MMIORegionPointer GetMMIOHandler(VAddr vaddr) {
@@ -126,6 +169,7 @@ template <typename T>
126T Read(const VAddr vaddr) { 169T Read(const VAddr vaddr) {
127 const u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; 170 const u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS];
128 if (page_pointer) { 171 if (page_pointer) {
172 // NOTE: Avoid adding any extra logic to this fast-path block
129 T value; 173 T value;
130 std::memcpy(&value, &page_pointer[vaddr & PAGE_MASK], sizeof(T)); 174 std::memcpy(&value, &page_pointer[vaddr & PAGE_MASK], sizeof(T));
131 return value; 175 return value;
@@ -139,8 +183,22 @@ T Read(const VAddr vaddr) {
139 case PageType::Memory: 183 case PageType::Memory:
140 ASSERT_MSG(false, "Mapped memory page without a pointer @ %08X", vaddr); 184 ASSERT_MSG(false, "Mapped memory page without a pointer @ %08X", vaddr);
141 break; 185 break;
186 case PageType::RasterizerCachedMemory:
187 {
188 RasterizerFlushRegion(VirtualToPhysicalAddress(vaddr), sizeof(T));
189
190 T value;
191 std::memcpy(&value, GetPointerFromVMA(vaddr), sizeof(T));
192 return value;
193 }
142 case PageType::Special: 194 case PageType::Special:
143 return ReadMMIO<T>(GetMMIOHandler(vaddr), vaddr); 195 return ReadMMIO<T>(GetMMIOHandler(vaddr), vaddr);
196 case PageType::RasterizerCachedSpecial:
197 {
198 RasterizerFlushRegion(VirtualToPhysicalAddress(vaddr), sizeof(T));
199
200 return ReadMMIO<T>(GetMMIOHandler(vaddr), vaddr);
201 }
144 default: 202 default:
145 UNREACHABLE(); 203 UNREACHABLE();
146 } 204 }
@@ -153,6 +211,7 @@ template <typename T>
153void Write(const VAddr vaddr, const T data) { 211void Write(const VAddr vaddr, const T data) {
154 u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; 212 u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS];
155 if (page_pointer) { 213 if (page_pointer) {
214 // NOTE: Avoid adding any extra logic to this fast-path block
156 std::memcpy(&page_pointer[vaddr & PAGE_MASK], &data, sizeof(T)); 215 std::memcpy(&page_pointer[vaddr & PAGE_MASK], &data, sizeof(T));
157 return; 216 return;
158 } 217 }
@@ -165,9 +224,23 @@ void Write(const VAddr vaddr, const T data) {
165 case PageType::Memory: 224 case PageType::Memory:
166 ASSERT_MSG(false, "Mapped memory page without a pointer @ %08X", vaddr); 225 ASSERT_MSG(false, "Mapped memory page without a pointer @ %08X", vaddr);
167 break; 226 break;
227 case PageType::RasterizerCachedMemory:
228 {
229 RasterizerFlushAndInvalidateRegion(VirtualToPhysicalAddress(vaddr), sizeof(T));
230
231 std::memcpy(GetPointerFromVMA(vaddr), &data, sizeof(T));
232 break;
233 }
168 case PageType::Special: 234 case PageType::Special:
169 WriteMMIO<T>(GetMMIOHandler(vaddr), vaddr, data); 235 WriteMMIO<T>(GetMMIOHandler(vaddr), vaddr, data);
170 break; 236 break;
237 case PageType::RasterizerCachedSpecial:
238 {
239 RasterizerFlushAndInvalidateRegion(VirtualToPhysicalAddress(vaddr), sizeof(T));
240
241 WriteMMIO<T>(GetMMIOHandler(vaddr), vaddr, data);
242 break;
243 }
171 default: 244 default:
172 UNREACHABLE(); 245 UNREACHABLE();
173 } 246 }
@@ -179,6 +252,10 @@ u8* GetPointer(const VAddr vaddr) {
179 return page_pointer + (vaddr & PAGE_MASK); 252 return page_pointer + (vaddr & PAGE_MASK);
180 } 253 }
181 254
255 if (current_page_table->attributes[vaddr >> PAGE_BITS] == PageType::RasterizerCachedMemory) {
256 return GetPointerFromVMA(vaddr);
257 }
258
182 LOG_ERROR(HW_Memory, "unknown GetPointer @ 0x%08x", vaddr); 259 LOG_ERROR(HW_Memory, "unknown GetPointer @ 0x%08x", vaddr);
183 return nullptr; 260 return nullptr;
184} 261}
@@ -187,6 +264,69 @@ u8* GetPhysicalPointer(PAddr address) {
187 return GetPointer(PhysicalToVirtualAddress(address)); 264 return GetPointer(PhysicalToVirtualAddress(address));
188} 265}
189 266
267void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) {
268 if (start == 0) {
269 return;
270 }
271
272 u32 num_pages = ((start + size - 1) >> PAGE_BITS) - (start >> PAGE_BITS) + 1;
273 PAddr paddr = start;
274
275 for (unsigned i = 0; i < num_pages; ++i) {
276 VAddr vaddr = PhysicalToVirtualAddress(paddr);
277 u8& res_count = current_page_table->cached_res_count[vaddr >> PAGE_BITS];
278 ASSERT_MSG(count_delta <= UINT8_MAX - res_count, "Rasterizer resource cache counter overflow!");
279 ASSERT_MSG(count_delta >= -res_count, "Rasterizer resource cache counter underflow!");
280
281 // Switch page type to cached if now cached
282 if (res_count == 0) {
283 PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
284 switch (page_type) {
285 case PageType::Memory:
286 page_type = PageType::RasterizerCachedMemory;
287 current_page_table->pointers[vaddr >> PAGE_BITS] = nullptr;
288 break;
289 case PageType::Special:
290 page_type = PageType::RasterizerCachedSpecial;
291 break;
292 default:
293 UNREACHABLE();
294 }
295 }
296
297 res_count += count_delta;
298
299 // Switch page type to uncached if now uncached
300 if (res_count == 0) {
301 PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
302 switch (page_type) {
303 case PageType::RasterizerCachedMemory:
304 page_type = PageType::Memory;
305 current_page_table->pointers[vaddr >> PAGE_BITS] = GetPointerFromVMA(vaddr & ~PAGE_MASK);
306 break;
307 case PageType::RasterizerCachedSpecial:
308 page_type = PageType::Special;
309 break;
310 default:
311 UNREACHABLE();
312 }
313 }
314 paddr += PAGE_SIZE;
315 }
316}
317
318void RasterizerFlushRegion(PAddr start, u32 size) {
319 if (VideoCore::g_renderer != nullptr) {
320 VideoCore::g_renderer->Rasterizer()->FlushRegion(start, size);
321 }
322}
323
324void RasterizerFlushAndInvalidateRegion(PAddr start, u32 size) {
325 if (VideoCore::g_renderer != nullptr) {
326 VideoCore::g_renderer->Rasterizer()->FlushAndInvalidateRegion(start, size);
327 }
328}
329
190u8 Read8(const VAddr addr) { 330u8 Read8(const VAddr addr) {
191 return Read<u8>(addr); 331 return Read<u8>(addr);
192} 332}
diff --git a/src/core/memory.h b/src/core/memory.h
index 5af72b7a7..9caa3c3f5 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -148,4 +148,20 @@ VAddr PhysicalToVirtualAddress(PAddr addr);
148 */ 148 */
149u8* GetPhysicalPointer(PAddr address); 149u8* GetPhysicalPointer(PAddr address);
150 150
151/**
152 * Adds the supplied value to the rasterizer resource cache counter of each
153 * page touching the region.
154 */
155void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta);
156
157/**
158 * Flushes any externally cached rasterizer resources touching the given region.
159 */
160void RasterizerFlushRegion(PAddr start, u32 size);
161
162/**
163 * Flushes and invalidates any externally cached rasterizer resources touching the given region.
164 */
165void RasterizerFlushAndInvalidateRegion(PAddr start, u32 size);
166
151} 167}
diff --git a/src/core/settings.cpp b/src/core/settings.cpp
index 1aa26fbd2..eaf5c8461 100644
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -19,7 +19,7 @@ void Apply() {
19 19
20 VideoCore::g_hw_renderer_enabled = values.use_hw_renderer; 20 VideoCore::g_hw_renderer_enabled = values.use_hw_renderer;
21 VideoCore::g_shader_jit_enabled = values.use_shader_jit; 21 VideoCore::g_shader_jit_enabled = values.use_shader_jit;
22 22 VideoCore::g_scaled_resolution_enabled = values.use_scaled_resolution;
23} 23}
24 24
25} // namespace 25} // namespace
diff --git a/src/core/settings.h b/src/core/settings.h
index 4933a516d..d620d8461 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -55,6 +55,7 @@ struct Values {
55 // Renderer 55 // Renderer
56 bool use_hw_renderer; 56 bool use_hw_renderer;
57 bool use_shader_jit; 57 bool use_shader_jit;
58 bool use_scaled_resolution;
58 59
59 float bg_red; 60 float bg_red;
60 float bg_green; 61 float bg_green;
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 76cfd4f7d..de4082b1f 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -16,6 +16,7 @@ set(SRCS
16 shader/shader_interpreter.cpp 16 shader/shader_interpreter.cpp
17 swrasterizer.cpp 17 swrasterizer.cpp
18 utils.cpp 18 utils.cpp
19 vertex_loader.cpp
19 video_core.cpp 20 video_core.cpp
20 ) 21 )
21 22
@@ -43,6 +44,7 @@ set(HEADERS
43 shader/shader_interpreter.h 44 shader/shader_interpreter.h
44 swrasterizer.h 45 swrasterizer.h
45 utils.h 46 utils.h
47 vertex_loader.h
46 video_core.h 48 video_core.h
47 ) 49 )
48 50
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 3abe79c09..58883e374 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -7,7 +7,6 @@
7 7
8#include "common/alignment.h" 8#include "common/alignment.h"
9#include "common/microprofile.h" 9#include "common/microprofile.h"
10#include "common/profiler.h"
11 10
12#include "core/settings.h" 11#include "core/settings.h"
13#include "core/hle/service/gsp_gpu.h" 12#include "core/hle/service/gsp_gpu.h"
@@ -22,6 +21,7 @@
22#include "video_core/video_core.h" 21#include "video_core/video_core.h"
23#include "video_core/debug_utils/debug_utils.h" 22#include "video_core/debug_utils/debug_utils.h"
24#include "video_core/shader/shader_interpreter.h" 23#include "video_core/shader/shader_interpreter.h"
24#include "video_core/vertex_loader.h"
25 25
26namespace Pica { 26namespace Pica {
27 27
@@ -35,8 +35,6 @@ static int default_attr_counter = 0;
35 35
36static u32 default_attr_write_buffer[3]; 36static u32 default_attr_write_buffer[3];
37 37
38Common::Profiling::TimingCategory category_drawing("Drawing");
39
40// Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF 38// Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF
41static const u32 expand_bits_to_bytes[] = { 39static const u32 expand_bits_to_bytes[] = {
42 0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff, 40 0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff,
@@ -186,60 +184,19 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
186 case PICA_REG_INDEX(trigger_draw): 184 case PICA_REG_INDEX(trigger_draw):
187 case PICA_REG_INDEX(trigger_draw_indexed): 185 case PICA_REG_INDEX(trigger_draw_indexed):
188 { 186 {
189 Common::Profiling::ScopeTimer scope_timer(category_drawing);
190 MICROPROFILE_SCOPE(GPU_Drawing); 187 MICROPROFILE_SCOPE(GPU_Drawing);
191 188
192#if PICA_LOG_TEV 189#if PICA_LOG_TEV
193 DebugUtils::DumpTevStageConfig(regs.GetTevStages()); 190 DebugUtils::DumpTevStageConfig(regs.GetTevStages());
194#endif 191#endif
195
196 if (g_debug_context) 192 if (g_debug_context)
197 g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr); 193 g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr);
198 194
199 const auto& attribute_config = regs.vertex_attributes; 195 // Processes information about internal vertex attributes to figure out how a vertex is loaded.
200 const u32 base_address = attribute_config.GetPhysicalBaseAddress(); 196 // Later, these can be compiled and cached.
201 197 VertexLoader loader;
202 // Information about internal vertex attributes 198 const u32 base_address = regs.vertex_attributes.GetPhysicalBaseAddress();
203 u32 vertex_attribute_sources[16]; 199 loader.Setup(regs);
204 boost::fill(vertex_attribute_sources, 0xdeadbeef);
205 u32 vertex_attribute_strides[16] = {};
206 Regs::VertexAttributeFormat vertex_attribute_formats[16] = {};
207
208 u32 vertex_attribute_elements[16] = {};
209 u32 vertex_attribute_element_size[16] = {};
210
211 // Setup attribute data from loaders
212 for (int loader = 0; loader < 12; ++loader) {
213 const auto& loader_config = attribute_config.attribute_loaders[loader];
214
215 u32 offset = 0;
216
217 // TODO: What happens if a loader overwrites a previous one's data?
218 for (unsigned component = 0; component < loader_config.component_count; ++component) {
219 if (component >= 12) {
220 LOG_ERROR(HW_GPU, "Overflow in the vertex attribute loader %u trying to load component %u", loader, component);
221 continue;
222 }
223
224 u32 attribute_index = loader_config.GetComponent(component);
225 if (attribute_index < 12) {
226 int element_size = attribute_config.GetElementSizeInBytes(attribute_index);
227 offset = Common::AlignUp(offset, element_size);
228 vertex_attribute_sources[attribute_index] = base_address + loader_config.data_offset + offset;
229 vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count);
230 vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index);
231 vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index);
232 vertex_attribute_element_size[attribute_index] = element_size;
233 offset += attribute_config.GetStride(attribute_index);
234 } else if (attribute_index < 16) {
235 // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively
236 offset = Common::AlignUp(offset, 4);
237 offset += (attribute_index - 11) * 4;
238 } else {
239 UNREACHABLE(); // This is truly unreachable due to the number of bits for each component
240 }
241 }
242 }
243 200
244 // Load vertices 201 // Load vertices
245 bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed)); 202 bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed));
@@ -263,32 +220,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
263 } 220 }
264 } 221 }
265 222
266 class { 223 DebugUtils::MemoryAccessTracker memory_accesses;
267 /// Combine overlapping and close ranges
268 void SimplifyRanges() {
269 for (auto it = ranges.begin(); it != ranges.end(); ++it) {
270 // NOTE: We add 32 to the range end address to make sure "close" ranges are combined, too
271 auto it2 = std::next(it);
272 while (it2 != ranges.end() && it->first + it->second + 32 >= it2->first) {
273 it->second = std::max(it->second, it2->first + it2->second - it->first);
274 it2 = ranges.erase(it2);
275 }
276 }
277 }
278
279 public:
280 /// Record a particular memory access in the list
281 void AddAccess(u32 paddr, u32 size) {
282 // Create new range or extend existing one
283 ranges[paddr] = std::max(ranges[paddr], size);
284
285 // Simplify ranges...
286 SimplifyRanges();
287 }
288
289 /// Map of accessed ranges (mapping start address to range size)
290 std::map<u32, u32> ranges;
291 } memory_accesses;
292 224
293 // Simple circular-replacement vertex cache 225 // Simple circular-replacement vertex cache
294 // The size has been tuned for optimal balance between hit-rate and the cost of lookup 226 // The size has been tuned for optimal balance between hit-rate and the cost of lookup
@@ -332,60 +264,13 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
332 if (!vertex_cache_hit) { 264 if (!vertex_cache_hit) {
333 // Initialize data for the current vertex 265 // Initialize data for the current vertex
334 Shader::InputVertex input; 266 Shader::InputVertex input;
335 267 loader.LoadVertex(base_address, index, vertex, input, memory_accesses);
336 for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) {
337 if (vertex_attribute_elements[i] != 0) {
338 // Default attribute values set if array elements have < 4 components. This
339 // is *not* carried over from the default attribute settings even if they're
340 // enabled for this attribute.
341 static const float24 zero = float24::FromFloat32(0.0f);
342 static const float24 one = float24::FromFloat32(1.0f);
343 input.attr[i] = Math::Vec4<float24>(zero, zero, zero, one);
344
345 // Load per-vertex data from the loader arrays
346 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
347 u32 source_addr = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i];
348 const u8* srcdata = Memory::GetPhysicalPointer(source_addr);
349
350 if (g_debug_context && Pica::g_debug_context->recorder) {
351 memory_accesses.AddAccess(source_addr,
352 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4
353 : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1);
354 }
355
356 const float srcval =
357 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE) ? *reinterpret_cast<const s8*>(srcdata) :
358 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *reinterpret_cast<const u8*>(srcdata) :
359 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? *reinterpret_cast<const s16*>(srcdata) :
360 *reinterpret_cast<const float*>(srcdata);
361
362 input.attr[i][comp] = float24::FromFloat32(srcval);
363 LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f",
364 comp, i, vertex, index,
365 attribute_config.GetPhysicalBaseAddress(),
366 vertex_attribute_sources[i] - base_address,
367 vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i],
368 input.attr[i][comp].ToFloat32());
369 }
370 } else if (attribute_config.IsDefaultAttribute(i)) {
371 // Load the default attribute if we're configured to do so
372 input.attr[i] = g_state.vs.default_attributes[i];
373 LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)",
374 i, vertex, index,
375 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),
376 input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
377 } else {
378 // TODO(yuriks): In this case, no data gets loaded and the vertex
379 // remains with the last value it had. This isn't currently maintained
380 // as global state, however, and so won't work in Citra yet.
381 }
382 }
383 268
384 if (g_debug_context) 269 if (g_debug_context)
385 g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input); 270 g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input);
386 271
387 // Send to vertex shader 272 // Send to vertex shader
388 output = Shader::Run(shader_unit, input, attribute_config.GetNumTotalAttributes()); 273 output = Shader::Run(shader_unit, input, loader.GetNumTotalAttributes());
389 274
390 if (is_indexed) { 275 if (is_indexed) {
391 vertex_cache[vertex_cache_pos] = output; 276 vertex_cache[vertex_cache_pos] = output;
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index c3a9c9598..178a566f7 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -40,15 +40,12 @@ using nihstro::DVLPHeader;
40 40
41namespace Pica { 41namespace Pica {
42 42
43void DebugContext::OnEvent(Event event, void* data) { 43void DebugContext::DoOnEvent(Event event, void* data) {
44 if (!breakpoints[event].enabled)
45 return;
46
47 { 44 {
48 std::unique_lock<std::mutex> lock(breakpoint_mutex); 45 std::unique_lock<std::mutex> lock(breakpoint_mutex);
49 46
50 // Commit the hardware renderer's framebuffer so it will show on debug widgets 47 // Commit the rasterizer's caches so framebuffers, render targets, etc. will show on debug widgets
51 VideoCore::g_renderer->Rasterizer()->FlushFramebuffer(); 48 VideoCore::g_renderer->Rasterizer()->FlushAll();
52 49
53 // TODO: Should stop the CPU thread here once we multithread emulation. 50 // TODO: Should stop the CPU thread here once we multithread emulation.
54 51
diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h
index 7df941619..dd0828cee 100644
--- a/src/video_core/debug_utils/debug_utils.h
+++ b/src/video_core/debug_utils/debug_utils.h
@@ -114,7 +114,15 @@ public:
114 * @param event Event which has happened 114 * @param event Event which has happened
115 * @param data Optional data pointer (pass nullptr if unused). Needs to remain valid until Resume() is called. 115 * @param data Optional data pointer (pass nullptr if unused). Needs to remain valid until Resume() is called.
116 */ 116 */
117 void OnEvent(Event event, void* data); 117 void OnEvent(Event event, void* data) {
118 // This check is left in the header to allow the compiler to inline it.
119 if (!breakpoints[(int)event].enabled)
120 return;
121 // For the rest of event handling, call a separate function.
122 DoOnEvent(event, data);
123 }
124
125 void DoOnEvent(Event event, void *data);
118 126
119 /** 127 /**
120 * Resume from the current breakpoint. 128 * Resume from the current breakpoint.
@@ -126,12 +134,14 @@ public:
126 * Delete all set breakpoints and resume emulation. 134 * Delete all set breakpoints and resume emulation.
127 */ 135 */
128 void ClearBreakpoints() { 136 void ClearBreakpoints() {
129 breakpoints.clear(); 137 for (auto &bp : breakpoints) {
138 bp.enabled = false;
139 }
130 Resume(); 140 Resume();
131 } 141 }
132 142
133 // TODO: Evaluate if access to these members should be hidden behind a public interface. 143 // TODO: Evaluate if access to these members should be hidden behind a public interface.
134 std::map<Event, BreakPoint> breakpoints; 144 std::array<BreakPoint, (int)Event::NumEvents> breakpoints;
135 Event active_breakpoint; 145 Event active_breakpoint;
136 bool at_breakpoint = false; 146 bool at_breakpoint = false;
137 147
@@ -206,6 +216,36 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data);
206 216
207void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages); 217void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages);
208 218
/**
 * Used in the vertex loader to merge access records. TODO: Investigate if actually useful.
 *
 * Accesses are stored as ranges keyed by start address. After every AddAccess() call no range
 * overlaps or lies within MERGE_SLACK bytes of the range that follows it. AddAccess() relies on
 * that property to merge only around the entry it touched, so `ranges` should not be modified
 * by other means between calls.
 */
class MemoryAccessTracker {
    /// A range is merged into its predecessor if it starts at most this many bytes past the
    /// predecessor's end, so that "close" ranges are combined, too
    static constexpr u32 MERGE_SLACK = 32;

    /// Merge into `it` every following range that overlaps it or is close to it
    void AbsorbFollowing(std::map<u32, u32>::iterator it) {
        auto next = std::next(it);
        while (next != ranges.end() && it->first + it->second + MERGE_SLACK >= next->first) {
            it->second = std::max(it->second, next->first + next->second - it->first);
            next = ranges.erase(next);
        }
    }

public:
    /// Record a particular memory access in the list
    void AddAccess(u32 paddr, u32 size) {
        // Create new range or extend existing one (single lookup)
        auto entry = ranges.lower_bound(paddr);
        if (entry != ranges.end() && entry->first == paddr) {
            entry->second = std::max(entry->second, size);
        } else {
            entry = ranges.emplace_hint(entry, paddr, size);
        }

        // Only the touched entry and its neighbours can need merging. If the preceding range
        // reaches the touched entry, merging has to start from that preceding range.
        auto merge_from = entry;
        if (entry != ranges.begin()) {
            auto pred = std::prev(entry);
            if (pred->first + pred->second + MERGE_SLACK >= entry->first) {
                merge_from = pred;
            }
        }
        AbsorbFollowing(merge_from);
    }

    /// Map of accessed ranges (mapping start address to range size)
    std::map<u32, u32> ranges;
};
248
209} // namespace 249} // namespace
210 250
211} // namespace 251} // namespace
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 4552ff81c..cf130d7f8 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -577,7 +577,7 @@ struct Regs {
577 } 577 }
578 } 578 }
579 579
580 struct { 580 struct FramebufferConfig {
581 INSERT_PADDING_WORDS(0x3); 581 INSERT_PADDING_WORDS(0x3);
582 582
583 union { 583 union {
@@ -747,8 +747,13 @@ struct Regs {
747 case LightingSampler::ReflectGreen: 747 case LightingSampler::ReflectGreen:
748 case LightingSampler::ReflectBlue: 748 case LightingSampler::ReflectBlue:
749 return (config == LightingConfig::Config4) || (config == LightingConfig::Config5) || (config == LightingConfig::Config7); 749 return (config == LightingConfig::Config4) || (config == LightingConfig::Config5) || (config == LightingConfig::Config7);
750 default:
751 UNREACHABLE_MSG("Regs::IsLightingSamplerSupported: Reached "
752 "unreachable section, sampler should be one "
753 "of Distribution0, Distribution1, Fresnel, "
754 "ReflectRed, ReflectGreen or ReflectBlue, instead "
755 "got %i", static_cast<int>(config));
750 } 756 }
751 return false;
752 } 757 }
753 758
754 struct { 759 struct {
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 0434ad05a..9cf77b1f2 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -9,7 +9,6 @@
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "common/math_util.h" 10#include "common/math_util.h"
11#include "common/microprofile.h" 11#include "common/microprofile.h"
12#include "common/profiler.h"
13 12
14#include "core/memory.h" 13#include "core/memory.h"
15#include "core/hw/gpu.h" 14#include "core/hw/gpu.h"
@@ -287,7 +286,6 @@ static int SignedArea (const Math::Vec2<Fix12P4>& vtx1,
287 return Math::Cross(vec1, vec2).z; 286 return Math::Cross(vec1, vec2).z;
288}; 287};
289 288
290static Common::Profiling::TimingCategory rasterization_category("Rasterization");
291MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 240)); 289MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 240));
292 290
293/** 291/**
@@ -300,7 +298,6 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
300 bool reversed = false) 298 bool reversed = false)
301{ 299{
302 const auto& regs = g_state.regs; 300 const auto& regs = g_state.regs;
303 Common::Profiling::ScopeTimer timer(rasterization_category);
304 MICROPROFILE_SCOPE(GPU_Rasterization); 301 MICROPROFILE_SCOPE(GPU_Rasterization);
305 302
306 // vertex positions in rasterizer coordinates 303 // vertex positions in rasterizer coordinates
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 008c5827b..bf7101665 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -6,6 +6,10 @@
6 6
7#include "common/common_types.h" 7#include "common/common_types.h"
8 8
9#include "core/hw/gpu.h"
10
11struct ScreenInfo;
12
9namespace Pica { 13namespace Pica {
10namespace Shader { 14namespace Shader {
11struct OutputVertex; 15struct OutputVertex;
@@ -18,12 +22,6 @@ class RasterizerInterface {
18public: 22public:
19 virtual ~RasterizerInterface() {} 23 virtual ~RasterizerInterface() {}
20 24
21 /// Initialize API-specific GPU objects
22 virtual void InitObjects() = 0;
23
24 /// Reset the rasterizer, such as flushing all caches and updating all state
25 virtual void Reset() = 0;
26
27 /// Queues the primitive formed by the given vertices for rendering 25 /// Queues the primitive formed by the given vertices for rendering
28 virtual void AddTriangle(const Pica::Shader::OutputVertex& v0, 26 virtual void AddTriangle(const Pica::Shader::OutputVertex& v0,
29 const Pica::Shader::OutputVertex& v1, 27 const Pica::Shader::OutputVertex& v1,
@@ -32,17 +30,26 @@ public:
32 /// Draw the current batch of triangles 30 /// Draw the current batch of triangles
33 virtual void DrawTriangles() = 0; 31 virtual void DrawTriangles() = 0;
34 32
35 /// Commit the rasterizer's framebuffer contents immediately to the current 3DS memory framebuffer
36 virtual void FlushFramebuffer() = 0;
37
38 /// Notify rasterizer that the specified PICA register has been changed 33 /// Notify rasterizer that the specified PICA register has been changed
39 virtual void NotifyPicaRegisterChanged(u32 id) = 0; 34 virtual void NotifyPicaRegisterChanged(u32 id) = 0;
40 35
41 /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory. 36 /// Notify rasterizer that all caches should be flushed to 3DS memory
37 virtual void FlushAll() = 0;
38
39 /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory
42 virtual void FlushRegion(PAddr addr, u32 size) = 0; 40 virtual void FlushRegion(PAddr addr, u32 size) = 0;
43 41
44 /// Notify rasterizer that any caches of the specified region should be discraded and reloaded from 3DS memory. 42 /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory and invalidated
45 virtual void InvalidateRegion(PAddr addr, u32 size) = 0; 43 virtual void FlushAndInvalidateRegion(PAddr addr, u32 size) = 0;
44
45 /// Attempt to use a faster method to perform a display transfer
46 virtual bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { return false; }
47
48 /// Attempt to use a faster method to fill a region
49 virtual bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { return false; }
50
51 /// Attempt to use a faster method to display the framebuffer to screen
52 virtual bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) { return false; }
46}; 53};
47 54
48} 55}
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index 101f84eb9..ccd497de0 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -21,7 +21,5 @@ void RendererBase::RefreshRasterizerSetting() {
21 } else { 21 } else {
22 rasterizer = std::make_unique<VideoCore::SWRasterizer>(); 22 rasterizer = std::make_unique<VideoCore::SWRasterizer>();
23 } 23 }
24 rasterizer->InitObjects();
25 rasterizer->Reset();
26 } 24 }
27} 25}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 6ca9f45e2..a8c775c80 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -11,7 +11,6 @@
11#include "common/file_util.h" 11#include "common/file_util.h"
12#include "common/math_util.h" 12#include "common/math_util.h"
13#include "common/microprofile.h" 13#include "common/microprofile.h"
14#include "common/profiler.h"
15 14
16#include "core/memory.h" 15#include "core/memory.h"
17#include "core/settings.h" 16#include "core/settings.h"
@@ -36,10 +35,7 @@ static bool IsPassThroughTevStage(const Pica::Regs::TevStageConfig& stage) {
36 stage.GetAlphaMultiplier() == 1); 35 stage.GetAlphaMultiplier() == 1);
37} 36}
38 37
39RasterizerOpenGL::RasterizerOpenGL() : cached_fb_color_addr(0), cached_fb_depth_addr(0) { } 38RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
40RasterizerOpenGL::~RasterizerOpenGL() { }
41
42void RasterizerOpenGL::InitObjects() {
43 // Create sampler objects 39 // Create sampler objects
44 for (size_t i = 0; i < texture_samplers.size(); ++i) { 40 for (size_t i = 0; i < texture_samplers.size(); ++i) {
45 texture_samplers[i].Create(); 41 texture_samplers[i].Create();
@@ -61,6 +57,10 @@ void RasterizerOpenGL::InitObjects() {
61 57
62 uniform_block_data.dirty = true; 58 uniform_block_data.dirty = true;
63 59
60 for (unsigned index = 0; index < lighting_luts.size(); index++) {
61 uniform_block_data.lut_dirty[index] = true;
62 }
63
64 // Set vertex attributes 64 // Set vertex attributes
65 glVertexAttribPointer(GLShader::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position)); 65 glVertexAttribPointer(GLShader::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position));
66 glEnableVertexAttribArray(GLShader::ATTRIBUTE_POSITION); 66 glEnableVertexAttribArray(GLShader::ATTRIBUTE_POSITION);
@@ -81,70 +81,24 @@ void RasterizerOpenGL::InitObjects() {
81 glVertexAttribPointer(GLShader::ATTRIBUTE_VIEW, 3, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, view)); 81 glVertexAttribPointer(GLShader::ATTRIBUTE_VIEW, 3, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, view));
82 glEnableVertexAttribArray(GLShader::ATTRIBUTE_VIEW); 82 glEnableVertexAttribArray(GLShader::ATTRIBUTE_VIEW);
83 83
84 SetShader(); 84 // Create render framebuffer
85
86 // Create textures for OGL framebuffer that will be rendered to, initially 1x1 to succeed in framebuffer creation
87 fb_color_texture.texture.Create();
88 ReconfigureColorTexture(fb_color_texture, Pica::Regs::ColorFormat::RGBA8, 1, 1);
89
90 state.texture_units[0].texture_2d = fb_color_texture.texture.handle;
91 state.Apply();
92
93 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
94 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
95 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
96 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
97 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
98
99 state.texture_units[0].texture_2d = 0;
100 state.Apply();
101
102 fb_depth_texture.texture.Create();
103 ReconfigureDepthTexture(fb_depth_texture, Pica::Regs::DepthFormat::D16, 1, 1);
104
105 state.texture_units[0].texture_2d = fb_depth_texture.texture.handle;
106 state.Apply();
107
108 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
109 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
110 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
111 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
112 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
113 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_FUNC, GL_LEQUAL);
114 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_MODE, GL_NONE);
115
116 state.texture_units[0].texture_2d = 0;
117 state.Apply();
118
119 // Configure OpenGL framebuffer
120 framebuffer.Create(); 85 framebuffer.Create();
121 86
122 state.draw.framebuffer = framebuffer.handle; 87 // Allocate and bind lighting lut textures
88 for (size_t i = 0; i < lighting_luts.size(); ++i) {
89 lighting_luts[i].Create();
90 state.lighting_luts[i].texture_1d = lighting_luts[i].handle;
91 }
123 state.Apply(); 92 state.Apply();
124 93
125 glActiveTexture(GL_TEXTURE0); 94 for (size_t i = 0; i < lighting_luts.size(); ++i) {
126 glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, fb_color_texture.texture.handle, 0);
127 glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, fb_depth_texture.texture.handle, 0);
128
129 for (size_t i = 0; i < lighting_lut.size(); ++i) {
130 lighting_lut[i].Create();
131 state.lighting_lut[i].texture_1d = lighting_lut[i].handle;
132
133 glActiveTexture(GL_TEXTURE3 + i); 95 glActiveTexture(GL_TEXTURE3 + i);
134 glBindTexture(GL_TEXTURE_1D, state.lighting_lut[i].texture_1d);
135
136 glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr); 96 glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr);
137 glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 97 glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
138 glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); 98 glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
139 } 99 }
140 state.Apply();
141
142 GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER);
143 ASSERT_MSG(status == GL_FRAMEBUFFER_COMPLETE,
144 "OpenGL rasterizer framebuffer setup failed, status %X", status);
145}
146 100
147void RasterizerOpenGL::Reset() { 101 // Sync fixed function OpenGL state
148 SyncCullMode(); 102 SyncCullMode();
149 SyncDepthModifiers(); 103 SyncDepthModifiers();
150 SyncBlendEnabled(); 104 SyncBlendEnabled();
@@ -156,10 +110,10 @@ void RasterizerOpenGL::Reset() {
156 SyncColorWriteMask(); 110 SyncColorWriteMask();
157 SyncStencilWriteMask(); 111 SyncStencilWriteMask();
158 SyncDepthWriteMask(); 112 SyncDepthWriteMask();
113}
159 114
160 SetShader(); 115RasterizerOpenGL::~RasterizerOpenGL() {
161 116
162 res_cache.InvalidateAll();
163} 117}
164 118
165/** 119/**
@@ -196,47 +150,98 @@ void RasterizerOpenGL::DrawTriangles() {
196 if (vertex_batch.empty()) 150 if (vertex_batch.empty())
197 return; 151 return;
198 152
199 SyncFramebuffer(); 153 const auto& regs = Pica::g_state.regs;
200 SyncDrawState(); 154
155 // Sync and bind the framebuffer surfaces
156 CachedSurface* color_surface;
157 CachedSurface* depth_surface;
158 MathUtil::Rectangle<int> rect;
159 std::tie(color_surface, depth_surface, rect) = res_cache.GetFramebufferSurfaces(regs.framebuffer);
160
161 state.draw.draw_framebuffer = framebuffer.handle;
162 state.Apply();
163
164 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, color_surface != nullptr ? color_surface->texture.handle : 0, 0);
165 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, depth_surface != nullptr ? depth_surface->texture.handle : 0, 0);
166 bool has_stencil = regs.framebuffer.depth_format == Pica::Regs::DepthFormat::D24S8;
167 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, (has_stencil && depth_surface != nullptr) ? depth_surface->texture.handle : 0, 0);
168
169 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
170 return;
171 }
172
173 // Sync the viewport
174 // These registers hold half-width and half-height, so must be multiplied by 2
175 GLsizei viewport_width = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_x).ToFloat32() * 2;
176 GLsizei viewport_height = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_y).ToFloat32() * 2;
177
178 glViewport((GLint)(rect.left + regs.viewport_corner.x * color_surface->res_scale_width),
179 (GLint)(rect.bottom + regs.viewport_corner.y * color_surface->res_scale_height),
180 (GLsizei)(viewport_width * color_surface->res_scale_width), (GLsizei)(viewport_height * color_surface->res_scale_height));
181
182 // Sync and bind the texture surfaces
183 const auto pica_textures = regs.GetTextures();
184 for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) {
185 const auto& texture = pica_textures[texture_index];
186
187 if (texture.enabled) {
188 texture_samplers[texture_index].SyncWithConfig(texture.config);
189 CachedSurface* surface = res_cache.GetTextureSurface(texture);
190 if (surface != nullptr) {
191 state.texture_units[texture_index].texture_2d = surface->texture.handle;
192 } else {
193 // Can occur when texture addr is null or its memory is unmapped/invalid
194 state.texture_units[texture_index].texture_2d = 0;
195 }
196 } else {
197 state.texture_units[texture_index].texture_2d = 0;
198 }
199 }
201 200
202 if (state.draw.shader_dirty) { 201 // Sync and bind the shader
202 if (shader_dirty) {
203 SetShader(); 203 SetShader();
204 state.draw.shader_dirty = false; 204 shader_dirty = false;
205 } 205 }
206 206
207 for (unsigned index = 0; index < lighting_lut.size(); index++) { 207 // Sync the lighting luts
208 for (unsigned index = 0; index < lighting_luts.size(); index++) {
208 if (uniform_block_data.lut_dirty[index]) { 209 if (uniform_block_data.lut_dirty[index]) {
209 SyncLightingLUT(index); 210 SyncLightingLUT(index);
210 uniform_block_data.lut_dirty[index] = false; 211 uniform_block_data.lut_dirty[index] = false;
211 } 212 }
212 } 213 }
213 214
215 // Sync the uniform data
214 if (uniform_block_data.dirty) { 216 if (uniform_block_data.dirty) {
215 glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, GL_STATIC_DRAW); 217 glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, GL_STATIC_DRAW);
216 uniform_block_data.dirty = false; 218 uniform_block_data.dirty = false;
217 } 219 }
218 220
221 state.Apply();
222
223 // Draw the vertex batch
219 glBufferData(GL_ARRAY_BUFFER, vertex_batch.size() * sizeof(HardwareVertex), vertex_batch.data(), GL_STREAM_DRAW); 224 glBufferData(GL_ARRAY_BUFFER, vertex_batch.size() * sizeof(HardwareVertex), vertex_batch.data(), GL_STREAM_DRAW);
220 glDrawArrays(GL_TRIANGLES, 0, (GLsizei)vertex_batch.size()); 225 glDrawArrays(GL_TRIANGLES, 0, (GLsizei)vertex_batch.size());
221 226
222 vertex_batch.clear(); 227 // Mark framebuffer surfaces as dirty
223 228 // TODO: Restrict invalidation area to the viewport
224 // Flush the resource cache at the current depth and color framebuffer addresses for render-to-texture 229 if (color_surface != nullptr) {
225 const auto& regs = Pica::g_state.regs; 230 color_surface->dirty = true;
226 231 res_cache.FlushRegion(color_surface->addr, color_surface->size, color_surface, true);
227 u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format) 232 }
228 * fb_color_texture.width * fb_color_texture.height; 233 if (depth_surface != nullptr) {
229 234 depth_surface->dirty = true;
230 u32 cached_fb_depth_size = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format) 235 res_cache.FlushRegion(depth_surface->addr, depth_surface->size, depth_surface, true);
231 * fb_depth_texture.width * fb_depth_texture.height; 236 }
232 237
233 res_cache.InvalidateInRange(cached_fb_color_addr, cached_fb_color_size, true); 238 vertex_batch.clear();
234 res_cache.InvalidateInRange(cached_fb_depth_addr, cached_fb_depth_size, true);
235}
236 239
237void RasterizerOpenGL::FlushFramebuffer() { 240 // Unbind textures for potential future use as framebuffer attachments
238 CommitColorBuffer(); 241 for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) {
239 CommitDepthBuffer(); 242 state.texture_units[texture_index].texture_2d = 0;
243 }
244 state.Apply();
240} 245}
241 246
242void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { 247void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
@@ -268,7 +273,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
268 // Alpha test 273 // Alpha test
269 case PICA_REG_INDEX(output_merger.alpha_test): 274 case PICA_REG_INDEX(output_merger.alpha_test):
270 SyncAlphaTest(); 275 SyncAlphaTest();
271 state.draw.shader_dirty = true; 276 shader_dirty = true;
272 break; 277 break;
273 278
274 // Sync GL stencil test + stencil write mask 279 // Sync GL stencil test + stencil write mask
@@ -334,7 +339,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
334 case PICA_REG_INDEX(tev_stage5.color_op): 339 case PICA_REG_INDEX(tev_stage5.color_op):
335 case PICA_REG_INDEX(tev_stage5.color_scale): 340 case PICA_REG_INDEX(tev_stage5.color_scale):
336 case PICA_REG_INDEX(tev_combiner_buffer_input): 341 case PICA_REG_INDEX(tev_combiner_buffer_input):
337 state.draw.shader_dirty = true; 342 shader_dirty = true;
338 break; 343 break;
339 case PICA_REG_INDEX(tev_stage0.const_r): 344 case PICA_REG_INDEX(tev_stage0.const_r):
340 SyncTevConstColor(0, regs.tev_stage0); 345 SyncTevConstColor(0, regs.tev_stage0);
@@ -521,41 +526,257 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
521 } 526 }
522} 527}
523 528
529void RasterizerOpenGL::FlushAll() {
530 res_cache.FlushAll();
531}
532
524void RasterizerOpenGL::FlushRegion(PAddr addr, u32 size) { 533void RasterizerOpenGL::FlushRegion(PAddr addr, u32 size) {
525 const auto& regs = Pica::g_state.regs; 534 res_cache.FlushRegion(addr, size, nullptr, false);
535}
526 536
527 u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format) 537void RasterizerOpenGL::FlushAndInvalidateRegion(PAddr addr, u32 size) {
528 * fb_color_texture.width * fb_color_texture.height; 538 res_cache.FlushRegion(addr, size, nullptr, true);
539}
529 540
530 u32 cached_fb_depth_size = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format) 541bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) {
531 * fb_depth_texture.width * fb_depth_texture.height; 542 using PixelFormat = CachedSurface::PixelFormat;
543 using SurfaceType = CachedSurface::SurfaceType;
532 544
533 // If source memory region overlaps 3DS framebuffers, commit them before the copy happens 545 if (config.is_texture_copy) {
534 if (MathUtil::IntervalsIntersect(addr, size, cached_fb_color_addr, cached_fb_color_size)) 546 // TODO(tfarley): Try to hardware accelerate this
535 CommitColorBuffer(); 547 return false;
548 }
536 549
537 if (MathUtil::IntervalsIntersect(addr, size, cached_fb_depth_addr, cached_fb_depth_size)) 550 CachedSurface src_params;
538 CommitDepthBuffer(); 551 src_params.addr = config.GetPhysicalInputAddress();
552 src_params.width = config.output_width;
553 src_params.height = config.output_height;
554 src_params.is_tiled = !config.input_linear;
555 src_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.input_format);
556
557 CachedSurface dst_params;
558 dst_params.addr = config.GetPhysicalOutputAddress();
559 dst_params.width = config.scaling != config.NoScale ? config.output_width / 2 : config.output_width.Value();
560 dst_params.height = config.scaling == config.ScaleXY ? config.output_height / 2 : config.output_height.Value();
561 dst_params.is_tiled = config.input_linear != config.dont_swizzle;
562 dst_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.output_format);
563
564 MathUtil::Rectangle<int> src_rect;
565 CachedSurface* src_surface = res_cache.GetSurfaceRect(src_params, false, true, src_rect);
566
567 if (src_surface == nullptr) {
568 return false;
569 }
570
571 // Require destination surface to have same resolution scale as source to preserve scaling
572 dst_params.res_scale_width = src_surface->res_scale_width;
573 dst_params.res_scale_height = src_surface->res_scale_height;
574
575 MathUtil::Rectangle<int> dst_rect;
576 CachedSurface* dst_surface = res_cache.GetSurfaceRect(dst_params, true, false, dst_rect);
577
578 if (dst_surface == nullptr) {
579 return false;
580 }
581
582 // Don't accelerate if the src and dst surfaces are the same
583 if (src_surface == dst_surface) {
584 return false;
585 }
586
587 if (config.flip_vertically) {
588 std::swap(dst_rect.top, dst_rect.bottom);
589 }
590
591 if (!res_cache.TryBlitSurfaces(src_surface, src_rect, dst_surface, dst_rect)) {
592 return false;
593 }
594
595 u32 dst_size = dst_params.width * dst_params.height * CachedSurface::GetFormatBpp(dst_params.pixel_format) / 8;
596 dst_surface->dirty = true;
597 res_cache.FlushRegion(config.GetPhysicalOutputAddress(), dst_size, dst_surface, true);
598 return true;
539} 599}
540 600
541void RasterizerOpenGL::InvalidateRegion(PAddr addr, u32 size) { 601bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) {
542 const auto& regs = Pica::g_state.regs; 602 using PixelFormat = CachedSurface::PixelFormat;
603 using SurfaceType = CachedSurface::SurfaceType;
604
605 CachedSurface* dst_surface = res_cache.TryGetFillSurface(config);
606
607 if (dst_surface == nullptr) {
608 return false;
609 }
610
611 OpenGLState cur_state = OpenGLState::GetCurState();
612
613 SurfaceType dst_type = CachedSurface::GetFormatType(dst_surface->pixel_format);
543 614
544 u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format) 615 GLuint old_fb = cur_state.draw.draw_framebuffer;
545 * fb_color_texture.width * fb_color_texture.height; 616 cur_state.draw.draw_framebuffer = framebuffer.handle;
617 // TODO: When scissor test is implemented, need to disable scissor test in cur_state here so Clear call isn't affected
618 cur_state.Apply();
546 619
547 u32 cached_fb_depth_size = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format) 620 if (dst_type == SurfaceType::Color || dst_type == SurfaceType::Texture) {
548 * fb_depth_texture.width * fb_depth_texture.height; 621 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_surface->texture.handle, 0);
622 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
549 623
550 // If modified memory region overlaps 3DS framebuffers, reload their contents into OpenGL 624 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
551 if (MathUtil::IntervalsIntersect(addr, size, cached_fb_color_addr, cached_fb_color_size)) 625 return false;
552 ReloadColorBuffer(); 626 }
627
628 GLfloat color_values[4] = {0.0f, 0.0f, 0.0f, 0.0f};
629
630 // TODO: Handle additional pixel format and fill value size combinations to accelerate more cases
631 // For instance, checking if fill value's bytes/bits repeat to allow filling I8/A8/I4/A4/...
632 // Currently only handles formats that are multiples of the fill value size
633
634 if (config.fill_24bit) {
635 switch (dst_surface->pixel_format) {
636 case PixelFormat::RGB8:
637 color_values[0] = config.value_24bit_r / 255.0f;
638 color_values[1] = config.value_24bit_g / 255.0f;
639 color_values[2] = config.value_24bit_b / 255.0f;
640 break;
641 default:
642 return false;
643 }
644 } else if (config.fill_32bit) {
645 u32 value = config.value_32bit;
646
647 switch (dst_surface->pixel_format) {
648 case PixelFormat::RGBA8:
649 color_values[0] = (value >> 24) / 255.0f;
650 color_values[1] = ((value >> 16) & 0xFF) / 255.0f;
651 color_values[2] = ((value >> 8) & 0xFF) / 255.0f;
652 color_values[3] = (value & 0xFF) / 255.0f;
653 break;
654 default:
655 return false;
656 }
657 } else {
658 u16 value_16bit = config.value_16bit.Value();
659 Math::Vec4<u8> color;
660
661 switch (dst_surface->pixel_format) {
662 case PixelFormat::RGBA8:
663 color_values[0] = (value_16bit >> 8) / 255.0f;
664 color_values[1] = (value_16bit & 0xFF) / 255.0f;
665 color_values[2] = color_values[0];
666 color_values[3] = color_values[1];
667 break;
668 case PixelFormat::RGB5A1:
669 color = Color::DecodeRGB5A1((const u8*)&value_16bit);
670 color_values[0] = color[0] / 31.0f;
671 color_values[1] = color[1] / 31.0f;
672 color_values[2] = color[2] / 31.0f;
673 color_values[3] = color[3];
674 break;
675 case PixelFormat::RGB565:
676 color = Color::DecodeRGB565((const u8*)&value_16bit);
677 color_values[0] = color[0] / 31.0f;
678 color_values[1] = color[1] / 63.0f;
679 color_values[2] = color[2] / 31.0f;
680 break;
681 case PixelFormat::RGBA4:
682 color = Color::DecodeRGBA4((const u8*)&value_16bit);
683 color_values[0] = color[0] / 15.0f;
684 color_values[1] = color[1] / 15.0f;
685 color_values[2] = color[2] / 15.0f;
686 color_values[3] = color[3] / 15.0f;
687 break;
688 case PixelFormat::IA8:
689 case PixelFormat::RG8:
690 color_values[0] = (value_16bit >> 8) / 255.0f;
691 color_values[1] = (value_16bit & 0xFF) / 255.0f;
692 break;
693 default:
694 return false;
695 }
696 }
697
698 cur_state.color_mask.red_enabled = true;
699 cur_state.color_mask.green_enabled = true;
700 cur_state.color_mask.blue_enabled = true;
701 cur_state.color_mask.alpha_enabled = true;
702 cur_state.Apply();
703 glClearBufferfv(GL_COLOR, 0, color_values);
704 } else if (dst_type == SurfaceType::Depth) {
705 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
706 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_surface->texture.handle, 0);
707 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
708
709 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
710 return false;
711 }
712
713 GLfloat value_float;
714 if (dst_surface->pixel_format == CachedSurface::PixelFormat::D16) {
715 value_float = config.value_32bit / 65535.0f; // 2^16 - 1
716 } else if (dst_surface->pixel_format == CachedSurface::PixelFormat::D24) {
717 value_float = config.value_32bit / 16777215.0f; // 2^24 - 1
718 }
719
720 cur_state.depth.write_mask = true;
721 cur_state.Apply();
722 glClearBufferfv(GL_DEPTH, 0, &value_float);
723 } else if (dst_type == SurfaceType::DepthStencil) {
724 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
725 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, dst_surface->texture.handle, 0);
726
727 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
728 return false;
729 }
730
731 GLfloat value_float = (config.value_32bit & 0xFFFFFF) / 16777215.0f; // 2^24 - 1
732 GLint value_int = (config.value_32bit >> 24);
733
734 cur_state.depth.write_mask = true;
735 cur_state.stencil.write_mask = true;
736 cur_state.Apply();
737 glClearBufferfi(GL_DEPTH_STENCIL, 0, value_float, value_int);
738 }
553 739
554 if (MathUtil::IntervalsIntersect(addr, size, cached_fb_depth_addr, cached_fb_depth_size)) 740 cur_state.draw.draw_framebuffer = old_fb;
555 ReloadDepthBuffer(); 741 // TODO: Return scissor test to previous value when scissor test is implemented
742 cur_state.Apply();
556 743
557 // Notify cache of flush in case the region touches a cached resource 744 dst_surface->dirty = true;
558 res_cache.InvalidateInRange(addr, size); 745 res_cache.FlushRegion(dst_surface->addr, dst_surface->size, dst_surface, true);
746 return true;
747}
748
749bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) {
750 if (framebuffer_addr == 0) {
751 return false;
752 }
753
754 CachedSurface src_params;
755 src_params.addr = framebuffer_addr;
756 src_params.width = config.width;
757 src_params.height = config.height;
758 src_params.stride = pixel_stride;
759 src_params.is_tiled = false;
760 src_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.color_format);
761
762 MathUtil::Rectangle<int> src_rect;
763 CachedSurface* src_surface = res_cache.GetSurfaceRect(src_params, false, true, src_rect);
764
765 if (src_surface == nullptr) {
766 return false;
767 }
768
769 u32 scaled_width = src_surface->GetScaledWidth();
770 u32 scaled_height = src_surface->GetScaledHeight();
771
772 screen_info.display_texcoords = MathUtil::Rectangle<float>((float)src_rect.top / (float)scaled_height,
773 (float)src_rect.left / (float)scaled_width,
774 (float)src_rect.bottom / (float)scaled_height,
775 (float)src_rect.right / (float)scaled_width);
776
777 screen_info.display_texture = src_surface->texture.handle;
778
779 return true;
559} 780}
560 781
561void RasterizerOpenGL::SamplerInfo::Create() { 782void RasterizerOpenGL::SamplerInfo::Create() {
@@ -597,108 +818,6 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Pica::Regs::TextureConf
597 } 818 }
598} 819}
599 820
600void RasterizerOpenGL::ReconfigureColorTexture(TextureInfo& texture, Pica::Regs::ColorFormat format, u32 width, u32 height) {
601 GLint internal_format;
602
603 texture.format = format;
604 texture.width = width;
605 texture.height = height;
606
607 switch (format) {
608 case Pica::Regs::ColorFormat::RGBA8:
609 internal_format = GL_RGBA;
610 texture.gl_format = GL_RGBA;
611 texture.gl_type = GL_UNSIGNED_INT_8_8_8_8;
612 break;
613
614 case Pica::Regs::ColorFormat::RGB8:
615 // This pixel format uses BGR since GL_UNSIGNED_BYTE specifies byte-order, unlike every
616 // specific OpenGL type used in this function using native-endian (that is, little-endian
617 // mostly everywhere) for words or half-words.
618 // TODO: check how those behave on big-endian processors.
619 internal_format = GL_RGB;
620 texture.gl_format = GL_BGR;
621 texture.gl_type = GL_UNSIGNED_BYTE;
622 break;
623
624 case Pica::Regs::ColorFormat::RGB5A1:
625 internal_format = GL_RGBA;
626 texture.gl_format = GL_RGBA;
627 texture.gl_type = GL_UNSIGNED_SHORT_5_5_5_1;
628 break;
629
630 case Pica::Regs::ColorFormat::RGB565:
631 internal_format = GL_RGB;
632 texture.gl_format = GL_RGB;
633 texture.gl_type = GL_UNSIGNED_SHORT_5_6_5;
634 break;
635
636 case Pica::Regs::ColorFormat::RGBA4:
637 internal_format = GL_RGBA;
638 texture.gl_format = GL_RGBA;
639 texture.gl_type = GL_UNSIGNED_SHORT_4_4_4_4;
640 break;
641
642 default:
643 LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer texture color format %x", format);
644 UNIMPLEMENTED();
645 break;
646 }
647
648 state.texture_units[0].texture_2d = texture.texture.handle;
649 state.Apply();
650
651 glActiveTexture(GL_TEXTURE0);
652 glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0,
653 texture.gl_format, texture.gl_type, nullptr);
654
655 state.texture_units[0].texture_2d = 0;
656 state.Apply();
657}
658
659void RasterizerOpenGL::ReconfigureDepthTexture(DepthTextureInfo& texture, Pica::Regs::DepthFormat format, u32 width, u32 height) {
660 GLint internal_format;
661
662 texture.format = format;
663 texture.width = width;
664 texture.height = height;
665
666 switch (format) {
667 case Pica::Regs::DepthFormat::D16:
668 internal_format = GL_DEPTH_COMPONENT16;
669 texture.gl_format = GL_DEPTH_COMPONENT;
670 texture.gl_type = GL_UNSIGNED_SHORT;
671 break;
672
673 case Pica::Regs::DepthFormat::D24:
674 internal_format = GL_DEPTH_COMPONENT24;
675 texture.gl_format = GL_DEPTH_COMPONENT;
676 texture.gl_type = GL_UNSIGNED_INT;
677 break;
678
679 case Pica::Regs::DepthFormat::D24S8:
680 internal_format = GL_DEPTH24_STENCIL8;
681 texture.gl_format = GL_DEPTH_STENCIL;
682 texture.gl_type = GL_UNSIGNED_INT_24_8;
683 break;
684
685 default:
686 LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer texture depth format %x", format);
687 UNIMPLEMENTED();
688 break;
689 }
690
691 state.texture_units[0].texture_2d = texture.texture.handle;
692 state.Apply();
693
694 glActiveTexture(GL_TEXTURE0);
695 glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0,
696 texture.gl_format, texture.gl_type, nullptr);
697
698 state.texture_units[0].texture_2d = 0;
699 state.Apply();
700}
701
702void RasterizerOpenGL::SetShader() { 821void RasterizerOpenGL::SetShader() {
703 PicaShaderConfig config = PicaShaderConfig::CurrentConfig(); 822 PicaShaderConfig config = PicaShaderConfig::CurrentConfig();
704 std::unique_ptr<PicaShader> shader = std::make_unique<PicaShader>(); 823 std::unique_ptr<PicaShader> shader = std::make_unique<PicaShader>();
@@ -754,6 +873,8 @@ void RasterizerOpenGL::SetShader() {
754 873
755 SyncGlobalAmbient(); 874 SyncGlobalAmbient();
756 for (int light_index = 0; light_index < 8; light_index++) { 875 for (int light_index = 0; light_index < 8; light_index++) {
876 SyncLightSpecular0(light_index);
877 SyncLightSpecular1(light_index);
757 SyncLightDiffuse(light_index); 878 SyncLightDiffuse(light_index);
758 SyncLightAmbient(light_index); 879 SyncLightAmbient(light_index);
759 SyncLightPosition(light_index); 880 SyncLightPosition(light_index);
@@ -761,83 +882,6 @@ void RasterizerOpenGL::SetShader() {
761 } 882 }
762} 883}
763 884
764void RasterizerOpenGL::SyncFramebuffer() {
765 const auto& regs = Pica::g_state.regs;
766
767 PAddr new_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress();
768 Pica::Regs::ColorFormat new_fb_color_format = regs.framebuffer.color_format;
769
770 PAddr new_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress();
771 Pica::Regs::DepthFormat new_fb_depth_format = regs.framebuffer.depth_format;
772
773 bool fb_size_changed = fb_color_texture.width != static_cast<GLsizei>(regs.framebuffer.GetWidth()) ||
774 fb_color_texture.height != static_cast<GLsizei>(regs.framebuffer.GetHeight());
775
776 bool color_fb_prop_changed = fb_color_texture.format != new_fb_color_format ||
777 fb_size_changed;
778
779 bool depth_fb_prop_changed = fb_depth_texture.format != new_fb_depth_format ||
780 fb_size_changed;
781
782 bool color_fb_modified = cached_fb_color_addr != new_fb_color_addr ||
783 color_fb_prop_changed;
784
785 bool depth_fb_modified = cached_fb_depth_addr != new_fb_depth_addr ||
786 depth_fb_prop_changed;
787
788 // Commit if framebuffer modified in any way
789 if (color_fb_modified)
790 CommitColorBuffer();
791
792 if (depth_fb_modified)
793 CommitDepthBuffer();
794
795 // Reconfigure framebuffer textures if any property has changed
796 if (color_fb_prop_changed) {
797 ReconfigureColorTexture(fb_color_texture, new_fb_color_format,
798 regs.framebuffer.GetWidth(), regs.framebuffer.GetHeight());
799 }
800
801 if (depth_fb_prop_changed) {
802 ReconfigureDepthTexture(fb_depth_texture, new_fb_depth_format,
803 regs.framebuffer.GetWidth(), regs.framebuffer.GetHeight());
804
805 // Only attach depth buffer as stencil if it supports stencil
806 switch (new_fb_depth_format) {
807 case Pica::Regs::DepthFormat::D16:
808 case Pica::Regs::DepthFormat::D24:
809 glFramebufferTexture2D(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
810 break;
811
812 case Pica::Regs::DepthFormat::D24S8:
813 glFramebufferTexture2D(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, fb_depth_texture.texture.handle, 0);
814 break;
815
816 default:
817 LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer depth format %x", new_fb_depth_format);
818 UNIMPLEMENTED();
819 break;
820 }
821 }
822
823 // Load buffer data again if fb modified in any way
824 if (color_fb_modified) {
825 cached_fb_color_addr = new_fb_color_addr;
826
827 ReloadColorBuffer();
828 }
829
830 if (depth_fb_modified) {
831 cached_fb_depth_addr = new_fb_depth_addr;
832
833 ReloadDepthBuffer();
834 }
835
836 GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER);
837 ASSERT_MSG(status == GL_FRAMEBUFFER_COMPLETE,
838 "OpenGL rasterizer framebuffer setup failed, status %X", status);
839}
840
841void RasterizerOpenGL::SyncCullMode() { 885void RasterizerOpenGL::SyncCullMode() {
842 const auto& regs = Pica::g_state.regs; 886 const auto& regs = Pica::g_state.regs;
843 887
@@ -1034,229 +1078,3 @@ void RasterizerOpenGL::SyncLightPosition(int light_index) {
1034 uniform_block_data.dirty = true; 1078 uniform_block_data.dirty = true;
1035 } 1079 }
1036} 1080}
1037
1038void RasterizerOpenGL::SyncDrawState() {
1039 const auto& regs = Pica::g_state.regs;
1040
1041 // Sync the viewport
1042 GLsizei viewport_width = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_x).ToFloat32() * 2;
1043 GLsizei viewport_height = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_y).ToFloat32() * 2;
1044
1045 // OpenGL uses different y coordinates, so negate corner offset and flip origin
1046 // TODO: Ensure viewport_corner.x should not be negated or origin flipped
1047 // TODO: Use floating-point viewports for accuracy if supported
1048 glViewport((GLsizei)regs.viewport_corner.x,
1049 (GLsizei)regs.viewport_corner.y,
1050 viewport_width, viewport_height);
1051
1052 // Sync bound texture(s), upload if not cached
1053 const auto pica_textures = regs.GetTextures();
1054 for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) {
1055 const auto& texture = pica_textures[texture_index];
1056
1057 if (texture.enabled) {
1058 texture_samplers[texture_index].SyncWithConfig(texture.config);
1059 res_cache.LoadAndBindTexture(state, texture_index, texture);
1060 } else {
1061 state.texture_units[texture_index].texture_2d = 0;
1062 }
1063 }
1064
1065 state.draw.uniform_buffer = uniform_buffer.handle;
1066 state.Apply();
1067}
1068
// Profiling token entered via MICROPROFILE_SCOPE in ReloadColorBuffer and ReloadDepthBuffer.
1069MICROPROFILE_DEFINE(OpenGL_FramebufferReload, "OpenGL", "FB Reload", MP_RGB(70, 70, 200));
1070
1071void RasterizerOpenGL::ReloadColorBuffer() {
1072 u8* color_buffer = Memory::GetPhysicalPointer(cached_fb_color_addr);
1073
1074 if (color_buffer == nullptr)
1075 return;
1076
1077 MICROPROFILE_SCOPE(OpenGL_FramebufferReload);
1078
1079 u32 bytes_per_pixel = Pica::Regs::BytesPerColorPixel(fb_color_texture.format);
1080
1081 std::unique_ptr<u8[]> temp_fb_color_buffer(new u8[fb_color_texture.width * fb_color_texture.height * bytes_per_pixel]);
1082
1083 // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion is necessary.
1084 for (int y = 0; y < fb_color_texture.height; ++y) {
1085 for (int x = 0; x < fb_color_texture.width; ++x) {
1086 const u32 coarse_y = y & ~7;
1087 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_color_texture.width * bytes_per_pixel;
1088 u32 gl_pixel_index = (x + (fb_color_texture.height - 1 - y) * fb_color_texture.width) * bytes_per_pixel;
1089
1090 u8* pixel = color_buffer + dst_offset;
1091 memcpy(&temp_fb_color_buffer[gl_pixel_index], pixel, bytes_per_pixel);
1092 }
1093 }
1094
1095 state.texture_units[0].texture_2d = fb_color_texture.texture.handle;
1096 state.Apply();
1097
1098 glActiveTexture(GL_TEXTURE0);
1099 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, fb_color_texture.width, fb_color_texture.height,
1100 fb_color_texture.gl_format, fb_color_texture.gl_type, temp_fb_color_buffer.get());
1101
1102 state.texture_units[0].texture_2d = 0;
1103 state.Apply();
1104}
1105
1106void RasterizerOpenGL::ReloadDepthBuffer() {
1107 if (cached_fb_depth_addr == 0)
1108 return;
1109
1110 // TODO: Appears to work, but double-check endianness of depth values and order of depth-stencil
1111 u8* depth_buffer = Memory::GetPhysicalPointer(cached_fb_depth_addr);
1112
1113 if (depth_buffer == nullptr)
1114 return;
1115
1116 MICROPROFILE_SCOPE(OpenGL_FramebufferReload);
1117
1118 u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format);
1119
1120 // OpenGL needs 4 bpp alignment for D24
1121 u32 gl_bpp = bytes_per_pixel == 3 ? 4 : bytes_per_pixel;
1122
1123 std::unique_ptr<u8[]> temp_fb_depth_buffer(new u8[fb_depth_texture.width * fb_depth_texture.height * gl_bpp]);
1124
1125 u8* temp_fb_depth_data = bytes_per_pixel == 3 ? (temp_fb_depth_buffer.get() + 1) : temp_fb_depth_buffer.get();
1126
1127 if (fb_depth_texture.format == Pica::Regs::DepthFormat::D24S8) {
1128 for (int y = 0; y < fb_depth_texture.height; ++y) {
1129 for (int x = 0; x < fb_depth_texture.width; ++x) {
1130 const u32 coarse_y = y & ~7;
1131 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel;
1132 u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width);
1133
1134 u8* pixel = depth_buffer + dst_offset;
1135 u32 depth_stencil = *(u32*)pixel;
1136 ((u32*)temp_fb_depth_data)[gl_pixel_index] = (depth_stencil << 8) | (depth_stencil >> 24);
1137 }
1138 }
1139 } else {
1140 for (int y = 0; y < fb_depth_texture.height; ++y) {
1141 for (int x = 0; x < fb_depth_texture.width; ++x) {
1142 const u32 coarse_y = y & ~7;
1143 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel;
1144 u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width) * gl_bpp;
1145
1146 u8* pixel = depth_buffer + dst_offset;
1147 memcpy(&temp_fb_depth_data[gl_pixel_index], pixel, bytes_per_pixel);
1148 }
1149 }
1150 }
1151
1152 state.texture_units[0].texture_2d = fb_depth_texture.texture.handle;
1153 state.Apply();
1154
1155 glActiveTexture(GL_TEXTURE0);
1156 if (fb_depth_texture.format == Pica::Regs::DepthFormat::D24S8) {
1157 // TODO(Subv): There is a bug with Intel Windows drivers that makes glTexSubImage2D not change the stencil buffer.
1158 // The bug has been reported to Intel (https://communities.intel.com/message/324464)
1159 glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH24_STENCIL8, fb_depth_texture.width, fb_depth_texture.height, 0,
1160 GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, temp_fb_depth_buffer.get());
1161 } else {
1162 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, fb_depth_texture.width, fb_depth_texture.height,
1163 fb_depth_texture.gl_format, fb_depth_texture.gl_type, temp_fb_depth_buffer.get());
1164 }
1165
1166 state.texture_units[0].texture_2d = 0;
1167 state.Apply();
1168}
1169
// Profiling handles shared by CommitColorBuffer and CommitDepthBuffer: the timing category is
// passed to Common::Profiling::ScopeTimer and the microprofile token to MICROPROFILE_SCOPE.
1170Common::Profiling::TimingCategory buffer_commit_category("Framebuffer Commit");
1171MICROPROFILE_DEFINE(OpenGL_FramebufferCommit, "OpenGL", "FB Commit", MP_RGB(70, 70, 200));
1172
1173void RasterizerOpenGL::CommitColorBuffer() {
1174 if (cached_fb_color_addr != 0) {
1175 u8* color_buffer = Memory::GetPhysicalPointer(cached_fb_color_addr);
1176
1177 if (color_buffer != nullptr) {
1178 Common::Profiling::ScopeTimer timer(buffer_commit_category);
1179 MICROPROFILE_SCOPE(OpenGL_FramebufferCommit);
1180
1181 u32 bytes_per_pixel = Pica::Regs::BytesPerColorPixel(fb_color_texture.format);
1182
1183 std::unique_ptr<u8[]> temp_gl_color_buffer(new u8[fb_color_texture.width * fb_color_texture.height * bytes_per_pixel]);
1184
1185 state.texture_units[0].texture_2d = fb_color_texture.texture.handle;
1186 state.Apply();
1187
1188 glActiveTexture(GL_TEXTURE0);
1189 glGetTexImage(GL_TEXTURE_2D, 0, fb_color_texture.gl_format, fb_color_texture.gl_type, temp_gl_color_buffer.get());
1190
1191 state.texture_units[0].texture_2d = 0;
1192 state.Apply();
1193
1194 // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion is necessary.
1195 for (int y = 0; y < fb_color_texture.height; ++y) {
1196 for (int x = 0; x < fb_color_texture.width; ++x) {
1197 const u32 coarse_y = y & ~7;
1198 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_color_texture.width * bytes_per_pixel;
1199 u32 gl_pixel_index = x * bytes_per_pixel + (fb_color_texture.height - 1 - y) * fb_color_texture.width * bytes_per_pixel;
1200
1201 u8* pixel = color_buffer + dst_offset;
1202 memcpy(pixel, &temp_gl_color_buffer[gl_pixel_index], bytes_per_pixel);
1203 }
1204 }
1205 }
1206 }
1207}
1208
1209void RasterizerOpenGL::CommitDepthBuffer() {
1210 if (cached_fb_depth_addr != 0) {
1211 // TODO: Output seems correct visually, but doesn't quite match sw renderer output. One of them is wrong.
1212 u8* depth_buffer = Memory::GetPhysicalPointer(cached_fb_depth_addr);
1213
1214 if (depth_buffer != nullptr) {
1215 Common::Profiling::ScopeTimer timer(buffer_commit_category);
1216 MICROPROFILE_SCOPE(OpenGL_FramebufferCommit);
1217
1218 u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format);
1219
1220 // OpenGL needs 4 bpp alignment for D24
1221 u32 gl_bpp = bytes_per_pixel == 3 ? 4 : bytes_per_pixel;
1222
1223 std::unique_ptr<u8[]> temp_gl_depth_buffer(new u8[fb_depth_texture.width * fb_depth_texture.height * gl_bpp]);
1224
1225 state.texture_units[0].texture_2d = fb_depth_texture.texture.handle;
1226 state.Apply();
1227
1228 glActiveTexture(GL_TEXTURE0);
1229 glGetTexImage(GL_TEXTURE_2D, 0, fb_depth_texture.gl_format, fb_depth_texture.gl_type, temp_gl_depth_buffer.get());
1230
1231 state.texture_units[0].texture_2d = 0;
1232 state.Apply();
1233
1234 u8* temp_gl_depth_data = bytes_per_pixel == 3 ? (temp_gl_depth_buffer.get() + 1) : temp_gl_depth_buffer.get();
1235
1236 if (fb_depth_texture.format == Pica::Regs::DepthFormat::D24S8) {
1237 for (int y = 0; y < fb_depth_texture.height; ++y) {
1238 for (int x = 0; x < fb_depth_texture.width; ++x) {
1239 const u32 coarse_y = y & ~7;
1240 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel;
1241 u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width);
1242
1243 u8* pixel = depth_buffer + dst_offset;
1244 u32 depth_stencil = ((u32*)temp_gl_depth_data)[gl_pixel_index];
1245 *(u32*)pixel = (depth_stencil >> 8) | (depth_stencil << 24);
1246 }
1247 }
1248 } else {
1249 for (int y = 0; y < fb_depth_texture.height; ++y) {
1250 for (int x = 0; x < fb_depth_texture.width; ++x) {
1251 const u32 coarse_y = y & ~7;
1252 u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel;
1253 u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width) * gl_bpp;
1254
1255 u8* pixel = depth_buffer + dst_offset;
1256 memcpy(pixel, &temp_gl_depth_data[gl_pixel_index], bytes_per_pixel);
1257 }
1258 }
1259 }
1260 }
1261 }
1262}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 390349a0c..8d6177e88 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -19,6 +19,7 @@
19#include "video_core/renderer_opengl/gl_rasterizer_cache.h" 19#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
20#include "video_core/renderer_opengl/gl_state.h" 20#include "video_core/renderer_opengl/gl_state.h"
21#include "video_core/renderer_opengl/pica_to_gl.h" 21#include "video_core/renderer_opengl/pica_to_gl.h"
22#include "video_core/renderer_opengl/renderer_opengl.h"
22#include "video_core/shader/shader_interpreter.h" 23#include "video_core/shader/shader_interpreter.h"
23 24
24/** 25/**
@@ -191,16 +192,17 @@ public:
191 RasterizerOpenGL(); 192 RasterizerOpenGL();
192 ~RasterizerOpenGL() override; 193 ~RasterizerOpenGL() override;
193 194
194 void InitObjects() override;
195 void Reset() override;
196 void AddTriangle(const Pica::Shader::OutputVertex& v0, 195 void AddTriangle(const Pica::Shader::OutputVertex& v0,
197 const Pica::Shader::OutputVertex& v1, 196 const Pica::Shader::OutputVertex& v1,
198 const Pica::Shader::OutputVertex& v2) override; 197 const Pica::Shader::OutputVertex& v2) override;
199 void DrawTriangles() override; 198 void DrawTriangles() override;
200 void FlushFramebuffer() override;
201 void NotifyPicaRegisterChanged(u32 id) override; 199 void NotifyPicaRegisterChanged(u32 id) override;
200 void FlushAll() override;
202 void FlushRegion(PAddr addr, u32 size) override; 201 void FlushRegion(PAddr addr, u32 size) override;
203 void InvalidateRegion(PAddr addr, u32 size) override; 202 void FlushAndInvalidateRegion(PAddr addr, u32 size) override;
203 bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override;
204 bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override;
205 bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) override;
204 206
205 /// OpenGL shader generated for a given Pica register state 207 /// OpenGL shader generated for a given Pica register state
206 struct PicaShader { 208 struct PicaShader {
@@ -210,26 +212,6 @@ public:
210 212
211private: 213private:
212 214
213 /// Structure used for storing information about color textures
214 struct TextureInfo {
215 OGLTexture texture;
216 GLsizei width;
217 GLsizei height;
218 Pica::Regs::ColorFormat format;
219 GLenum gl_format;
220 GLenum gl_type;
221 };
222
223 /// Structure used for storing information about depth textures
224 struct DepthTextureInfo {
225 OGLTexture texture;
226 GLsizei width;
227 GLsizei height;
228 Pica::Regs::DepthFormat format;
229 GLenum gl_format;
230 GLenum gl_type;
231 };
232
233 struct SamplerInfo { 215 struct SamplerInfo {
234 using TextureConfig = Pica::Regs::TextureConfig; 216 using TextureConfig = Pica::Regs::TextureConfig;
235 217
@@ -311,18 +293,9 @@ private:
311 static_assert(sizeof(UniformData) == 0x310, "The size of the UniformData structure has changed, update the structure in the shader"); 293 static_assert(sizeof(UniformData) == 0x310, "The size of the UniformData structure has changed, update the structure in the shader");
312 static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec"); 294 static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec");
313 295
314 /// Reconfigure the OpenGL color texture to use the given format and dimensions
315 void ReconfigureColorTexture(TextureInfo& texture, Pica::Regs::ColorFormat format, u32 width, u32 height);
316
317 /// Reconfigure the OpenGL depth texture to use the given format and dimensions
318 void ReconfigureDepthTexture(DepthTextureInfo& texture, Pica::Regs::DepthFormat format, u32 width, u32 height);
319
320 /// Sets the OpenGL shader in accordance with the current PICA register state 296 /// Sets the OpenGL shader in accordance with the current PICA register state
321 void SetShader(); 297 void SetShader();
322 298
323 /// Syncs the state and contents of the OpenGL framebuffer to match the current PICA framebuffer
324 void SyncFramebuffer();
325
326 /// Syncs the cull mode to match the PICA register 299 /// Syncs the cull mode to match the PICA register
327 void SyncCullMode(); 300 void SyncCullMode();
328 301
@@ -359,72 +332,42 @@ private:
359 /// Syncs the depth test states to match the PICA register 332 /// Syncs the depth test states to match the PICA register
360 void SyncDepthTest(); 333 void SyncDepthTest();
361 334
362 /// Syncs the TEV constant color to match the PICA register
363 void SyncTevConstColor(int tev_index, const Pica::Regs::TevStageConfig& tev_stage);
364
365 /// Syncs the TEV combiner color buffer to match the PICA register 335 /// Syncs the TEV combiner color buffer to match the PICA register
366 void SyncCombinerColor(); 336 void SyncCombinerColor();
367 337
338 /// Syncs the TEV constant color to match the PICA register
339 void SyncTevConstColor(int tev_index, const Pica::Regs::TevStageConfig& tev_stage);
340
368 /// Syncs the lighting global ambient color to match the PICA register 341 /// Syncs the lighting global ambient color to match the PICA register
369 void SyncGlobalAmbient(); 342 void SyncGlobalAmbient();
370 343
371 /// Syncs the lighting lookup tables 344 /// Syncs the lighting lookup tables
372 void SyncLightingLUT(unsigned index); 345 void SyncLightingLUT(unsigned index);
373 346
374 /// Syncs the specified light's diffuse color to match the PICA register
375 void SyncLightDiffuse(int light_index);
376
377 /// Syncs the specified light's ambient color to match the PICA register
378 void SyncLightAmbient(int light_index);
379
380 /// Syncs the specified light's position to match the PICA register
381 void SyncLightPosition(int light_index);
382
383 /// Syncs the specified light's specular 0 color to match the PICA register 347 /// Syncs the specified light's specular 0 color to match the PICA register
384 void SyncLightSpecular0(int light_index); 348 void SyncLightSpecular0(int light_index);
385 349
386 /// Syncs the specified light's specular 1 color to match the PICA register 350 /// Syncs the specified light's specular 1 color to match the PICA register
387 void SyncLightSpecular1(int light_index); 351 void SyncLightSpecular1(int light_index);
388 352
389 /// Syncs the remaining OpenGL drawing state to match the current PICA state 353 /// Syncs the specified light's diffuse color to match the PICA register
390 void SyncDrawState(); 354 void SyncLightDiffuse(int light_index);
391
392 /// Copies the 3DS color framebuffer into the OpenGL color framebuffer texture
393 void ReloadColorBuffer();
394 355
395 /// Copies the 3DS depth framebuffer into the OpenGL depth framebuffer texture 356 /// Syncs the specified light's ambient color to match the PICA register
396 void ReloadDepthBuffer(); 357 void SyncLightAmbient(int light_index);
397 358
398 /** 359 /// Syncs the specified light's position to match the PICA register
399 * Save the current OpenGL color framebuffer to the current PICA framebuffer in 3DS memory 360 void SyncLightPosition(int light_index);
400 * Loads the OpenGL framebuffer textures into temporary buffers
401 * Then copies into the 3DS framebuffer using proper Morton order
402 */
403 void CommitColorBuffer();
404 361
405 /** 362 OpenGLState state;
406 * Save the current OpenGL depth framebuffer to the current PICA framebuffer in 3DS memory
407 * Loads the OpenGL framebuffer textures into temporary buffers
408 * Then copies into the 3DS framebuffer using proper Morton order
409 */
410 void CommitDepthBuffer();
411 363
412 RasterizerCacheOpenGL res_cache; 364 RasterizerCacheOpenGL res_cache;
413 365
414 std::vector<HardwareVertex> vertex_batch; 366 std::vector<HardwareVertex> vertex_batch;
415 367
416 OpenGLState state;
417
418 PAddr cached_fb_color_addr;
419 PAddr cached_fb_depth_addr;
420
421 // Hardware rasterizer
422 std::array<SamplerInfo, 3> texture_samplers;
423 TextureInfo fb_color_texture;
424 DepthTextureInfo fb_depth_texture;
425
426 std::unordered_map<PicaShaderConfig, std::unique_ptr<PicaShader>> shader_cache; 368 std::unordered_map<PicaShaderConfig, std::unique_ptr<PicaShader>> shader_cache;
427 const PicaShader* current_shader = nullptr; 369 const PicaShader* current_shader = nullptr;
370 bool shader_dirty;
428 371
429 struct { 372 struct {
430 UniformData data; 373 UniformData data;
@@ -432,11 +375,12 @@ private:
432 bool dirty; 375 bool dirty;
433 } uniform_block_data; 376 } uniform_block_data;
434 377
378 std::array<SamplerInfo, 3> texture_samplers;
435 OGLVertexArray vertex_array; 379 OGLVertexArray vertex_array;
436 OGLBuffer vertex_buffer; 380 OGLBuffer vertex_buffer;
437 OGLBuffer uniform_buffer; 381 OGLBuffer uniform_buffer;
438 OGLFramebuffer framebuffer; 382 OGLFramebuffer framebuffer;
439 383
440 std::array<OGLTexture, 6> lighting_lut; 384 std::array<OGLTexture, 6> lighting_luts;
441 std::array<std::array<GLvec4, 256>, 6> lighting_lut_data; 385 std::array<std::array<GLvec4, 256>, 6> lighting_lut_data;
442}; 386};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 1323c12e4..55c2fb283 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -2,8 +2,9 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <memory> 5#include <unordered_set>
6 6
7#include "common/emu_window.h"
7#include "common/hash.h" 8#include "common/hash.h"
8#include "common/math_util.h" 9#include "common/math_util.h"
9#include "common/microprofile.h" 10#include "common/microprofile.h"
@@ -12,71 +13,693 @@
12#include "core/memory.h" 13#include "core/memory.h"
13 14
14#include "video_core/debug_utils/debug_utils.h" 15#include "video_core/debug_utils/debug_utils.h"
16#include "video_core/pica_state.h"
15#include "video_core/renderer_opengl/gl_rasterizer_cache.h" 17#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
16#include "video_core/renderer_opengl/pica_to_gl.h" 18#include "video_core/renderer_opengl/pica_to_gl.h"
19#include "video_core/utils.h"
20#include "video_core/video_core.h"
21
// Groups the three format arguments passed to glTexImage2D when a surface texture is
// allocated or uploaded in this file.
22struct FormatTuple {
// Passed as the internalformat argument of glTexImage2D
23 GLint internal_format;
// Passed as the format argument of glTexImage2D
24 GLenum format;
// Passed as the type argument of glTexImage2D
25 GLenum type;
26};
27
// OpenGL format tuples for color framebuffer formats. Indexed directly by the numeric value of
// the surface pixel format; callers check the index against size() before use.
28static const std::array<FormatTuple, 5> fb_format_tuples = {{
29 { GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8 }, // RGBA8
30 { GL_RGB8, GL_BGR, GL_UNSIGNED_BYTE }, // RGB8
31 { GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1 }, // RGB5A1
32 { GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5 }, // RGB565
33 { GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4 }, // RGBA4
34}};
35
// OpenGL format tuples for depth and depth-stencil formats. AllocateSurfaceTexture indexes this
// table with (pixel_format - 14). The empty second entry is value-initialized (all zeros).
// NOTE(review): presumably that slot corresponds to a pixel format value with no depth
// format assigned - confirm against the PixelFormat enum.
36static const std::array<FormatTuple, 4> depth_format_tuples = {{
37 { GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT }, // D16
38 {},
39 { GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT }, // D24
40 { GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8 }, // D24S8
41}};
42
// Creates the two transfer framebuffer objects. BlitTextures binds index 0 as the read
// framebuffer and index 1 as the draw framebuffer.
43RasterizerCacheOpenGL::RasterizerCacheOpenGL() {
44 transfer_framebuffers[0].Create();
45 transfer_framebuffers[1].Create();
46}
17 47
// Destructor, shown side by side: the old revision (left line numbers) called InvalidateAll(),
// the new revision (right line numbers) calls FlushAll() instead.
18RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { 48RasterizerCacheOpenGL::~RasterizerCacheOpenGL() {
19 InvalidateAll(); 49 FlushAll();
20} 50}
21 51
// MortonCopyPixels (new revision, right-hand line numbers 52-94) copies a width x height image
// between a Morton-ordered buffer (morton_data) and a linear, vertically flipped OpenGL buffer
// (gl_data). When morton_to_gl is true the copy goes morton_data -> gl_data, otherwise
// gl_data -> morton_data. data_ptrs[0] is always the destination and data_ptrs[1] the source.
// D24S8 pixels are read as a u32 and rotated by one byte (direction depends on morton_to_gl);
// every other format is copied byte-for-byte using bytes_per_pixel.
// The left-hand numbered fragments (22-31) are removed lines of the old LoadAndBindTexture.
22MICROPROFILE_DEFINE(OpenGL_TextureUpload, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192)); 52static void MortonCopyPixels(CachedSurface::PixelFormat pixel_format, u32 width, u32 height, u32 bytes_per_pixel, u32 gl_bytes_per_pixel, u8* morton_data, u8* gl_data, bool morton_to_gl) {
53 using PixelFormat = CachedSurface::PixelFormat;
54
55 u8* data_ptrs[2];
56 u32 depth_stencil_shifts[2] = {24, 8};
23 57
24void RasterizerCacheOpenGL::LoadAndBindTexture(OpenGLState &state, unsigned texture_unit, const Pica::DebugUtils::TextureInfo& info) { 58 if (morton_to_gl) {
25 const auto cached_texture = texture_cache.find(info.physical_address); 59 std::swap(depth_stencil_shifts[0], depth_stencil_shifts[1]);
60 }
61
62 if (pixel_format == PixelFormat::D24S8) {
63 for (unsigned y = 0; y < height; ++y) {
64 for (unsigned x = 0; x < width; ++x) {
65 const u32 coarse_y = y & ~7;
66 u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel;
67 u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel;
68
69 data_ptrs[morton_to_gl] = morton_data + morton_offset;
70 data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index];
26 71
27 if (cached_texture != texture_cache.end()) { 72 // Swap depth and stencil value ordering since 3DS does not match OpenGL
28 state.texture_units[texture_unit].texture_2d = cached_texture->second->texture.handle; 73 u32 depth_stencil;
29 state.Apply(); 74 memcpy(&depth_stencil, data_ptrs[1], sizeof(u32));
75 depth_stencil = (depth_stencil << depth_stencil_shifts[0]) | (depth_stencil >> depth_stencil_shifts[1]);
76
77 memcpy(data_ptrs[0], &depth_stencil, sizeof(u32));
78 }
79 }
30 } else { 80 } else {
31 MICROPROFILE_SCOPE(OpenGL_TextureUpload); 81 for (unsigned y = 0; y < height; ++y) {
82 for (unsigned x = 0; x < width; ++x) {
83 const u32 coarse_y = y & ~7;
84 u32 morton_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel;
85 u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel;
86
87 data_ptrs[morton_to_gl] = morton_data + morton_offset;
88 data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index];
89
90 memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
91 }
92 }
93 }
94}
95
96bool RasterizerCacheOpenGL::BlitTextures(GLuint src_tex, GLuint dst_tex, CachedSurface::SurfaceType type, const MathUtil::Rectangle<int>& src_rect, const MathUtil::Rectangle<int>& dst_rect) {
97 using SurfaceType = CachedSurface::SurfaceType;
98
99 OpenGLState cur_state = OpenGLState::GetCurState();
100
101 // Make sure textures aren't bound to texture units, since going to bind them to framebuffer components
102 OpenGLState::ResetTexture(src_tex);
103 OpenGLState::ResetTexture(dst_tex);
104
105 // Keep track of previous framebuffer bindings
106 GLuint old_fbs[2] = { cur_state.draw.read_framebuffer, cur_state.draw.draw_framebuffer };
107 cur_state.draw.read_framebuffer = transfer_framebuffers[0].handle;
108 cur_state.draw.draw_framebuffer = transfer_framebuffers[1].handle;
109 cur_state.Apply();
110
111 u32 buffers = 0;
112
113 if (type == SurfaceType::Color || type == SurfaceType::Texture) {
114 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex, 0);
115 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
116
117 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex, 0);
118 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
119
120 buffers = GL_COLOR_BUFFER_BIT;
121 } else if (type == SurfaceType::Depth) {
122 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
123 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0);
124 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
125
126 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
127 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0);
128 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
129
130 buffers = GL_DEPTH_BUFFER_BIT;
131 } else if (type == SurfaceType::DepthStencil) {
132 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
133 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0);
134
135 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
136 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0);
137
138 buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
139 }
140
141 if (OpenGLState::CheckFBStatus(GL_READ_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
142 return false;
143 }
144
145 if (OpenGLState::CheckFBStatus(GL_DRAW_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
146 return false;
147 }
148
149 glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
150 dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom,
151 buffers, buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST);
152
153 // Restore previous framebuffer bindings
154 cur_state.draw.read_framebuffer = old_fbs[0];
155 cur_state.draw.draw_framebuffer = old_fbs[1];
156 cur_state.Apply();
157
158 return true;
159}
160
161bool RasterizerCacheOpenGL::TryBlitSurfaces(CachedSurface* src_surface, const MathUtil::Rectangle<int>& src_rect, CachedSurface* dst_surface, const MathUtil::Rectangle<int>& dst_rect) {
162 using SurfaceType = CachedSurface::SurfaceType;
163
164 if (!CachedSurface::CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format)) {
165 return false;
166 }
167
168 return BlitTextures(src_surface->texture.handle, dst_surface->texture.handle, CachedSurface::GetFormatType(src_surface->pixel_format), src_rect, dst_rect);
169}
170
171static void AllocateSurfaceTexture(GLuint texture, CachedSurface::PixelFormat pixel_format, u32 width, u32 height) {
172 // Allocate an uninitialized texture of appropriate size and format for the surface
173 using SurfaceType = CachedSurface::SurfaceType;
174
175 OpenGLState cur_state = OpenGLState::GetCurState();
176
177 // Keep track of previous texture bindings
178 GLuint old_tex = cur_state.texture_units[0].texture_2d;
179 cur_state.texture_units[0].texture_2d = texture;
180 cur_state.Apply();
181 glActiveTexture(GL_TEXTURE0);
182
183 SurfaceType type = CachedSurface::GetFormatType(pixel_format);
184
185 FormatTuple tuple;
186 if (type == SurfaceType::Color) {
187 ASSERT((size_t)pixel_format < fb_format_tuples.size());
188 tuple = fb_format_tuples[(unsigned int)pixel_format];
189 } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) {
190 size_t tuple_idx = (size_t)pixel_format - 14;
191 ASSERT(tuple_idx < depth_format_tuples.size());
192 tuple = depth_format_tuples[tuple_idx];
193 } else {
194 tuple = { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE };
195 }
196
197 glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, width, height, 0,
198 tuple.format, tuple.type, nullptr);
199
200 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
201 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
202 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
203 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
204
205 // Restore previous texture bindings
206 cur_state.texture_units[0].texture_2d = old_tex;
207 cur_state.Apply();
208}
209
210MICROPROFILE_DEFINE(OpenGL_SurfaceUpload, "OpenGL", "Surface Upload", MP_RGB(128, 64, 192));
211CachedSurface* RasterizerCacheOpenGL::GetSurface(const CachedSurface& params, bool match_res_scale, bool load_if_create) {
212 using PixelFormat = CachedSurface::PixelFormat;
213 using SurfaceType = CachedSurface::SurfaceType;
214
215 if (params.addr == 0) {
216 return nullptr;
217 }
218
219 u32 params_size = params.width * params.height * CachedSurface::GetFormatBpp(params.pixel_format) / 8;
220
221 // Check for an exact match in existing surfaces
222 CachedSurface* best_exact_surface = nullptr;
223 float exact_surface_goodness = -1.f;
224
225 auto surface_interval = boost::icl::interval<PAddr>::right_open(params.addr, params.addr + params_size);
226 auto range = surface_cache.equal_range(surface_interval);
227 for (auto it = range.first; it != range.second; ++it) {
228 for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) {
229 CachedSurface* surface = it2->get();
230
231 // Check if the request matches the surface exactly
232 if (params.addr == surface->addr &&
233 params.width == surface->width && params.height == surface->height &&
234 params.pixel_format == surface->pixel_format)
235 {
236 // Make sure optional param-matching criteria are fulfilled
237 bool tiling_match = (params.is_tiled == surface->is_tiled);
238 bool res_scale_match = (params.res_scale_width == surface->res_scale_width && params.res_scale_height == surface->res_scale_height);
239 if (!match_res_scale || res_scale_match) {
240 // Prioritize same-tiling and highest resolution surfaces
241 float match_goodness = (float)tiling_match + surface->res_scale_width * surface->res_scale_height;
242 if (match_goodness > exact_surface_goodness || surface->dirty) {
243 exact_surface_goodness = match_goodness;
244 best_exact_surface = surface;
245 }
246 }
247 }
248 }
249 }
250
251 // Return the best exact surface if found
252 if (best_exact_surface != nullptr) {
253 return best_exact_surface;
254 }
255
256 // No matching surfaces found, so create a new one
257 u8* texture_src_data = Memory::GetPhysicalPointer(params.addr);
258 if (texture_src_data == nullptr) {
259 return nullptr;
260 }
261
262 MICROPROFILE_SCOPE(OpenGL_SurfaceUpload);
263
264 std::shared_ptr<CachedSurface> new_surface = std::make_shared<CachedSurface>();
265
266 new_surface->addr = params.addr;
267 new_surface->size = params_size;
268
269 new_surface->texture.Create();
270 new_surface->width = params.width;
271 new_surface->height = params.height;
272 new_surface->stride = params.stride;
273 new_surface->res_scale_width = params.res_scale_width;
274 new_surface->res_scale_height = params.res_scale_height;
275
276 new_surface->is_tiled = params.is_tiled;
277 new_surface->pixel_format = params.pixel_format;
278 new_surface->dirty = false;
279
280 if (!load_if_create) {
281 // Don't load any data; just allocate the surface's texture
282 AllocateSurfaceTexture(new_surface->texture.handle, new_surface->pixel_format, new_surface->GetScaledWidth(), new_surface->GetScaledHeight());
283 } else {
284 // TODO: Consider attempting subrect match in existing surfaces and direct blit here instead of memory upload below if that's a common scenario in some game
285
286 Memory::RasterizerFlushRegion(params.addr, params_size);
287
288 // Load data from memory to the new surface
289 OpenGLState cur_state = OpenGLState::GetCurState();
290
291 GLuint old_tex = cur_state.texture_units[0].texture_2d;
292 cur_state.texture_units[0].texture_2d = new_surface->texture.handle;
293 cur_state.Apply();
294 glActiveTexture(GL_TEXTURE0);
295
296 glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)new_surface->stride);
297 if (!new_surface->is_tiled) {
298 // TODO: Ensure this will always be a color format, not a depth or other format
299 ASSERT((size_t)new_surface->pixel_format < fb_format_tuples.size());
300 const FormatTuple& tuple = fb_format_tuples[(unsigned int)params.pixel_format];
301
302 glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0,
303 tuple.format, tuple.type, texture_src_data);
304 } else {
305 SurfaceType type = CachedSurface::GetFormatType(new_surface->pixel_format);
306 if (type != SurfaceType::Depth && type != SurfaceType::DepthStencil) {
307 FormatTuple tuple;
308 if ((size_t)params.pixel_format < fb_format_tuples.size()) {
309 tuple = fb_format_tuples[(unsigned int)params.pixel_format];
310 } else {
311 // Texture
312 tuple = { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE };
313 }
314
315 std::vector<Math::Vec4<u8>> tex_buffer(params.width * params.height);
316
317 Pica::DebugUtils::TextureInfo tex_info;
318 tex_info.width = params.width;
319 tex_info.height = params.height;
320 tex_info.stride = params.width * CachedSurface::GetFormatBpp(params.pixel_format) / 8;
321 tex_info.format = (Pica::Regs::TextureFormat)params.pixel_format;
322 tex_info.physical_address = params.addr;
323
324 for (unsigned y = 0; y < params.height; ++y) {
325 for (unsigned x = 0; x < params.width; ++x) {
326 tex_buffer[x + params.width * y] = Pica::DebugUtils::LookupTexture(texture_src_data, x, params.height - 1 - y, tex_info);
327 }
328 }
329
330 glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0, GL_RGBA, GL_UNSIGNED_BYTE, tex_buffer.data());
331 } else {
332 // Depth/Stencil formats need special treatment since they aren't sampleable using LookupTexture and can't use RGBA format
333 size_t tuple_idx = (size_t)params.pixel_format - 14;
334 ASSERT(tuple_idx < depth_format_tuples.size());
335 const FormatTuple& tuple = depth_format_tuples[tuple_idx];
336
337 u32 bytes_per_pixel = CachedSurface::GetFormatBpp(params.pixel_format) / 8;
338
339 // OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type
340 bool use_4bpp = (params.pixel_format == PixelFormat::D24);
341
342 u32 gl_bytes_per_pixel = use_4bpp ? 4 : bytes_per_pixel;
343
344 std::vector<u8> temp_fb_depth_buffer(params.width * params.height * gl_bytes_per_pixel);
345
346 u8* temp_fb_depth_buffer_ptr = use_4bpp ? temp_fb_depth_buffer.data() + 1 : temp_fb_depth_buffer.data();
347
348 MortonCopyPixels(params.pixel_format, params.width, params.height, bytes_per_pixel, gl_bytes_per_pixel, texture_src_data, temp_fb_depth_buffer_ptr, true);
349
350 glTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, params.width, params.height, 0,
351 tuple.format, tuple.type, temp_fb_depth_buffer.data());
352 }
353 }
354 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
32 355
33 std::unique_ptr<CachedTexture> new_texture = std::make_unique<CachedTexture>(); 356 // If not 1x scale, blit 1x texture to a new scaled texture and replace texture in surface
357 if (new_surface->res_scale_width != 1.f || new_surface->res_scale_height != 1.f) {
358 OGLTexture scaled_texture;
359 scaled_texture.Create();
34 360
35 new_texture->texture.Create(); 361 AllocateSurfaceTexture(scaled_texture.handle, new_surface->pixel_format, new_surface->GetScaledWidth(), new_surface->GetScaledHeight());
36 state.texture_units[texture_unit].texture_2d = new_texture->texture.handle; 362 BlitTextures(new_surface->texture.handle, scaled_texture.handle, CachedSurface::GetFormatType(new_surface->pixel_format),
37 state.Apply(); 363 MathUtil::Rectangle<int>(0, 0, new_surface->width, new_surface->height),
38 glActiveTexture(GL_TEXTURE0 + texture_unit); 364 MathUtil::Rectangle<int>(0, 0, new_surface->GetScaledWidth(), new_surface->GetScaledHeight()));
39 365
40 u8* texture_src_data = Memory::GetPhysicalPointer(info.physical_address); 366 new_surface->texture.Release();
367 new_surface->texture.handle = scaled_texture.handle;
368 scaled_texture.handle = 0;
369 cur_state.texture_units[0].texture_2d = new_surface->texture.handle;
370 cur_state.Apply();
371 }
372
373 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
374 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
375 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
376 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
377
378 cur_state.texture_units[0].texture_2d = old_tex;
379 cur_state.Apply();
380 }
381
382 Memory::RasterizerMarkRegionCached(new_surface->addr, new_surface->size, 1);
383 surface_cache.add(std::make_pair(boost::icl::interval<PAddr>::right_open(new_surface->addr, new_surface->addr + new_surface->size), std::set<std::shared_ptr<CachedSurface>>({ new_surface })));
384 return new_surface.get();
385}
41 386
42 new_texture->width = info.width; 387CachedSurface* RasterizerCacheOpenGL::GetSurfaceRect(const CachedSurface& params, bool match_res_scale, bool load_if_create, MathUtil::Rectangle<int>& out_rect) {
43 new_texture->height = info.height; 388 if (params.addr == 0) {
44 new_texture->size = info.stride * info.height; 389 return nullptr;
45 new_texture->addr = info.physical_address; 390 }
46 new_texture->hash = Common::ComputeHash64(texture_src_data, new_texture->size); 391
392 u32 total_pixels = params.width * params.height;
393 u32 params_size = total_pixels * CachedSurface::GetFormatBpp(params.pixel_format) / 8;
394
395 // Attempt to find encompassing surfaces
396 CachedSurface* best_subrect_surface = nullptr;
397 float subrect_surface_goodness = -1.f;
47 398
48 std::unique_ptr<Math::Vec4<u8>[]> temp_texture_buffer_rgba(new Math::Vec4<u8>[info.width * info.height]); 399 auto surface_interval = boost::icl::interval<PAddr>::right_open(params.addr, params.addr + params_size);
400 auto cache_upper_bound = surface_cache.upper_bound(surface_interval);
401 for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) {
402 for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) {
403 CachedSurface* surface = it2->get();
49 404
50 for (int y = 0; y < info.height; ++y) { 405 // Check if the request is contained in the surface
51 for (int x = 0; x < info.width; ++x) { 406 if (params.addr >= surface->addr &&
52 temp_texture_buffer_rgba[x + info.width * y] = Pica::DebugUtils::LookupTexture(texture_src_data, x, info.height - 1 - y, info); 407 params.addr + params_size - 1 <= surface->addr + surface->size - 1 &&
408 params.pixel_format == surface->pixel_format)
409 {
410 // Make sure optional param-matching criteria are fulfilled
411 bool tiling_match = (params.is_tiled == surface->is_tiled);
412 bool res_scale_match = (params.res_scale_width == surface->res_scale_width && params.res_scale_height == surface->res_scale_height);
413 if (!match_res_scale || res_scale_match) {
414 // Prioritize same-tiling and highest resolution surfaces
415 float match_goodness = (float)tiling_match + surface->res_scale_width * surface->res_scale_height;
416 if (match_goodness > subrect_surface_goodness || surface->dirty) {
417 subrect_surface_goodness = match_goodness;
418 best_subrect_surface = surface;
419 }
420 }
53 } 421 }
54 } 422 }
423 }
424
425 // Return the best subrect surface if found
426 if (best_subrect_surface != nullptr) {
427 unsigned int bytes_per_pixel = (CachedSurface::GetFormatBpp(best_subrect_surface->pixel_format) / 8);
428
429 int x0, y0;
430
431 if (!params.is_tiled) {
432 u32 begin_pixel_index = (params.addr - best_subrect_surface->addr) / bytes_per_pixel;
433 x0 = begin_pixel_index % best_subrect_surface->width;
434 y0 = begin_pixel_index / best_subrect_surface->width;
435
436 out_rect = MathUtil::Rectangle<int>(x0, y0, x0 + params.width, y0 + params.height);
437 } else {
438 u32 bytes_per_tile = 8 * 8 * bytes_per_pixel;
439 u32 tiles_per_row = best_subrect_surface->width / 8;
440
441 u32 begin_tile_index = (params.addr - best_subrect_surface->addr) / bytes_per_tile;
442 x0 = begin_tile_index % tiles_per_row * 8;
443 y0 = begin_tile_index / tiles_per_row * 8;
444
445 // Tiled surfaces are flipped vertically in the rasterizer vs. 3DS memory.
446 out_rect = MathUtil::Rectangle<int>(x0, best_subrect_surface->height - y0, x0 + params.width, best_subrect_surface->height - (y0 + params.height));
447 }
55 448
56 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, info.width, info.height, 0, GL_RGBA, GL_UNSIGNED_BYTE, temp_texture_buffer_rgba.get()); 449 out_rect.left = (int)(out_rect.left * best_subrect_surface->res_scale_width);
450 out_rect.right = (int)(out_rect.right * best_subrect_surface->res_scale_width);
451 out_rect.top = (int)(out_rect.top * best_subrect_surface->res_scale_height);
452 out_rect.bottom = (int)(out_rect.bottom * best_subrect_surface->res_scale_height);
57 453
58 texture_cache.emplace(info.physical_address, std::move(new_texture)); 454 return best_subrect_surface;
59 } 455 }
456
457 // No subrect found - create and return a new surface
458 if (!params.is_tiled) {
459 out_rect = MathUtil::Rectangle<int>(0, 0, (int)(params.width * params.res_scale_width), (int)(params.height * params.res_scale_height));
460 } else {
461 out_rect = MathUtil::Rectangle<int>(0, (int)(params.height * params.res_scale_height), (int)(params.width * params.res_scale_width), 0);
462 }
463
464 return GetSurface(params, match_res_scale, load_if_create);
465}
466
467CachedSurface* RasterizerCacheOpenGL::GetTextureSurface(const Pica::Regs::FullTextureConfig& config) {
468 Pica::DebugUtils::TextureInfo info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config.config, config.format);
469
470 CachedSurface params;
471 params.addr = info.physical_address;
472 params.width = info.width;
473 params.height = info.height;
474 params.is_tiled = true;
475 params.pixel_format = CachedSurface::PixelFormatFromTextureFormat(info.format);
476 return GetSurface(params, false, true);
60} 477}
61 478
62void RasterizerCacheOpenGL::InvalidateInRange(PAddr addr, u32 size, bool ignore_hash) { 479std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> RasterizerCacheOpenGL::GetFramebufferSurfaces(const Pica::Regs::FramebufferConfig& config) {
63 // TODO: Optimize by also inserting upper bound (addr + size) of each texture into the same map and also narrow using lower_bound 480 const auto& regs = Pica::g_state.regs;
64 auto cache_upper_bound = texture_cache.upper_bound(addr + size); 481
482 // Make sur that framebuffers don't overlap if both color and depth are being used
483 u32 fb_area = config.GetWidth() * config.GetHeight();
484 bool framebuffers_overlap = config.GetColorBufferPhysicalAddress() != 0 &&
485 config.GetDepthBufferPhysicalAddress() != 0 &&
486 MathUtil::IntervalsIntersect(config.GetColorBufferPhysicalAddress(), fb_area * GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(config.color_format.Value())),
487 config.GetDepthBufferPhysicalAddress(), fb_area * Pica::Regs::BytesPerDepthPixel(config.depth_format));
488 bool using_color_fb = config.GetColorBufferPhysicalAddress() != 0;
489 bool using_depth_fb = config.GetDepthBufferPhysicalAddress() != 0 && (regs.output_merger.depth_test_enable || regs.output_merger.depth_write_enable || !framebuffers_overlap);
490
491 if (framebuffers_overlap && using_color_fb && using_depth_fb) {
492 LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; overlapping framebuffers not supported!");
493 using_depth_fb = false;
494 }
495
496 // get color and depth surfaces
497 CachedSurface color_params;
498 CachedSurface depth_params;
499 color_params.width = depth_params.width = config.GetWidth();
500 color_params.height = depth_params.height = config.GetHeight();
501 color_params.is_tiled = depth_params.is_tiled = true;
502 if (VideoCore::g_scaled_resolution_enabled) {
503 auto layout = VideoCore::g_emu_window->GetFramebufferLayout();
504
505 // Assume same scaling factor for top and bottom screens
506 color_params.res_scale_width = depth_params.res_scale_width = (float)layout.top_screen.GetWidth() / VideoCore::kScreenTopWidth;
507 color_params.res_scale_height = depth_params.res_scale_height = (float)layout.top_screen.GetHeight() / VideoCore::kScreenTopHeight;
508 }
509
510 color_params.addr = config.GetColorBufferPhysicalAddress();
511 color_params.pixel_format = CachedSurface::PixelFormatFromColorFormat(config.color_format);
512
513 depth_params.addr = config.GetDepthBufferPhysicalAddress();
514 depth_params.pixel_format = CachedSurface::PixelFormatFromDepthFormat(config.depth_format);
515
516 MathUtil::Rectangle<int> color_rect;
517 CachedSurface* color_surface = using_color_fb ? GetSurfaceRect(color_params, true, true, color_rect) : nullptr;
518
519 MathUtil::Rectangle<int> depth_rect;
520 CachedSurface* depth_surface = using_depth_fb ? GetSurfaceRect(depth_params, true, true, depth_rect) : nullptr;
521
522 // Sanity check to make sure found surfaces aren't the same
523 if (using_depth_fb && using_color_fb && color_surface == depth_surface) {
524 LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer surfaces overlap; overlapping surfaces not supported!");
525 using_depth_fb = false;
526 depth_surface = nullptr;
527 }
528
529 MathUtil::Rectangle<int> rect;
65 530
66 for (auto it = texture_cache.begin(); it != cache_upper_bound;) { 531 if (color_surface != nullptr && depth_surface != nullptr && (depth_rect.left != color_rect.left || depth_rect.top != color_rect.top)) {
67 const auto& info = *it->second; 532 // Can't specify separate color and depth viewport offsets in OpenGL, so re-zero both if they don't match
533 if (color_rect.left != 0 || color_rect.top != 0) {
534 color_surface = GetSurface(color_params, true, true);
535 }
68 536
69 // Flush the texture only if the memory region intersects and a change is detected 537 if (depth_rect.left != 0 || depth_rect.top != 0) {
70 if (MathUtil::IntervalsIntersect(addr, size, info.addr, info.size) && 538 depth_surface = GetSurface(depth_params, true, true);
71 (ignore_hash || info.hash != Common::ComputeHash64(Memory::GetPhysicalPointer(info.addr), info.size))) { 539 }
72 540
73 it = texture_cache.erase(it); 541 if (!color_surface->is_tiled) {
542 rect = MathUtil::Rectangle<int>(0, 0, (int)(color_params.width * color_params.res_scale_width), (int)(color_params.height * color_params.res_scale_height));
74 } else { 543 } else {
75 ++it; 544 rect = MathUtil::Rectangle<int>(0, (int)(color_params.height * color_params.res_scale_height), (int)(color_params.width * color_params.res_scale_width), 0);
76 } 545 }
546 } else if (color_surface != nullptr) {
547 rect = color_rect;
548 } else if (depth_surface != nullptr) {
549 rect = depth_rect;
550 } else {
551 rect = MathUtil::Rectangle<int>(0, 0, 0, 0);
77 } 552 }
553
554 return std::make_tuple(color_surface, depth_surface, rect);
78} 555}
79 556
80void RasterizerCacheOpenGL::InvalidateAll() { 557CachedSurface* RasterizerCacheOpenGL::TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config) {
81 texture_cache.clear(); 558 auto surface_interval = boost::icl::interval<PAddr>::right_open(config.GetStartAddress(), config.GetEndAddress());
559 auto range = surface_cache.equal_range(surface_interval);
560 for (auto it = range.first; it != range.second; ++it) {
561 for (auto it2 = it->second.begin(); it2 != it->second.end(); ++it2) {
562 int bits_per_value = 0;
563 if (config.fill_24bit) {
564 bits_per_value = 24;
565 } else if (config.fill_32bit) {
566 bits_per_value = 32;
567 } else {
568 bits_per_value = 16;
569 }
570
571 CachedSurface* surface = it2->get();
572
573 if (surface->addr == config.GetStartAddress() &&
574 CachedSurface::GetFormatBpp(surface->pixel_format) == bits_per_value &&
575 (surface->width * surface->height * CachedSurface::GetFormatBpp(surface->pixel_format) / 8) == (config.GetEndAddress() - config.GetStartAddress()))
576 {
577 return surface;
578 }
579 }
580 }
581
582 return nullptr;
583}
584
585MICROPROFILE_DEFINE(OpenGL_SurfaceDownload, "OpenGL", "Surface Download", MP_RGB(128, 192, 64));
586void RasterizerCacheOpenGL::FlushSurface(CachedSurface* surface) {
587 using PixelFormat = CachedSurface::PixelFormat;
588 using SurfaceType = CachedSurface::SurfaceType;
589
590 if (!surface->dirty) {
591 return;
592 }
593
594 MICROPROFILE_SCOPE(OpenGL_SurfaceDownload);
595
596 u8* dst_buffer = Memory::GetPhysicalPointer(surface->addr);
597 if (dst_buffer == nullptr) {
598 return;
599 }
600
601 OpenGLState cur_state = OpenGLState::GetCurState();
602 GLuint old_tex = cur_state.texture_units[0].texture_2d;
603
604 OGLTexture unscaled_tex;
605 GLuint texture_to_flush = surface->texture.handle;
606
607 // If not 1x scale, blit scaled texture to a new 1x texture and use that to flush
608 if (surface->res_scale_width != 1.f || surface->res_scale_height != 1.f) {
609 unscaled_tex.Create();
610
611 AllocateSurfaceTexture(unscaled_tex.handle, surface->pixel_format, surface->width, surface->height);
612 BlitTextures(surface->texture.handle, unscaled_tex.handle, CachedSurface::GetFormatType(surface->pixel_format),
613 MathUtil::Rectangle<int>(0, 0, surface->GetScaledWidth(), surface->GetScaledHeight()),
614 MathUtil::Rectangle<int>(0, 0, surface->width, surface->height));
615
616 texture_to_flush = unscaled_tex.handle;
617 }
618
619 cur_state.texture_units[0].texture_2d = texture_to_flush;
620 cur_state.Apply();
621 glActiveTexture(GL_TEXTURE0);
622
623 glPixelStorei(GL_PACK_ROW_LENGTH, (GLint)surface->stride);
624 if (!surface->is_tiled) {
625 // TODO: Ensure this will always be a color format, not a depth or other format
626 ASSERT((size_t)surface->pixel_format < fb_format_tuples.size());
627 const FormatTuple& tuple = fb_format_tuples[(unsigned int)surface->pixel_format];
628
629 glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, dst_buffer);
630 } else {
631 SurfaceType type = CachedSurface::GetFormatType(surface->pixel_format);
632 if (type != SurfaceType::Depth && type != SurfaceType::DepthStencil) {
633 ASSERT((size_t)surface->pixel_format < fb_format_tuples.size());
634 const FormatTuple& tuple = fb_format_tuples[(unsigned int)surface->pixel_format];
635
636 u32 bytes_per_pixel = CachedSurface::GetFormatBpp(surface->pixel_format) / 8;
637
638 std::vector<u8> temp_gl_buffer(surface->width * surface->height * bytes_per_pixel);
639
640 glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data());
641
642 // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion is necessary.
643 MortonCopyPixels(surface->pixel_format, surface->width, surface->height, bytes_per_pixel, bytes_per_pixel, dst_buffer, temp_gl_buffer.data(), false);
644 } else {
645 // Depth/Stencil formats need special treatment since they aren't sampleable using LookupTexture and can't use RGBA format
646 size_t tuple_idx = (size_t)surface->pixel_format - 14;
647 ASSERT(tuple_idx < depth_format_tuples.size());
648 const FormatTuple& tuple = depth_format_tuples[tuple_idx];
649
650 u32 bytes_per_pixel = CachedSurface::GetFormatBpp(surface->pixel_format) / 8;
651
652 // OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type
653 bool use_4bpp = (surface->pixel_format == PixelFormat::D24);
654
655 u32 gl_bytes_per_pixel = use_4bpp ? 4 : bytes_per_pixel;
656
657 std::vector<u8> temp_gl_buffer(surface->width * surface->height * gl_bytes_per_pixel);
658
659 glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, temp_gl_buffer.data());
660
661 u8* temp_gl_buffer_ptr = use_4bpp ? temp_gl_buffer.data() + 1 : temp_gl_buffer.data();
662
663 MortonCopyPixels(surface->pixel_format, surface->width, surface->height, bytes_per_pixel, gl_bytes_per_pixel, dst_buffer, temp_gl_buffer_ptr, false);
664 }
665 }
666 glPixelStorei(GL_PACK_ROW_LENGTH, 0);
667
668 surface->dirty = false;
669
670 cur_state.texture_units[0].texture_2d = old_tex;
671 cur_state.Apply();
672}
673
674void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, bool invalidate) {
675 if (size == 0) {
676 return;
677 }
678
679 // Gather up unique surfaces that touch the region
680 std::unordered_set<std::shared_ptr<CachedSurface>> touching_surfaces;
681
682 auto surface_interval = boost::icl::interval<PAddr>::right_open(addr, addr + size);
683 auto cache_upper_bound = surface_cache.upper_bound(surface_interval);
684 for (auto it = surface_cache.lower_bound(surface_interval); it != cache_upper_bound; ++it) {
685 std::copy_if(it->second.begin(), it->second.end(), std::inserter(touching_surfaces, touching_surfaces.end()),
686 [skip_surface](std::shared_ptr<CachedSurface> surface) { return (surface.get() != skip_surface); });
687 }
688
689 // Flush and invalidate surfaces
690 for (auto surface : touching_surfaces) {
691 FlushSurface(surface.get());
692 if (invalidate) {
693 Memory::RasterizerMarkRegionCached(surface->addr, surface->size, -1);
694 surface_cache.subtract(std::make_pair(boost::icl::interval<PAddr>::right_open(surface->addr, surface->addr + surface->size), std::set<std::shared_ptr<CachedSurface>>({ surface })));
695 }
696 }
697}
698
699void RasterizerCacheOpenGL::FlushAll() {
700 for (auto& surfaces : surface_cache) {
701 for (auto& surface : surfaces.second) {
702 FlushSurface(surface.get());
703 }
704 }
82} 705}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index b69651427..893d51138 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -6,38 +6,211 @@
6 6
7#include <map> 7#include <map>
8#include <memory> 8#include <memory>
9#include <set>
10
11#include <boost/icl/interval_map.hpp>
12
13#include "common/math_util.h"
14
15#include "core/hw/gpu.h"
9 16
10#include "video_core/pica.h" 17#include "video_core/pica.h"
11#include "video_core/debug_utils/debug_utils.h" 18#include "video_core/debug_utils/debug_utils.h"
12#include "video_core/renderer_opengl/gl_resource_manager.h" 19#include "video_core/renderer_opengl/gl_resource_manager.h"
13#include "video_core/renderer_opengl/gl_state.h" 20#include "video_core/renderer_opengl/gl_state.h"
14 21
22struct CachedSurface;
23
24using SurfaceCache = boost::icl::interval_map<PAddr, std::set<std::shared_ptr<CachedSurface>>>;
25
26struct CachedSurface {
27 enum class PixelFormat {
28 // First 5 formats are shared between textures and color buffers
29 RGBA8 = 0,
30 RGB8 = 1,
31 RGB5A1 = 2,
32 RGB565 = 3,
33 RGBA4 = 4,
34
35 // Texture-only formats
36 IA8 = 5,
37 RG8 = 6,
38 I8 = 7,
39 A8 = 8,
40 IA4 = 9,
41 I4 = 10,
42 A4 = 11,
43 ETC1 = 12,
44 ETC1A4 = 13,
45
46 // Depth buffer-only formats
47 D16 = 14,
48 // gap
49 D24 = 16,
50 D24S8 = 17,
51
52 Invalid = 255,
53 };
54
55 enum class SurfaceType {
56 Color = 0,
57 Texture = 1,
58 Depth = 2,
59 DepthStencil = 3,
60 Invalid = 4,
61 };
62
63 static unsigned int GetFormatBpp(CachedSurface::PixelFormat format) {
64 static const std::array<unsigned int, 18> bpp_table = {
65 32, // RGBA8
66 24, // RGB8
67 16, // RGB5A1
68 16, // RGB565
69 16, // RGBA4
70 16, // IA8
71 16, // RG8
72 8, // I8
73 8, // A8
74 8, // IA4
75 4, // I4
76 4, // A4
77 4, // ETC1
78 8, // ETC1A4
79 16, // D16
80 0,
81 24, // D24
82 32, // D24S8
83 };
84
85 ASSERT((unsigned int)format < ARRAY_SIZE(bpp_table));
86 return bpp_table[(unsigned int)format];
87 }
88
89 static PixelFormat PixelFormatFromTextureFormat(Pica::Regs::TextureFormat format) {
90 return ((unsigned int)format < 14) ? (PixelFormat)format : PixelFormat::Invalid;
91 }
92
93 static PixelFormat PixelFormatFromColorFormat(Pica::Regs::ColorFormat format) {
94 return ((unsigned int)format < 5) ? (PixelFormat)format : PixelFormat::Invalid;
95 }
96
97 static PixelFormat PixelFormatFromDepthFormat(Pica::Regs::DepthFormat format) {
98 return ((unsigned int)format < 4) ? (PixelFormat)((unsigned int)format + 14) : PixelFormat::Invalid;
99 }
100
101 static PixelFormat PixelFormatFromGPUPixelFormat(GPU::Regs::PixelFormat format) {
102 switch (format) {
103 // RGB565 and RGB5A1 are switched in PixelFormat compared to ColorFormat
104 case GPU::Regs::PixelFormat::RGB565:
105 return PixelFormat::RGB565;
106 case GPU::Regs::PixelFormat::RGB5A1:
107 return PixelFormat::RGB5A1;
108 default:
109 return ((unsigned int)format < 5) ? (PixelFormat)format : PixelFormat::Invalid;
110 }
111 }
112
113 static bool CheckFormatsBlittable(PixelFormat pixel_format_a, PixelFormat pixel_format_b) {
114 SurfaceType a_type = GetFormatType(pixel_format_a);
115 SurfaceType b_type = GetFormatType(pixel_format_b);
116
117 if ((a_type == SurfaceType::Color || a_type == SurfaceType::Texture) && (b_type == SurfaceType::Color || b_type == SurfaceType::Texture)) {
118 return true;
119 }
120
121 if (a_type == SurfaceType::Depth && b_type == SurfaceType::Depth) {
122 return true;
123 }
124
125 if (a_type == SurfaceType::DepthStencil && b_type == SurfaceType::DepthStencil) {
126 return true;
127 }
128
129 return false;
130 }
131
132 static SurfaceType GetFormatType(PixelFormat pixel_format) {
133 if ((unsigned int)pixel_format < 5) {
134 return SurfaceType::Color;
135 }
136
137 if ((unsigned int)pixel_format < 14) {
138 return SurfaceType::Texture;
139 }
140
141 if (pixel_format == PixelFormat::D16 || pixel_format == PixelFormat::D24) {
142 return SurfaceType::Depth;
143 }
144
145 if (pixel_format == PixelFormat::D24S8) {
146 return SurfaceType::DepthStencil;
147 }
148
149 return SurfaceType::Invalid;
150 }
151
152 u32 GetScaledWidth() const {
153 return (u32)(width * res_scale_width);
154 }
155
156 u32 GetScaledHeight() const {
157 return (u32)(height * res_scale_height);
158 }
159
160 PAddr addr;
161 u32 size;
162
163 PAddr min_valid;
164 PAddr max_valid;
165
166 OGLTexture texture;
167 u32 width;
168 u32 height;
169 u32 stride = 0;
170 float res_scale_width = 1.f;
171 float res_scale_height = 1.f;
172
173 bool is_tiled;
174 PixelFormat pixel_format;
175 bool dirty;
176};
177
/// Cache manager for CachedSurface objects. Surfaces are stored in surface_cache, keyed by the
/// physical address interval they cover, and can be looked up, created, blitted and written back
/// to memory through the methods below.
15class RasterizerCacheOpenGL : NonCopyable { 178class RasterizerCacheOpenGL : NonCopyable {
16public: 179public:
180 RasterizerCacheOpenGL();
17 ~RasterizerCacheOpenGL(); 181 ~RasterizerCacheOpenGL();
18 182
183 /// Blits one texture to another
184 bool BlitTextures(GLuint src_tex, GLuint dst_tex, CachedSurface::SurfaceType type, const MathUtil::Rectangle<int>& src_rect, const MathUtil::Rectangle<int>& dst_rect);
185
186 /// Attempt to blit one surface's texture to another
187 bool TryBlitSurfaces(CachedSurface* src_surface, const MathUtil::Rectangle<int>& src_rect, CachedSurface* dst_surface, const MathUtil::Rectangle<int>& dst_rect);
188
19 /// Loads a texture from 3DS memory to OpenGL and caches it (if not already cached) 189 /// Loads a texture from 3DS memory to OpenGL and caches it (if not already cached)
/// Returns nullptr when the requested address has no backing memory. If match_res_scale is true,
/// only cached surfaces with the same resolution scale as params are reused. If load_if_create is
/// false, a newly created surface only gets its texture allocated and no data is uploaded.
20 void LoadAndBindTexture(OpenGLState &state, unsigned texture_unit, const Pica::DebugUtils::TextureInfo& info); 190 CachedSurface* GetSurface(const CachedSurface& params, bool match_res_scale, bool load_if_create);
21 191
22 void LoadAndBindTexture(OpenGLState &state, unsigned texture_unit, const Pica::Regs::FullTextureConfig& config) { 192 /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from 3DS memory to OpenGL and caches it (if not already cached)
23 LoadAndBindTexture(state, texture_unit, Pica::DebugUtils::TextureInfo::FromPicaRegister(config.config, config.format)); 193 CachedSurface* GetSurfaceRect(const CachedSurface& params, bool match_res_scale, bool load_if_create, MathUtil::Rectangle<int>& out_rect);
24 }
25 194
26 /// Invalidate any cached resource intersecting the specified region. 195 /// Gets a surface based on the texture configuration
27 void InvalidateInRange(PAddr addr, u32 size, bool ignore_hash = false); 196 CachedSurface* GetTextureSurface(const Pica::Regs::FullTextureConfig& config);
28 197
29 /// Invalidate all cached OpenGL resources tracked by this cache manager 198 /// Gets the color and depth surfaces and rect (resolution scaled) based on the framebuffer configuration
30 void InvalidateAll(); 199 std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> GetFramebufferSurfaces(const Pica::Regs::FramebufferConfig& config);
31 200
32private: 201 /// Attempt to get a surface that exactly matches the fill region and format
33 struct CachedTexture { 202 CachedSurface* TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config);
34 OGLTexture texture; 203
35 GLuint width; 204 /// Write the surface back to memory
36 GLuint height; 205 void FlushSurface(CachedSurface* surface);
37 u32 size;
38 u64 hash;
39 PAddr addr;
40 };
41 206
42 std::map<PAddr, std::unique_ptr<CachedTexture>> texture_cache; 207 /// Write any cached resources overlapping the region back to memory (if dirty) and optionally invalidate them in the cache
208 void FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, bool invalidate);
209
210 /// Flush all cached resources tracked by this cache manager
211 void FlushAll();
212
213private:
/// Interval map from physical address ranges to the set of surfaces registered for each range
214 SurfaceCache surface_cache;
/// NOTE(review): presumably the framebuffer pair used when blitting between textures - confirm against the .cpp
215 OGLFramebuffer transfer_framebuffers[2];
43}; 216};
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index ee4b54ab9..646b4eaaf 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -198,6 +198,9 @@ static void AppendColorCombiner(std::string& out, TevStageConfig::Operation oper
198 case Operation::AddThenMultiply: 198 case Operation::AddThenMultiply:
199 out += "min(" + variable_name + "[0] + " + variable_name + "[1], vec3(1.0)) * " + variable_name + "[2]"; 199 out += "min(" + variable_name + "[0] + " + variable_name + "[1], vec3(1.0)) * " + variable_name + "[2]";
200 break; 200 break;
201 case Operation::Dot3_RGB:
202 out += "vec3(dot(" + variable_name + "[0] - vec3(0.5), " + variable_name + "[1] - vec3(0.5)) * 4.0)";
203 break;
201 default: 204 default:
202 out += "vec3(0.0)"; 205 out += "vec3(0.0)";
203 LOG_CRITICAL(Render_OpenGL, "Unknown color combiner operation: %u", operation); 206 LOG_CRITICAL(Render_OpenGL, "Unknown color combiner operation: %u", operation);
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 08e4d0b54..f04bdd8c5 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -3,6 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "video_core/pica.h" 5#include "video_core/pica.h"
6#include "video_core/renderer_opengl/gl_resource_manager.h"
6#include "video_core/renderer_opengl/gl_state.h" 7#include "video_core/renderer_opengl/gl_state.h"
7 8
8OpenGLState OpenGLState::cur_state; 9OpenGLState OpenGLState::cur_state;
@@ -48,17 +49,19 @@ OpenGLState::OpenGLState() {
48 texture_unit.sampler = 0; 49 texture_unit.sampler = 0;
49 } 50 }
50 51
51 for (auto& lut : lighting_lut) { 52 for (auto& lut : lighting_luts) {
52 lut.texture_1d = 0; 53 lut.texture_1d = 0;
53 } 54 }
54 55
55 draw.framebuffer = 0; 56 draw.read_framebuffer = 0;
57 draw.draw_framebuffer = 0;
56 draw.vertex_array = 0; 58 draw.vertex_array = 0;
57 draw.vertex_buffer = 0; 59 draw.vertex_buffer = 0;
60 draw.uniform_buffer = 0;
58 draw.shader_program = 0; 61 draw.shader_program = 0;
59} 62}
60 63
61void OpenGLState::Apply() { 64void OpenGLState::Apply() const {
62 // Culling 65 // Culling
63 if (cull.enabled != cur_state.cull.enabled) { 66 if (cull.enabled != cur_state.cull.enabled) {
64 if (cull.enabled) { 67 if (cull.enabled) {
@@ -175,16 +178,19 @@ void OpenGLState::Apply() {
175 } 178 }
176 179
177 // Lighting LUTs 180 // Lighting LUTs
178 for (unsigned i = 0; i < ARRAY_SIZE(lighting_lut); ++i) { 181 for (unsigned i = 0; i < ARRAY_SIZE(lighting_luts); ++i) {
179 if (lighting_lut[i].texture_1d != cur_state.lighting_lut[i].texture_1d) { 182 if (lighting_luts[i].texture_1d != cur_state.lighting_luts[i].texture_1d) {
180 glActiveTexture(GL_TEXTURE3 + i); 183 glActiveTexture(GL_TEXTURE3 + i);
181 glBindTexture(GL_TEXTURE_1D, lighting_lut[i].texture_1d); 184 glBindTexture(GL_TEXTURE_1D, lighting_luts[i].texture_1d);
182 } 185 }
183 } 186 }
184 187
185 // Framebuffer 188 // Framebuffer
186 if (draw.framebuffer != cur_state.draw.framebuffer) { 189 if (draw.read_framebuffer != cur_state.draw.read_framebuffer) {
187 glBindFramebuffer(GL_FRAMEBUFFER, draw.framebuffer); 190 glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
191 }
192 if (draw.draw_framebuffer != cur_state.draw.draw_framebuffer) {
193 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer);
188 } 194 }
189 195
190 // Vertex array 196 // Vertex array
@@ -210,45 +216,58 @@ void OpenGLState::Apply() {
210 cur_state = *this; 216 cur_state = *this;
211} 217}
212 218
213void OpenGLState::ResetTexture(GLuint id) { 219GLenum OpenGLState::CheckFBStatus(GLenum target) {
220 GLenum fb_status = glCheckFramebufferStatus(target);
221 if (fb_status != GL_FRAMEBUFFER_COMPLETE) {
222 const char* fb_description = (target == GL_READ_FRAMEBUFFER ? "READ" : (target == GL_DRAW_FRAMEBUFFER ? "DRAW" : "UNK"));
223 LOG_CRITICAL(Render_OpenGL, "OpenGL %s framebuffer check failed, status %X", fb_description, fb_status);
224 }
225
226 return fb_status;
227}
228
229void OpenGLState::ResetTexture(GLuint handle) {
214 for (auto& unit : cur_state.texture_units) { 230 for (auto& unit : cur_state.texture_units) {
215 if (unit.texture_2d == id) { 231 if (unit.texture_2d == handle) {
216 unit.texture_2d = 0; 232 unit.texture_2d = 0;
217 } 233 }
218 } 234 }
219} 235}
220 236
221void OpenGLState::ResetSampler(GLuint id) { 237void OpenGLState::ResetSampler(GLuint handle) {
222 for (auto& unit : cur_state.texture_units) { 238 for (auto& unit : cur_state.texture_units) {
223 if (unit.sampler == id) { 239 if (unit.sampler == handle) {
224 unit.sampler = 0; 240 unit.sampler = 0;
225 } 241 }
226 } 242 }
227} 243}
228 244
229void OpenGLState::ResetProgram(GLuint id) { 245void OpenGLState::ResetProgram(GLuint handle) {
230 if (cur_state.draw.shader_program == id) { 246 if (cur_state.draw.shader_program == handle) {
231 cur_state.draw.shader_program = 0; 247 cur_state.draw.shader_program = 0;
232 } 248 }
233} 249}
234 250
235void OpenGLState::ResetBuffer(GLuint id) { 251void OpenGLState::ResetBuffer(GLuint handle) {
236 if (cur_state.draw.vertex_buffer == id) { 252 if (cur_state.draw.vertex_buffer == handle) {
237 cur_state.draw.vertex_buffer = 0; 253 cur_state.draw.vertex_buffer = 0;
238 } 254 }
239 if (cur_state.draw.uniform_buffer == id) { 255 if (cur_state.draw.uniform_buffer == handle) {
240 cur_state.draw.uniform_buffer = 0; 256 cur_state.draw.uniform_buffer = 0;
241 } 257 }
242} 258}
243 259
244void OpenGLState::ResetVertexArray(GLuint id) { 260void OpenGLState::ResetVertexArray(GLuint handle) {
245 if (cur_state.draw.vertex_array == id) { 261 if (cur_state.draw.vertex_array == handle) {
246 cur_state.draw.vertex_array = 0; 262 cur_state.draw.vertex_array = 0;
247 } 263 }
248} 264}
249 265
250void OpenGLState::ResetFramebuffer(GLuint id) { 266void OpenGLState::ResetFramebuffer(GLuint handle) {
251 if (cur_state.draw.framebuffer == id) { 267 if (cur_state.draw.read_framebuffer == handle) {
252 cur_state.draw.framebuffer = 0; 268 cur_state.draw.read_framebuffer = 0;
269 }
270 if (cur_state.draw.draw_framebuffer == handle) {
271 cur_state.draw.draw_framebuffer = 0;
253 } 272 }
254} 273}
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index e848058d7..0f72e9004 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -5,6 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include <glad/glad.h> 7#include <glad/glad.h>
8#include <memory>
8 9
9class OpenGLState { 10class OpenGLState {
10public: 11public:
@@ -63,15 +64,15 @@ public:
63 64
64 struct { 65 struct {
65 GLuint texture_1d; // GL_TEXTURE_BINDING_1D 66 GLuint texture_1d; // GL_TEXTURE_BINDING_1D
66 } lighting_lut[6]; 67 } lighting_luts[6];
67 68
68 struct { 69 struct {
69 GLuint framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING 70 GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING
71 GLuint draw_framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING
70 GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING 72 GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING
71 GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING 73 GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING
72 GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING 74 GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING
73 GLuint shader_program; // GL_CURRENT_PROGRAM 75 GLuint shader_program; // GL_CURRENT_PROGRAM
74 bool shader_dirty;
75 } draw; 76 } draw;
76 77
77 OpenGLState(); 78 OpenGLState();
@@ -82,14 +83,18 @@ public:
82 } 83 }
83 84
84 /// Apply this state as the current OpenGL state 85 /// Apply this state as the current OpenGL state
85 void Apply(); 86 void Apply() const;
86 87
87 static void ResetTexture(GLuint id); 88 /// Check the status of the current OpenGL read or draw framebuffer configuration
88 static void ResetSampler(GLuint id); 89 static GLenum CheckFBStatus(GLenum target);
89 static void ResetProgram(GLuint id); 90
90 static void ResetBuffer(GLuint id); 91 /// Resets and unbinds any references to the given resource in the current OpenGL state
91 static void ResetVertexArray(GLuint id); 92 static void ResetTexture(GLuint handle);
92 static void ResetFramebuffer(GLuint id); 93 static void ResetSampler(GLuint handle);
94 static void ResetProgram(GLuint handle);
95 static void ResetBuffer(GLuint handle);
96 static void ResetVertexArray(GLuint handle);
97 static void ResetFramebuffer(GLuint handle);
93 98
94private: 99private:
95 static OpenGLState cur_state; 100 static OpenGLState cur_state;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 11c4d0daf..8f907593f 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -107,7 +107,7 @@ void RendererOpenGL::SwapBuffers() {
107 OpenGLState prev_state = OpenGLState::GetCurState(); 107 OpenGLState prev_state = OpenGLState::GetCurState();
108 state.Apply(); 108 state.Apply();
109 109
110 for(int i : {0, 1}) { 110 for (int i : {0, 1}) {
111 const auto& framebuffer = GPU::g_regs.framebuffer_config[i]; 111 const auto& framebuffer = GPU::g_regs.framebuffer_config[i];
112 112
113 // Main LCD (0): 0x1ED02204, Sub LCD (1): 0x1ED02A04 113 // Main LCD (0): 0x1ED02204, Sub LCD (1): 0x1ED02A04
@@ -117,25 +117,25 @@ void RendererOpenGL::SwapBuffers() {
117 LCD::Read(color_fill.raw, lcd_color_addr); 117 LCD::Read(color_fill.raw, lcd_color_addr);
118 118
119 if (color_fill.is_enabled) { 119 if (color_fill.is_enabled) {
120 LoadColorToActiveGLTexture(color_fill.color_r, color_fill.color_g, color_fill.color_b, textures[i]); 120 LoadColorToActiveGLTexture(color_fill.color_r, color_fill.color_g, color_fill.color_b, screen_infos[i].texture);
121 121
122 // Resize the texture in case the framebuffer size has changed 122 // Resize the texture in case the framebuffer size has changed
123 textures[i].width = 1; 123 screen_infos[i].texture.width = 1;
124 textures[i].height = 1; 124 screen_infos[i].texture.height = 1;
125 } else { 125 } else {
126 if (textures[i].width != (GLsizei)framebuffer.width || 126 if (screen_infos[i].texture.width != (GLsizei)framebuffer.width ||
127 textures[i].height != (GLsizei)framebuffer.height || 127 screen_infos[i].texture.height != (GLsizei)framebuffer.height ||
128 textures[i].format != framebuffer.color_format) { 128 screen_infos[i].texture.format != framebuffer.color_format) {
129 // Reallocate texture if the framebuffer size has changed. 129 // Reallocate texture if the framebuffer size has changed.
130 // This is expected to not happen very often and hence should not be a 130 // This is expected to not happen very often and hence should not be a
131 // performance problem. 131 // performance problem.
132 ConfigureFramebufferTexture(textures[i], framebuffer); 132 ConfigureFramebufferTexture(screen_infos[i].texture, framebuffer);
133 } 133 }
134 LoadFBToActiveGLTexture(framebuffer, textures[i]); 134 LoadFBToScreenInfo(framebuffer, screen_infos[i]);
135 135
136 // Resize the texture in case the framebuffer size has changed 136 // Resize the texture in case the framebuffer size has changed
137 textures[i].width = framebuffer.width; 137 screen_infos[i].texture.width = framebuffer.width;
138 textures[i].height = framebuffer.height; 138 screen_infos[i].texture.height = framebuffer.height;
139 } 139 }
140 } 140 }
141 141
@@ -166,8 +166,8 @@ void RendererOpenGL::SwapBuffers() {
166/** 166/**
167 * Loads framebuffer from emulated memory into the active OpenGL texture. 167 * Loads framebuffer from emulated memory into the active OpenGL texture.
168 */ 168 */
169void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig& framebuffer, 169void RendererOpenGL::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer,
170 const TextureInfo& texture) { 170 ScreenInfo& screen_info) {
171 171
172 const PAddr framebuffer_addr = framebuffer.active_fb == 0 ? 172 const PAddr framebuffer_addr = framebuffer.active_fb == 0 ?
173 framebuffer.address_left1 : framebuffer.address_left2; 173 framebuffer.address_left1 : framebuffer.address_left2;
@@ -177,8 +177,6 @@ void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig&
177 framebuffer_addr, (int)framebuffer.width, 177 framebuffer_addr, (int)framebuffer.width,
178 (int)framebuffer.height, (int)framebuffer.format); 178 (int)framebuffer.height, (int)framebuffer.format);
179 179
180 const u8* framebuffer_data = Memory::GetPhysicalPointer(framebuffer_addr);
181
182 int bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format); 180 int bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format);
183 size_t pixel_stride = framebuffer.stride / bpp; 181 size_t pixel_stride = framebuffer.stride / bpp;
184 182
@@ -189,24 +187,34 @@ void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig&
189 // only allows rows to have a memory alignement of 4. 187 // only allows rows to have a memory alignement of 4.
190 ASSERT(pixel_stride % 4 == 0); 188 ASSERT(pixel_stride % 4 == 0);
191 189
192 state.texture_units[0].texture_2d = texture.handle; 190 if (!Rasterizer()->AccelerateDisplay(framebuffer, framebuffer_addr, pixel_stride, screen_info)) {
193 state.Apply(); 191 // Reset the screen info's display texture to its own permanent texture
192 screen_info.display_texture = screen_info.texture.resource.handle;
193 screen_info.display_texcoords = MathUtil::Rectangle<float>(0.f, 0.f, 1.f, 1.f);
194 194
195 glActiveTexture(GL_TEXTURE0); 195 Memory::RasterizerFlushRegion(framebuffer_addr, framebuffer.stride * framebuffer.height);
196 glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)pixel_stride);
197 196
198 // Update existing texture 197 const u8* framebuffer_data = Memory::GetPhysicalPointer(framebuffer_addr);
199 // TODO: Test what happens on hardware when you change the framebuffer dimensions so that they
200 // differ from the LCD resolution.
201 // TODO: Applications could theoretically crash Citra here by specifying too large
202 // framebuffer sizes. We should make sure that this cannot happen.
203 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height,
204 texture.gl_format, texture.gl_type, framebuffer_data);
205 198
206 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); 199 state.texture_units[0].texture_2d = screen_info.texture.resource.handle;
200 state.Apply();
207 201
208 state.texture_units[0].texture_2d = 0; 202 glActiveTexture(GL_TEXTURE0);
209 state.Apply(); 203 glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)pixel_stride);
204
205 // Update existing texture
206 // TODO: Test what happens on hardware when you change the framebuffer dimensions so that they
207 // differ from the LCD resolution.
208 // TODO: Applications could theoretically crash Citra here by specifying too large
209 // framebuffer sizes. We should make sure that this cannot happen.
210 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height,
211 screen_info.texture.gl_format, screen_info.texture.gl_type, framebuffer_data);
212
213 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
214
215 state.texture_units[0].texture_2d = 0;
216 state.Apply();
217 }
210} 218}
211 219
212/** 220/**
@@ -216,7 +224,7 @@ void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig&
216 */ 224 */
217void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, 225void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b,
218 const TextureInfo& texture) { 226 const TextureInfo& texture) {
219 state.texture_units[0].texture_2d = texture.handle; 227 state.texture_units[0].texture_2d = texture.resource.handle;
220 state.Apply(); 228 state.Apply();
221 229
222 glActiveTexture(GL_TEXTURE0); 230 glActiveTexture(GL_TEXTURE0);
@@ -224,6 +232,9 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color
224 232
225 // Update existing texture 233 // Update existing texture
226 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, framebuffer_data); 234 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, framebuffer_data);
235
236 state.texture_units[0].texture_2d = 0;
237 state.Apply();
227} 238}
228 239
229/** 240/**
@@ -233,20 +244,22 @@ void RendererOpenGL::InitOpenGLObjects() {
233 glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, 0.0f); 244 glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, 0.0f);
234 245
235 // Link shaders and get variable locations 246 // Link shaders and get variable locations
236 program_id = GLShader::LoadProgram(vertex_shader, fragment_shader); 247 shader.Create(vertex_shader, fragment_shader);
237 uniform_modelview_matrix = glGetUniformLocation(program_id, "modelview_matrix"); 248 state.draw.shader_program = shader.handle;
238 uniform_color_texture = glGetUniformLocation(program_id, "color_texture"); 249 state.Apply();
239 attrib_position = glGetAttribLocation(program_id, "vert_position"); 250 uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix");
240 attrib_tex_coord = glGetAttribLocation(program_id, "vert_tex_coord"); 251 uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture");
252 attrib_position = glGetAttribLocation(shader.handle, "vert_position");
253 attrib_tex_coord = glGetAttribLocation(shader.handle, "vert_tex_coord");
241 254
242 // Generate VBO handle for drawing 255 // Generate VBO handle for drawing
243 glGenBuffers(1, &vertex_buffer_handle); 256 vertex_buffer.Create();
244 257
245 // Generate VAO 258 // Generate VAO
246 glGenVertexArrays(1, &vertex_array_handle); 259 vertex_array.Create();
247 260
248 state.draw.vertex_array = vertex_array_handle; 261 state.draw.vertex_array = vertex_array.handle;
249 state.draw.vertex_buffer = vertex_buffer_handle; 262 state.draw.vertex_buffer = vertex_buffer.handle;
250 state.draw.uniform_buffer = 0; 263 state.draw.uniform_buffer = 0;
251 state.Apply(); 264 state.Apply();
252 265
@@ -258,13 +271,13 @@ void RendererOpenGL::InitOpenGLObjects() {
258 glEnableVertexAttribArray(attrib_tex_coord); 271 glEnableVertexAttribArray(attrib_tex_coord);
259 272
260 // Allocate textures for each screen 273 // Allocate textures for each screen
261 for (auto& texture : textures) { 274 for (auto& screen_info : screen_infos) {
262 glGenTextures(1, &texture.handle); 275 screen_info.texture.resource.Create();
263 276
264 // Allocation of storage is deferred until the first frame, when we 277 // Allocation of storage is deferred until the first frame, when we
265 // know the framebuffer size. 278 // know the framebuffer size.
266 279
267 state.texture_units[0].texture_2d = texture.handle; 280 state.texture_units[0].texture_2d = screen_info.texture.resource.handle;
268 state.Apply(); 281 state.Apply();
269 282
270 glActiveTexture(GL_TEXTURE0); 283 glActiveTexture(GL_TEXTURE0);
@@ -273,6 +286,8 @@ void RendererOpenGL::InitOpenGLObjects() {
273 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 286 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
274 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); 287 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
275 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); 288 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
289
290 screen_info.display_texture = screen_info.texture.resource.handle;
276 } 291 }
277 292
278 state.texture_units[0].texture_2d = 0; 293 state.texture_units[0].texture_2d = 0;
@@ -327,30 +342,38 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
327 UNIMPLEMENTED(); 342 UNIMPLEMENTED();
328 } 343 }
329 344
330 state.texture_units[0].texture_2d = texture.handle; 345 state.texture_units[0].texture_2d = texture.resource.handle;
331 state.Apply(); 346 state.Apply();
332 347
333 glActiveTexture(GL_TEXTURE0); 348 glActiveTexture(GL_TEXTURE0);
334 glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0, 349 glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0,
335 texture.gl_format, texture.gl_type, nullptr); 350 texture.gl_format, texture.gl_type, nullptr);
351
352 state.texture_units[0].texture_2d = 0;
353 state.Apply();
336} 354}
337 355
338/** 356/**
339 * Draws a single texture to the emulator window, rotating the texture to correct for the 3DS's LCD rotation. 357 * Draws a single texture to the emulator window, rotating the texture to correct for the 3DS's LCD rotation.
340 */ 358 */
341void RendererOpenGL::DrawSingleScreenRotated(const TextureInfo& texture, float x, float y, float w, float h) { 359void RendererOpenGL::DrawSingleScreenRotated(const ScreenInfo& screen_info, float x, float y, float w, float h) {
360 auto& texcoords = screen_info.display_texcoords;
361
342 std::array<ScreenRectVertex, 4> vertices = {{ 362 std::array<ScreenRectVertex, 4> vertices = {{
343 ScreenRectVertex(x, y, 1.f, 0.f), 363 ScreenRectVertex(x, y, texcoords.bottom, texcoords.left),
344 ScreenRectVertex(x+w, y, 1.f, 1.f), 364 ScreenRectVertex(x+w, y, texcoords.bottom, texcoords.right),
345 ScreenRectVertex(x, y+h, 0.f, 0.f), 365 ScreenRectVertex(x, y+h, texcoords.top, texcoords.left),
346 ScreenRectVertex(x+w, y+h, 0.f, 1.f), 366 ScreenRectVertex(x+w, y+h, texcoords.top, texcoords.right),
347 }}; 367 }};
348 368
349 state.texture_units[0].texture_2d = texture.handle; 369 state.texture_units[0].texture_2d = screen_info.display_texture;
350 state.Apply(); 370 state.Apply();
351 371
352 glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices.data()); 372 glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices.data());
353 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); 373 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
374
375 state.texture_units[0].texture_2d = 0;
376 state.Apply();
354} 377}
355 378
356/** 379/**
@@ -362,9 +385,6 @@ void RendererOpenGL::DrawScreens() {
362 glViewport(0, 0, layout.width, layout.height); 385 glViewport(0, 0, layout.width, layout.height);
363 glClear(GL_COLOR_BUFFER_BIT); 386 glClear(GL_COLOR_BUFFER_BIT);
364 387
365 state.draw.shader_program = program_id;
366 state.Apply();
367
368 // Set projection matrix 388 // Set projection matrix
369 std::array<GLfloat, 3 * 2> ortho_matrix = MakeOrthographicMatrix((float)layout.width, 389 std::array<GLfloat, 3 * 2> ortho_matrix = MakeOrthographicMatrix((float)layout.width,
370 (float)layout.height); 390 (float)layout.height);
@@ -374,9 +394,9 @@ void RendererOpenGL::DrawScreens() {
374 glActiveTexture(GL_TEXTURE0); 394 glActiveTexture(GL_TEXTURE0);
375 glUniform1i(uniform_color_texture, 0); 395 glUniform1i(uniform_color_texture, 0);
376 396
377 DrawSingleScreenRotated(textures[0], (float)layout.top_screen.left, (float)layout.top_screen.top, 397 DrawSingleScreenRotated(screen_infos[0], (float)layout.top_screen.left, (float)layout.top_screen.top,
378 (float)layout.top_screen.GetWidth(), (float)layout.top_screen.GetHeight()); 398 (float)layout.top_screen.GetWidth(), (float)layout.top_screen.GetHeight());
379 DrawSingleScreenRotated(textures[1], (float)layout.bottom_screen.left,(float)layout.bottom_screen.top, 399 DrawSingleScreenRotated(screen_infos[1], (float)layout.bottom_screen.left,(float)layout.bottom_screen.top,
380 (float)layout.bottom_screen.GetWidth(), (float)layout.bottom_screen.GetHeight()); 400 (float)layout.bottom_screen.GetWidth(), (float)layout.bottom_screen.GetHeight());
381 401
382 m_current_frame++; 402 m_current_frame++;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index fe4d142a5..5ca5255ac 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -11,10 +11,28 @@
11#include "core/hw/gpu.h" 11#include "core/hw/gpu.h"
12 12
13#include "video_core/renderer_base.h" 13#include "video_core/renderer_base.h"
14#include "video_core/renderer_opengl/gl_resource_manager.h"
14#include "video_core/renderer_opengl/gl_state.h" 15#include "video_core/renderer_opengl/gl_state.h"
15 16
16class EmuWindow; 17class EmuWindow;
17 18
19/// Structure used for storing information about the textures for each 3DS screen
20struct TextureInfo {
21 OGLTexture resource;
22 GLsizei width;
23 GLsizei height;
24 GPU::Regs::PixelFormat format;
25 GLenum gl_format;
26 GLenum gl_type;
27};
28
29/// Structure used for storing information about the display target for each 3DS screen
30struct ScreenInfo {
31 GLuint display_texture;
32 MathUtil::Rectangle<float> display_texcoords;
33 TextureInfo texture;
34};
35
18class RendererOpenGL : public RendererBase { 36class RendererOpenGL : public RendererBase {
19public: 37public:
20 38
@@ -37,26 +55,16 @@ public:
37 void ShutDown() override; 55 void ShutDown() override;
38 56
39private: 57private:
40 /// Structure used for storing information about the textures for each 3DS screen
41 struct TextureInfo {
42 GLuint handle;
43 GLsizei width;
44 GLsizei height;
45 GPU::Regs::PixelFormat format;
46 GLenum gl_format;
47 GLenum gl_type;
48 };
49
50 void InitOpenGLObjects(); 58 void InitOpenGLObjects();
51 void ConfigureFramebufferTexture(TextureInfo& texture, 59 void ConfigureFramebufferTexture(TextureInfo& texture,
52 const GPU::Regs::FramebufferConfig& framebuffer); 60 const GPU::Regs::FramebufferConfig& framebuffer);
53 void DrawScreens(); 61 void DrawScreens();
54 void DrawSingleScreenRotated(const TextureInfo& texture, float x, float y, float w, float h); 62 void DrawSingleScreenRotated(const ScreenInfo& screen_info, float x, float y, float w, float h);
55 void UpdateFramerate(); 63 void UpdateFramerate();
56 64
57 // Loads framebuffer from emulated memory into the active OpenGL texture. 65 // Loads framebuffer from emulated memory into the display information structure
58 void LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig& framebuffer, 66 void LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer,
59 const TextureInfo& texture); 67 ScreenInfo& screen_info);
60 // Fills active OpenGL texture with the given RGB color. 68 // Fills active OpenGL texture with the given RGB color.
61 void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, 69 void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b,
62 const TextureInfo& texture); 70 const TextureInfo& texture);
@@ -69,10 +77,10 @@ private:
69 OpenGLState state; 77 OpenGLState state;
70 78
71 // OpenGL object IDs 79 // OpenGL object IDs
72 GLuint vertex_array_handle; 80 OGLVertexArray vertex_array;
73 GLuint vertex_buffer_handle; 81 OGLBuffer vertex_buffer;
74 GLuint program_id; 82 OGLShader shader;
75 std::array<TextureInfo, 2> textures; ///< Textures for top and bottom screens respectively 83 std::array<ScreenInfo, 2> screen_infos; ///< Display information for top and bottom screens respectively
76 // Shader uniform location indices 84 // Shader uniform location indices
77 GLuint uniform_modelview_matrix; 85 GLuint uniform_modelview_matrix;
78 GLuint uniform_color_texture; 86 GLuint uniform_color_texture;
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index 75301accd..043e99190 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -9,7 +9,6 @@
9 9
10#include "common/hash.h" 10#include "common/hash.h"
11#include "common/microprofile.h" 11#include "common/microprofile.h"
12#include "common/profiler.h"
13 12
14#include "video_core/debug_utils/debug_utils.h" 13#include "video_core/debug_utils/debug_utils.h"
15#include "video_core/pica.h" 14#include "video_core/pica.h"
@@ -57,13 +56,11 @@ void Shutdown() {
57#endif // ARCHITECTURE_x86_64 56#endif // ARCHITECTURE_x86_64
58} 57}
59 58
60static Common::Profiling::TimingCategory shader_category("Vertex Shader");
61MICROPROFILE_DEFINE(GPU_VertexShader, "GPU", "Vertex Shader", MP_RGB(50, 50, 240)); 59MICROPROFILE_DEFINE(GPU_VertexShader, "GPU", "Vertex Shader", MP_RGB(50, 50, 240));
62 60
63OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes) { 61OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes) {
64 auto& config = g_state.regs.vs; 62 auto& config = g_state.regs.vs;
65 63
66 Common::Profiling::ScopeTimer timer(shader_category);
67 MICROPROFILE_SCOPE(GPU_VertexShader); 64 MICROPROFILE_SCOPE(GPU_VertexShader);
68 65
69 state.program_counter = config.main_offset; 66 state.program_counter = config.main_offset;
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index 9c5bd97bd..9ce9344d2 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -25,7 +25,7 @@ namespace Pica {
25namespace Shader { 25namespace Shader {
26 26
27struct InputVertex { 27struct InputVertex {
28 Math::Vec4<float24> attr[16]; 28 alignas(16) Math::Vec4<float24> attr[16];
29}; 29};
30 30
31struct OutputVertex { 31struct OutputVertex {
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index b47d3beda..b7747fa42 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -148,7 +148,7 @@ static Instruction GetVertexShaderInstruction(size_t offset) {
148} 148}
149 149
150static void LogCritical(const char* msg) { 150static void LogCritical(const char* msg) {
151 LOG_CRITICAL(HW_GPU, msg); 151 LOG_CRITICAL(HW_GPU, "%s", msg);
152} 152}
153 153
154void JitShader::Compile_Assert(bool condition, const char* msg) { 154void JitShader::Compile_Assert(bool condition, const char* msg) {
@@ -795,6 +795,8 @@ void JitShader::FindReturnOffsets() {
795 case OpCode::Id::CALLU: 795 case OpCode::Id::CALLU:
796 return_offsets.push_back(instr.flow_control.dest_offset + instr.flow_control.num_instructions); 796 return_offsets.push_back(instr.flow_control.dest_offset + instr.flow_control.num_instructions);
797 break; 797 break;
798 default:
799 break;
798 } 800 }
799 } 801 }
800 802
@@ -854,7 +856,7 @@ void JitShader::Compile() {
854 uintptr_t size = reinterpret_cast<uintptr_t>(GetCodePtr()) - reinterpret_cast<uintptr_t>(program); 856 uintptr_t size = reinterpret_cast<uintptr_t>(GetCodePtr()) - reinterpret_cast<uintptr_t>(program);
855 ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!"); 857 ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!");
856 858
857 LOG_DEBUG(HW_GPU, "Compiled shader size=%d", size); 859 LOG_DEBUG(HW_GPU, "Compiled shader size=%lu", size);
858} 860}
859 861
860JitShader::JitShader() { 862JitShader::JitShader() {
diff --git a/src/video_core/swrasterizer.h b/src/video_core/swrasterizer.h
index 9a9a76d7a..090f899bc 100644
--- a/src/video_core/swrasterizer.h
+++ b/src/video_core/swrasterizer.h
@@ -11,16 +11,14 @@
11namespace VideoCore { 11namespace VideoCore {
12 12
13class SWRasterizer : public RasterizerInterface { 13class SWRasterizer : public RasterizerInterface {
14 void InitObjects() override {}
15 void Reset() override {}
16 void AddTriangle(const Pica::Shader::OutputVertex& v0, 14 void AddTriangle(const Pica::Shader::OutputVertex& v0,
17 const Pica::Shader::OutputVertex& v1, 15 const Pica::Shader::OutputVertex& v1,
18 const Pica::Shader::OutputVertex& v2) override; 16 const Pica::Shader::OutputVertex& v2) override;
19 void DrawTriangles() override {} 17 void DrawTriangles() override {}
20 void FlushFramebuffer() override {}
21 void NotifyPicaRegisterChanged(u32 id) override {} 18 void NotifyPicaRegisterChanged(u32 id) override {}
19 void FlushAll() override {}
22 void FlushRegion(PAddr addr, u32 size) override {} 20 void FlushRegion(PAddr addr, u32 size) override {}
23 void InvalidateRegion(PAddr addr, u32 size) override {} 21 void FlushAndInvalidateRegion(PAddr addr, u32 size) override {}
24}; 22};
25 23
26} 24}
diff --git a/src/video_core/vertex_loader.cpp b/src/video_core/vertex_loader.cpp
new file mode 100644
index 000000000..8a3d91896
--- /dev/null
+++ b/src/video_core/vertex_loader.cpp
@@ -0,0 +1,140 @@
1#include <cmath>
2#include <string>
3
4#include "boost/range/algorithm/fill.hpp"
5
6#include "common/assert.h"
7#include "common/alignment.h"
8#include "common/bit_field.h"
9#include "common/common_funcs.h"
10#include "common/common_types.h"
11#include "common/logging/log.h"
12
13#include "core/memory.h"
14
15#include "video_core/debug_utils/debug_utils.h"
16#include "video_core/pica.h"
17#include "video_core/pica_state.h"
18#include "video_core/pica_types.h"
19#include "video_core/vertex_loader.h"
20
21namespace Pica {
22
23void VertexLoader::Setup(const Pica::Regs& regs) {
24 const auto& attribute_config = regs.vertex_attributes;
25 num_total_attributes = attribute_config.GetNumTotalAttributes();
26
27 boost::fill(vertex_attribute_sources, 0xdeadbeef);
28
29 for (int i = 0; i < 16; i++) {
30 vertex_attribute_is_default[i] = attribute_config.IsDefaultAttribute(i);
31 }
32
33 // Setup attribute data from loaders
34 for (int loader = 0; loader < 12; ++loader) {
35 const auto& loader_config = attribute_config.attribute_loaders[loader];
36
37 u32 offset = 0;
38
39 // TODO: What happens if a loader overwrites a previous one's data?
40 for (unsigned component = 0; component < loader_config.component_count; ++component) {
41 if (component >= 12) {
42 LOG_ERROR(HW_GPU, "Overflow in the vertex attribute loader %u trying to load component %u", loader, component);
43 continue;
44 }
45
46 u32 attribute_index = loader_config.GetComponent(component);
47 if (attribute_index < 12) {
48 offset = Common::AlignUp(offset, attribute_config.GetElementSizeInBytes(attribute_index));
49 vertex_attribute_sources[attribute_index] = loader_config.data_offset + offset;
50 vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count);
51 vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index);
52 vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index);
53 offset += attribute_config.GetStride(attribute_index);
54 } else if (attribute_index < 16) {
55 // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively
56 offset = Common::AlignUp(offset, 4);
57 offset += (attribute_index - 11) * 4;
58 } else {
59 UNREACHABLE(); // This is truly unreachable due to the number of bits for each component
60 }
61 }
62 }
63}
64
65void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, DebugUtils::MemoryAccessTracker& memory_accesses) {
66 for (int i = 0; i < num_total_attributes; ++i) {
67 if (vertex_attribute_elements[i] != 0) {
68 // Load per-vertex data from the loader arrays
69 u32 source_addr = base_address + vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex;
70
71 if (g_debug_context && Pica::g_debug_context->recorder) {
72 memory_accesses.AddAccess(source_addr, vertex_attribute_elements[i] * (
73 (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4
74 : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1));
75 }
76
77 switch (vertex_attribute_formats[i]) {
78 case Regs::VertexAttributeFormat::BYTE:
79 {
80 const s8* srcdata = reinterpret_cast<const s8*>(Memory::GetPhysicalPointer(source_addr));
81 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
82 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
83 }
84 break;
85 }
86 case Regs::VertexAttributeFormat::UBYTE:
87 {
88 const u8* srcdata = reinterpret_cast<const u8*>(Memory::GetPhysicalPointer(source_addr));
89 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
90 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
91 }
92 break;
93 }
94 case Regs::VertexAttributeFormat::SHORT:
95 {
96 const s16* srcdata = reinterpret_cast<const s16*>(Memory::GetPhysicalPointer(source_addr));
97 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
98 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
99 }
100 break;
101 }
102 case Regs::VertexAttributeFormat::FLOAT:
103 {
104 const float* srcdata = reinterpret_cast<const float*>(Memory::GetPhysicalPointer(source_addr));
105 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
106 input.attr[i][comp] = float24::FromFloat32(srcdata[comp]);
107 }
108 break;
109 }
110 }
111
112 // Default attribute values set if array elements have < 4 components. This
113 // is *not* carried over from the default attribute settings even if they're
114 // enabled for this attribute.
115 for (unsigned int comp = vertex_attribute_elements[i]; comp < 4; ++comp) {
116 input.attr[i][comp] = comp == 3 ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f);
117 }
118
119 LOG_TRACE(HW_GPU, "Loaded %d components of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f %f %f %f",
120 vertex_attribute_elements[i], i, vertex, index,
121 base_address,
122 vertex_attribute_sources[i],
123 vertex_attribute_strides[i] * vertex,
124 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
125 } else if (vertex_attribute_is_default[i]) {
126 // Load the default attribute if we're configured to do so
127 input.attr[i] = g_state.vs.default_attributes[i];
128 LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)",
129 i, vertex, index,
130 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),
131 input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
132 } else {
133 // TODO(yuriks): In this case, no data gets loaded and the vertex
134 // remains with the last value it had. This isn't currently maintained
135 // as global state, however, and so won't work in Citra yet.
136 }
137 }
138}
139
140} // namespace Pica \ No newline at end of file
diff --git a/src/video_core/vertex_loader.h b/src/video_core/vertex_loader.h
new file mode 100644
index 000000000..ff42d1596
--- /dev/null
+++ b/src/video_core/vertex_loader.h
@@ -0,0 +1,28 @@
1#pragma once
2
3#include <iterator>
4#include <algorithm>
5
6#include "video_core/pica.h"
7#include "video_core/shader/shader.h"
8#include "video_core/debug_utils/debug_utils.h"
9
10namespace Pica {
11
12class VertexLoader {
13public:
14 void Setup(const Pica::Regs& regs);
15 void LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, DebugUtils::MemoryAccessTracker& memory_accesses);
16
17 int GetNumTotalAttributes() const { return num_total_attributes; }
18
19private:
20 u32 vertex_attribute_sources[16];
21 u32 vertex_attribute_strides[16] = {};
22 Regs::VertexAttributeFormat vertex_attribute_formats[16] = {};
23 u32 vertex_attribute_elements[16] = {};
24 bool vertex_attribute_is_default[16];
25 int num_total_attributes;
26};
27
28} // namespace Pica
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 256899c89..855286173 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -25,6 +25,7 @@ std::unique_ptr<RendererBase> g_renderer; ///< Renderer plugin
25 25
26std::atomic<bool> g_hw_renderer_enabled; 26std::atomic<bool> g_hw_renderer_enabled;
27std::atomic<bool> g_shader_jit_enabled; 27std::atomic<bool> g_shader_jit_enabled;
28std::atomic<bool> g_scaled_resolution_enabled;
28 29
29/// Initialize the video core 30/// Initialize the video core
30bool Init(EmuWindow* emu_window) { 31bool Init(EmuWindow* emu_window) {
diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h
index bca67fb8c..30267489e 100644
--- a/src/video_core/video_core.h
+++ b/src/video_core/video_core.h
@@ -36,6 +36,7 @@ extern EmuWindow* g_emu_window; ///< Emu window
36// TODO: Wrap these in a user settings struct along with any other graphics settings (often set from qt ui) 36// TODO: Wrap these in a user settings struct along with any other graphics settings (often set from qt ui)
37extern std::atomic<bool> g_hw_renderer_enabled; 37extern std::atomic<bool> g_hw_renderer_enabled;
38extern std::atomic<bool> g_shader_jit_enabled; 38extern std::atomic<bool> g_shader_jit_enabled;
39extern std::atomic<bool> g_scaled_resolution_enabled;
39 40
40/// Start the video core 41/// Start the video core
41void Start(); 42void Start();