m---------  externals/cubeb | 0
-rw-r--r--  src/audio_core/audio_renderer.h | 14
-rw-r--r--  src/audio_core/codec.cpp | 4
-rw-r--r--  src/audio_core/cubeb_sink.cpp | 23
-rw-r--r--  src/audio_core/cubeb_sink.h | 4
-rw-r--r--  src/common/color.h | 40
-rw-r--r--  src/common/math_util.h | 4
-rw-r--r--  src/common/quaternion.h | 10
-rw-r--r--  src/common/swap.h | 6
-rw-r--r--  src/common/vector_math.h | 4
-rw-r--r--  src/core/file_sys/vfs_vector.cpp | 2
-rw-r--r--  src/core/frontend/emu_window.cpp | 2
-rw-r--r--  src/core/frontend/emu_window.h | 2
-rw-r--r--  src/core/frontend/framebuffer_layout.cpp | 12
-rw-r--r--  src/core/frontend/framebuffer_layout.h | 2
-rw-r--r--  src/core/frontend/input.h | 2
-rw-r--r--  src/core/hle/kernel/errors.h | 1
-rw-r--r--  src/core/hle/kernel/handle_table.cpp | 40
-rw-r--r--  src/core/hle/kernel/handle_table.h | 25
-rw-r--r--  src/core/hle/kernel/process.cpp | 8
-rw-r--r--  src/core/hle/kernel/process_capability.cpp | 4
-rw-r--r--  src/core/hle/kernel/process_capability.h | 4
-rw-r--r--  src/core/hle/service/audio/audren_u.cpp | 26
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp | 2
-rw-r--r--  src/core/hle/service/nvdrv/devices/nvdisp_disp0.h | 2
-rw-r--r--  src/core/hle/service/nvflinger/buffer_queue.cpp | 2
-rw-r--r--  src/core/hle/service/nvflinger/buffer_queue.h | 4
-rw-r--r--  src/core/hle/service/nvflinger/nvflinger.cpp | 76
-rw-r--r--  src/core/hle/service/nvflinger/nvflinger.h | 11
-rw-r--r--  src/core/hle/service/vi/display/vi_display.cpp | 49
-rw-r--r--  src/core/hle/service/vi/display/vi_display.h | 74
-rw-r--r--  src/core/hle/service/vi/layer/vi_layer.cpp | 3
-rw-r--r--  src/core/hle/service/vi/layer/vi_layer.h | 37
-rw-r--r--  src/core/hle/service/vi/vi.cpp | 29
-rw-r--r--  src/core/memory.cpp | 17
-rw-r--r--  src/input_common/motion_emu.cpp | 28
-rw-r--r--  src/video_core/dma_pusher.cpp | 57
-rw-r--r--  src/video_core/dma_pusher.h | 5
-rw-r--r--  src/video_core/engines/fermi_2d.cpp | 8
-rw-r--r--  src/video_core/engines/maxwell_3d.cpp | 66
-rw-r--r--  src/video_core/engines/maxwell_3d.h | 17
-rw-r--r--  src/video_core/engines/shader_bytecode.h | 6
-rw-r--r--  src/video_core/engines/shader_header.h | 41
-rw-r--r--  src/video_core/gpu.h | 2
-rw-r--r--  src/video_core/rasterizer_cache.h | 18
-rw-r--r--  src/video_core/rasterizer_interface.h | 4
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.cpp | 32
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.h | 4
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 142
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer_cache.h | 91
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 44
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | 2
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_gen.cpp | 4
-rw-r--r--  src/video_core/renderer_opengl/renderer_opengl.cpp | 1
-rw-r--r--  src/video_core/renderer_opengl/renderer_opengl.h | 4
-rw-r--r--  src/video_core/renderer_vulkan/vk_memory_manager.cpp | 2
-rw-r--r--  src/video_core/shader/decode/memory.cpp | 2
-rw-r--r--  src/video_core/shader/decode/other.cpp | 15
-rw-r--r--  src/video_core/shader/track.cpp | 10
-rw-r--r--  src/yuzu/debugger/graphics/graphics_surface.cpp | 2
60 files changed, 803 insertions, 349 deletions
diff --git a/externals/cubeb b/externals/cubeb
Subproject 12b78c0edfa40007e41dbdcd9dfe367fbb98d01 Subproject 6f2420de8f155b10330cf973900ac7bdbfee589
diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h
index 201ec7a3c..b2e5d336c 100644
--- a/src/audio_core/audio_renderer.h
+++ b/src/audio_core/audio_renderer.h
@@ -46,16 +46,18 @@ struct AudioRendererParameter {
46 u32_le sample_rate; 46 u32_le sample_rate;
47 u32_le sample_count; 47 u32_le sample_count;
48 u32_le mix_buffer_count; 48 u32_le mix_buffer_count;
49 u32_le unknown_c; 49 u32_le submix_count;
50 u32_le voice_count; 50 u32_le voice_count;
51 u32_le sink_count; 51 u32_le sink_count;
52 u32_le effect_count; 52 u32_le effect_count;
53 u32_le unknown_1c; 53 u32_le performance_frame_count;
54 u8 unknown_20; 54 u8 is_voice_drop_enabled;
55 INSERT_PADDING_BYTES(3); 55 u8 unknown_21;
56 u8 unknown_22;
57 u8 execution_mode;
56 u32_le splitter_count; 58 u32_le splitter_count;
57 u32_le unknown_2c; 59 u32_le num_splitter_send_channels;
58 INSERT_PADDING_WORDS(1); 60 u32_le unknown_30;
59 u32_le revision; 61 u32_le revision;
60}; 62};
61static_assert(sizeof(AudioRendererParameter) == 52, "AudioRendererParameter is an invalid size"); 63static_assert(sizeof(AudioRendererParameter) == 52, "AudioRendererParameter is an invalid size");
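A quick size check for the renamed parameter block (a standalone sketch, not part of the diff; plain fixed-width types stand in for the project's u32_le/u8 aliases, assumed to be 4 and 1 bytes wide): eight u32 fields (32 bytes), four u8 flags (4 bytes), then four more u32 fields (16 bytes) still total 52 bytes, so the static_assert above continues to hold.

#include <cstdint>

// Hypothetical mirror of the new AudioRendererParameter layout, used only
// to confirm the 52-byte size with natural alignment (no implicit padding).
struct AudioRendererParameterSketch {
    std::uint32_t sample_rate;
    std::uint32_t sample_count;
    std::uint32_t mix_buffer_count;
    std::uint32_t submix_count;
    std::uint32_t voice_count;
    std::uint32_t sink_count;
    std::uint32_t effect_count;
    std::uint32_t performance_frame_count;
    std::uint8_t is_voice_drop_enabled;
    std::uint8_t unknown_21;
    std::uint8_t unknown_22;
    std::uint8_t execution_mode;
    std::uint32_t splitter_count;
    std::uint32_t num_splitter_send_channels;
    std::uint32_t unknown_30;
    std::uint32_t revision;
};
static_assert(sizeof(AudioRendererParameterSketch) == 52, "layout check");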
diff --git a/src/audio_core/codec.cpp b/src/audio_core/codec.cpp
index 454de798b..c5a0d98ce 100644
--- a/src/audio_core/codec.cpp
+++ b/src/audio_core/codec.cpp
@@ -68,8 +68,8 @@ std::vector<s16> DecodeADPCM(const u8* const data, std::size_t size, const ADPCM
68 } 68 }
69 } 69 }
70 70
71 state.yn1 = yn1; 71 state.yn1 = static_cast<s16>(yn1);
72 state.yn2 = yn2; 72 state.yn2 = static_cast<s16>(yn2);
73 73
74 return ret; 74 return ret;
75} 75}
diff --git a/src/audio_core/cubeb_sink.cpp b/src/audio_core/cubeb_sink.cpp
index 097328901..1da0b9f2a 100644
--- a/src/audio_core/cubeb_sink.cpp
+++ b/src/audio_core/cubeb_sink.cpp
@@ -12,6 +12,10 @@
12#include "common/ring_buffer.h" 12#include "common/ring_buffer.h"
13#include "core/settings.h" 13#include "core/settings.h"
14 14
15#ifdef _MSC_VER
16#include <objbase.h>
17#endif
18
15namespace AudioCore { 19namespace AudioCore {
16 20
17class CubebSinkStream final : public SinkStream { 21class CubebSinkStream final : public SinkStream {
@@ -46,7 +50,7 @@ public:
46 } 50 }
47 } 51 }
48 52
49 ~CubebSinkStream() { 53 ~CubebSinkStream() override {
50 if (!ctx) { 54 if (!ctx) {
51 return; 55 return;
52 } 56 }
@@ -75,11 +79,11 @@ public:
75 queue.Push(samples); 79 queue.Push(samples);
76 } 80 }
77 81
78 std::size_t SamplesInQueue(u32 num_channels) const override { 82 std::size_t SamplesInQueue(u32 channel_count) const override {
79 if (!ctx) 83 if (!ctx)
80 return 0; 84 return 0;
81 85
82 return queue.Size() / num_channels; 86 return queue.Size() / channel_count;
83 } 87 }
84 88
85 void Flush() override { 89 void Flush() override {
@@ -98,7 +102,7 @@ private:
98 u32 num_channels{}; 102 u32 num_channels{};
99 103
100 Common::RingBuffer<s16, 0x10000> queue; 104 Common::RingBuffer<s16, 0x10000> queue;
101 std::array<s16, 2> last_frame; 105 std::array<s16, 2> last_frame{};
102 std::atomic<bool> should_flush{}; 106 std::atomic<bool> should_flush{};
103 TimeStretcher time_stretch; 107 TimeStretcher time_stretch;
104 108
@@ -108,6 +112,11 @@ private:
108}; 112};
109 113
110CubebSink::CubebSink(std::string_view target_device_name) { 114CubebSink::CubebSink(std::string_view target_device_name) {
115 // Cubeb requires COM to be initialized on the thread calling cubeb_init on Windows
116#ifdef _MSC_VER
117 com_init_result = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
118#endif
119
111 if (cubeb_init(&ctx, "yuzu", nullptr) != CUBEB_OK) { 120 if (cubeb_init(&ctx, "yuzu", nullptr) != CUBEB_OK) {
112 LOG_CRITICAL(Audio_Sink, "cubeb_init failed"); 121 LOG_CRITICAL(Audio_Sink, "cubeb_init failed");
113 return; 122 return;
@@ -142,6 +151,12 @@ CubebSink::~CubebSink() {
142 } 151 }
143 152
144 cubeb_destroy(ctx); 153 cubeb_destroy(ctx);
154
155#ifdef _MSC_VER
156 if (SUCCEEDED(com_init_result)) {
157 CoUninitialize();
158 }
159#endif
145} 160}
146 161
147SinkStream& CubebSink::AcquireSinkStream(u32 sample_rate, u32 num_channels, 162SinkStream& CubebSink::AcquireSinkStream(u32 sample_rate, u32 num_channels,
diff --git a/src/audio_core/cubeb_sink.h b/src/audio_core/cubeb_sink.h
index efb9d1634..511df7bb1 100644
--- a/src/audio_core/cubeb_sink.h
+++ b/src/audio_core/cubeb_sink.h
@@ -25,6 +25,10 @@ private:
25 cubeb* ctx{}; 25 cubeb* ctx{};
26 cubeb_devid output_device{}; 26 cubeb_devid output_device{};
27 std::vector<SinkStreamPtr> sink_streams; 27 std::vector<SinkStreamPtr> sink_streams;
28
29#ifdef _MSC_VER
30 u32 com_init_result = 0;
31#endif
28}; 32};
29 33
30std::vector<std::string> ListCubebSinkDevices(); 34std::vector<std::string> ListCubebSinkDevices();
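The cubeb changes above pair CoInitializeEx on the thread that calls cubeb_init with a CoUninitialize that only runs if initialization actually succeeded. A minimal standalone sketch of that pairing follows; the guard class is hypothetical (the patch itself stores the result in the sink as com_init_result and guards on _MSC_VER rather than _WIN32).

#ifdef _WIN32
#include <objbase.h>
#endif

// RAII sketch of the COM lifetime handling added to CubebSink. Assumes
// COINIT_MULTITHREADED is the desired apartment model, as in the diff.
class ComInitGuard {
public:
    ComInitGuard() {
#ifdef _WIN32
        com_init_result = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
#endif
    }
    ~ComInitGuard() {
#ifdef _WIN32
        // Only balance a successful (or already-initialized) call.
        if (SUCCEEDED(com_init_result)) {
            CoUninitialize();
        }
#endif
    }

private:
#ifdef _WIN32
    HRESULT com_init_result = E_FAIL;
#endif
};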
diff --git a/src/common/color.h b/src/common/color.h
index 0379040be..3a2222077 100644
--- a/src/common/color.h
+++ b/src/common/color.h
@@ -55,36 +55,36 @@ constexpr u8 Convert8To6(u8 value) {
55/** 55/**
56 * Decode a color stored in RGBA8 format 56 * Decode a color stored in RGBA8 format
57 * @param bytes Pointer to encoded source color 57 * @param bytes Pointer to encoded source color
58 * @return Result color decoded as Math::Vec4<u8> 58 * @return Result color decoded as Common::Vec4<u8>
59 */ 59 */
60inline Math::Vec4<u8> DecodeRGBA8(const u8* bytes) { 60inline Common::Vec4<u8> DecodeRGBA8(const u8* bytes) {
61 return {bytes[3], bytes[2], bytes[1], bytes[0]}; 61 return {bytes[3], bytes[2], bytes[1], bytes[0]};
62} 62}
63 63
64/** 64/**
65 * Decode a color stored in RGB8 format 65 * Decode a color stored in RGB8 format
66 * @param bytes Pointer to encoded source color 66 * @param bytes Pointer to encoded source color
67 * @return Result color decoded as Math::Vec4<u8> 67 * @return Result color decoded as Common::Vec4<u8>
68 */ 68 */
69inline Math::Vec4<u8> DecodeRGB8(const u8* bytes) { 69inline Common::Vec4<u8> DecodeRGB8(const u8* bytes) {
70 return {bytes[2], bytes[1], bytes[0], 255}; 70 return {bytes[2], bytes[1], bytes[0], 255};
71} 71}
72 72
73/** 73/**
74 * Decode a color stored in RG8 (aka HILO8) format 74 * Decode a color stored in RG8 (aka HILO8) format
75 * @param bytes Pointer to encoded source color 75 * @param bytes Pointer to encoded source color
76 * @return Result color decoded as Math::Vec4<u8> 76 * @return Result color decoded as Common::Vec4<u8>
77 */ 77 */
78inline Math::Vec4<u8> DecodeRG8(const u8* bytes) { 78inline Common::Vec4<u8> DecodeRG8(const u8* bytes) {
79 return {bytes[1], bytes[0], 0, 255}; 79 return {bytes[1], bytes[0], 0, 255};
80} 80}
81 81
82/** 82/**
83 * Decode a color stored in RGB565 format 83 * Decode a color stored in RGB565 format
84 * @param bytes Pointer to encoded source color 84 * @param bytes Pointer to encoded source color
85 * @return Result color decoded as Math::Vec4<u8> 85 * @return Result color decoded as Common::Vec4<u8>
86 */ 86 */
87inline Math::Vec4<u8> DecodeRGB565(const u8* bytes) { 87inline Common::Vec4<u8> DecodeRGB565(const u8* bytes) {
88 u16_le pixel; 88 u16_le pixel;
89 std::memcpy(&pixel, bytes, sizeof(pixel)); 89 std::memcpy(&pixel, bytes, sizeof(pixel));
90 return {Convert5To8((pixel >> 11) & 0x1F), Convert6To8((pixel >> 5) & 0x3F), 90 return {Convert5To8((pixel >> 11) & 0x1F), Convert6To8((pixel >> 5) & 0x3F),
@@ -94,9 +94,9 @@ inline Math::Vec4<u8> DecodeRGB565(const u8* bytes) {
94/** 94/**
95 * Decode a color stored in RGB5A1 format 95 * Decode a color stored in RGB5A1 format
96 * @param bytes Pointer to encoded source color 96 * @param bytes Pointer to encoded source color
97 * @return Result color decoded as Math::Vec4<u8> 97 * @return Result color decoded as Common::Vec4<u8>
98 */ 98 */
99inline Math::Vec4<u8> DecodeRGB5A1(const u8* bytes) { 99inline Common::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
100 u16_le pixel; 100 u16_le pixel;
101 std::memcpy(&pixel, bytes, sizeof(pixel)); 101 std::memcpy(&pixel, bytes, sizeof(pixel));
102 return {Convert5To8((pixel >> 11) & 0x1F), Convert5To8((pixel >> 6) & 0x1F), 102 return {Convert5To8((pixel >> 11) & 0x1F), Convert5To8((pixel >> 6) & 0x1F),
@@ -106,9 +106,9 @@ inline Math::Vec4<u8> DecodeRGB5A1(const u8* bytes) {
106/** 106/**
107 * Decode a color stored in RGBA4 format 107 * Decode a color stored in RGBA4 format
108 * @param bytes Pointer to encoded source color 108 * @param bytes Pointer to encoded source color
109 * @return Result color decoded as Math::Vec4<u8> 109 * @return Result color decoded as Common::Vec4<u8>
110 */ 110 */
111inline Math::Vec4<u8> DecodeRGBA4(const u8* bytes) { 111inline Common::Vec4<u8> DecodeRGBA4(const u8* bytes) {
112 u16_le pixel; 112 u16_le pixel;
113 std::memcpy(&pixel, bytes, sizeof(pixel)); 113 std::memcpy(&pixel, bytes, sizeof(pixel));
114 return {Convert4To8((pixel >> 12) & 0xF), Convert4To8((pixel >> 8) & 0xF), 114 return {Convert4To8((pixel >> 12) & 0xF), Convert4To8((pixel >> 8) & 0xF),
@@ -138,9 +138,9 @@ inline u32 DecodeD24(const u8* bytes) {
138/** 138/**
139 * Decode a depth value and a stencil value stored in D24S8 format 139 * Decode a depth value and a stencil value stored in D24S8 format
140 * @param bytes Pointer to encoded source values 140 * @param bytes Pointer to encoded source values
141 * @return Resulting values stored as a Math::Vec2 141 * @return Resulting values stored as a Common::Vec2
142 */ 142 */
143inline Math::Vec2<u32> DecodeD24S8(const u8* bytes) { 143inline Common::Vec2<u32> DecodeD24S8(const u8* bytes) {
144 return {static_cast<u32>((bytes[2] << 16) | (bytes[1] << 8) | bytes[0]), bytes[3]}; 144 return {static_cast<u32>((bytes[2] << 16) | (bytes[1] << 8) | bytes[0]), bytes[3]};
145} 145}
146 146
@@ -149,7 +149,7 @@ inline Math::Vec2<u32> DecodeD24S8(const u8* bytes) {
149 * @param color Source color to encode 149 * @param color Source color to encode
150 * @param bytes Destination pointer to store encoded color 150 * @param bytes Destination pointer to store encoded color
151 */ 151 */
152inline void EncodeRGBA8(const Math::Vec4<u8>& color, u8* bytes) { 152inline void EncodeRGBA8(const Common::Vec4<u8>& color, u8* bytes) {
153 bytes[3] = color.r(); 153 bytes[3] = color.r();
154 bytes[2] = color.g(); 154 bytes[2] = color.g();
155 bytes[1] = color.b(); 155 bytes[1] = color.b();
@@ -161,7 +161,7 @@ inline void EncodeRGBA8(const Math::Vec4<u8>& color, u8* bytes) {
161 * @param color Source color to encode 161 * @param color Source color to encode
162 * @param bytes Destination pointer to store encoded color 162 * @param bytes Destination pointer to store encoded color
163 */ 163 */
164inline void EncodeRGB8(const Math::Vec4<u8>& color, u8* bytes) { 164inline void EncodeRGB8(const Common::Vec4<u8>& color, u8* bytes) {
165 bytes[2] = color.r(); 165 bytes[2] = color.r();
166 bytes[1] = color.g(); 166 bytes[1] = color.g();
167 bytes[0] = color.b(); 167 bytes[0] = color.b();
@@ -172,7 +172,7 @@ inline void EncodeRGB8(const Math::Vec4<u8>& color, u8* bytes) {
172 * @param color Source color to encode 172 * @param color Source color to encode
173 * @param bytes Destination pointer to store encoded color 173 * @param bytes Destination pointer to store encoded color
174 */ 174 */
175inline void EncodeRG8(const Math::Vec4<u8>& color, u8* bytes) { 175inline void EncodeRG8(const Common::Vec4<u8>& color, u8* bytes) {
176 bytes[1] = color.r(); 176 bytes[1] = color.r();
177 bytes[0] = color.g(); 177 bytes[0] = color.g();
178} 178}
@@ -181,7 +181,7 @@ inline void EncodeRG8(const Math::Vec4<u8>& color, u8* bytes) {
181 * @param color Source color to encode 181 * @param color Source color to encode
182 * @param bytes Destination pointer to store encoded color 182 * @param bytes Destination pointer to store encoded color
183 */ 183 */
184inline void EncodeRGB565(const Math::Vec4<u8>& color, u8* bytes) { 184inline void EncodeRGB565(const Common::Vec4<u8>& color, u8* bytes) {
185 const u16_le data = 185 const u16_le data =
186 (Convert8To5(color.r()) << 11) | (Convert8To6(color.g()) << 5) | Convert8To5(color.b()); 186 (Convert8To5(color.r()) << 11) | (Convert8To6(color.g()) << 5) | Convert8To5(color.b());
187 187
@@ -193,7 +193,7 @@ inline void EncodeRGB565(const Math::Vec4<u8>& color, u8* bytes) {
193 * @param color Source color to encode 193 * @param color Source color to encode
194 * @param bytes Destination pointer to store encoded color 194 * @param bytes Destination pointer to store encoded color
195 */ 195 */
196inline void EncodeRGB5A1(const Math::Vec4<u8>& color, u8* bytes) { 196inline void EncodeRGB5A1(const Common::Vec4<u8>& color, u8* bytes) {
197 const u16_le data = (Convert8To5(color.r()) << 11) | (Convert8To5(color.g()) << 6) | 197 const u16_le data = (Convert8To5(color.r()) << 11) | (Convert8To5(color.g()) << 6) |
198 (Convert8To5(color.b()) << 1) | Convert8To1(color.a()); 198 (Convert8To5(color.b()) << 1) | Convert8To1(color.a());
199 199
@@ -205,7 +205,7 @@ inline void EncodeRGB5A1(const Math::Vec4<u8>& color, u8* bytes) {
205 * @param color Source color to encode 205 * @param color Source color to encode
206 * @param bytes Destination pointer to store encoded color 206 * @param bytes Destination pointer to store encoded color
207 */ 207 */
208inline void EncodeRGBA4(const Math::Vec4<u8>& color, u8* bytes) { 208inline void EncodeRGBA4(const Common::Vec4<u8>& color, u8* bytes) {
209 const u16 data = (Convert8To4(color.r()) << 12) | (Convert8To4(color.g()) << 8) | 209 const u16 data = (Convert8To4(color.r()) << 12) | (Convert8To4(color.g()) << 8) |
210 (Convert8To4(color.b()) << 4) | Convert8To4(color.a()); 210 (Convert8To4(color.b()) << 4) | Convert8To4(color.a());
211 211
diff --git a/src/common/math_util.h b/src/common/math_util.h
index 94b4394c5..cff3d48c5 100644
--- a/src/common/math_util.h
+++ b/src/common/math_util.h
@@ -7,7 +7,7 @@
7#include <cstdlib> 7#include <cstdlib>
8#include <type_traits> 8#include <type_traits>
9 9
10namespace MathUtil { 10namespace Common {
11 11
12constexpr float PI = 3.14159265f; 12constexpr float PI = 3.14159265f;
13 13
@@ -41,4 +41,4 @@ struct Rectangle {
41 } 41 }
42}; 42};
43 43
44} // namespace MathUtil 44} // namespace Common
diff --git a/src/common/quaternion.h b/src/common/quaternion.h
index c528c0b68..370198ae0 100644
--- a/src/common/quaternion.h
+++ b/src/common/quaternion.h
@@ -6,12 +6,12 @@
6 6
7#include "common/vector_math.h" 7#include "common/vector_math.h"
8 8
9namespace Math { 9namespace Common {
10 10
11template <typename T> 11template <typename T>
12class Quaternion { 12class Quaternion {
13public: 13public:
14 Math::Vec3<T> xyz; 14 Vec3<T> xyz;
15 T w{}; 15 T w{};
16 16
17 Quaternion<decltype(-T{})> Inverse() const { 17 Quaternion<decltype(-T{})> Inverse() const {
@@ -38,12 +38,12 @@ public:
38}; 38};
39 39
40template <typename T> 40template <typename T>
41auto QuaternionRotate(const Quaternion<T>& q, const Math::Vec3<T>& v) { 41auto QuaternionRotate(const Quaternion<T>& q, const Vec3<T>& v) {
42 return v + 2 * Cross(q.xyz, Cross(q.xyz, v) + v * q.w); 42 return v + 2 * Cross(q.xyz, Cross(q.xyz, v) + v * q.w);
43} 43}
44 44
45inline Quaternion<float> MakeQuaternion(const Math::Vec3<float>& axis, float angle) { 45inline Quaternion<float> MakeQuaternion(const Vec3<float>& axis, float angle) {
46 return {axis * std::sin(angle / 2), std::cos(angle / 2)}; 46 return {axis * std::sin(angle / 2), std::cos(angle / 2)};
47} 47}
48 48
49} // namespace Math 49} // namespace Common
diff --git a/src/common/swap.h b/src/common/swap.h
index 32af0b6ac..0e219747f 100644
--- a/src/common/swap.h
+++ b/src/common/swap.h
@@ -28,8 +28,8 @@
28#include <cstring> 28#include <cstring>
29#include "common/common_types.h" 29#include "common/common_types.h"
30 30
31// GCC 4.6+ 31// GCC
32#if __GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6) 32#ifdef __GNUC__
33 33
34#if __BYTE_ORDER__ && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && !defined(COMMON_LITTLE_ENDIAN) 34#if __BYTE_ORDER__ && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && !defined(COMMON_LITTLE_ENDIAN)
35#define COMMON_LITTLE_ENDIAN 1 35#define COMMON_LITTLE_ENDIAN 1
@@ -38,7 +38,7 @@
38#endif 38#endif
39 39
40// LLVM/clang 40// LLVM/clang
41#elif __clang__ 41#elif defined(__clang__)
42 42
43#if __LITTLE_ENDIAN__ && !defined(COMMON_LITTLE_ENDIAN) 43#if __LITTLE_ENDIAN__ && !defined(COMMON_LITTLE_ENDIAN)
44#define COMMON_LITTLE_ENDIAN 1 44#define COMMON_LITTLE_ENDIAN 1
diff --git a/src/common/vector_math.h b/src/common/vector_math.h
index 8feb49941..429485329 100644
--- a/src/common/vector_math.h
+++ b/src/common/vector_math.h
@@ -33,7 +33,7 @@
33#include <cmath> 33#include <cmath>
34#include <type_traits> 34#include <type_traits>
35 35
36namespace Math { 36namespace Common {
37 37
38template <typename T> 38template <typename T>
39class Vec2; 39class Vec2;
@@ -690,4 +690,4 @@ constexpr Vec4<T> MakeVec(const T& x, const Vec3<T>& yzw) {
690 return MakeVec(x, yzw[0], yzw[1], yzw[2]); 690 return MakeVec(x, yzw[0], yzw[1], yzw[2]);
691} 691}
692 692
693} // namespace Math 693} // namespace Common
diff --git a/src/core/file_sys/vfs_vector.cpp b/src/core/file_sys/vfs_vector.cpp
index 515626658..75fc04302 100644
--- a/src/core/file_sys/vfs_vector.cpp
+++ b/src/core/file_sys/vfs_vector.cpp
@@ -47,7 +47,7 @@ std::size_t VectorVfsFile::Write(const u8* data_, std::size_t length, std::size_
47 if (offset + length > data.size()) 47 if (offset + length > data.size())
48 data.resize(offset + length); 48 data.resize(offset + length);
49 const auto write = std::min(length, data.size() - offset); 49 const auto write = std::min(length, data.size() - offset);
50 std::memcpy(data.data(), data_, write); 50 std::memcpy(data.data() + offset, data_, write);
51 return write; 51 return write;
52} 52}
53 53
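The vfs_vector.cpp change is a one-line bug fix: writes previously landed at the start of the backing vector instead of at the requested offset. A standalone model of the corrected behaviour (hypothetical helper, not the project's VectorVfsFile):

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

// Copy `length` bytes from `src` into `data` starting at `offset`,
// growing the vector if needed.
std::size_t WriteAt(std::vector<std::uint8_t>& data, const std::uint8_t* src,
                    std::size_t length, std::size_t offset) {
    if (offset + length > data.size()) {
        data.resize(offset + length);
    }
    const std::size_t write = std::min(length, data.size() - offset);
    std::memcpy(data.data() + offset, src, write); // the fix: honor the offset
    return write;
}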
diff --git a/src/core/frontend/emu_window.cpp b/src/core/frontend/emu_window.cpp
index 9dd493efb..e29afd630 100644
--- a/src/core/frontend/emu_window.cpp
+++ b/src/core/frontend/emu_window.cpp
@@ -67,7 +67,7 @@ static bool IsWithinTouchscreen(const Layout::FramebufferLayout& layout, unsigne
67 framebuffer_x >= layout.screen.left && framebuffer_x < layout.screen.right); 67 framebuffer_x >= layout.screen.left && framebuffer_x < layout.screen.right);
68} 68}
69 69
70std::tuple<unsigned, unsigned> EmuWindow::ClipToTouchScreen(unsigned new_x, unsigned new_y) { 70std::tuple<unsigned, unsigned> EmuWindow::ClipToTouchScreen(unsigned new_x, unsigned new_y) const {
71 new_x = std::max(new_x, framebuffer_layout.screen.left); 71 new_x = std::max(new_x, framebuffer_layout.screen.left);
72 new_x = std::min(new_x, framebuffer_layout.screen.right - 1); 72 new_x = std::min(new_x, framebuffer_layout.screen.right - 1);
73 73
diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h
index 7006a37b3..d0bcb4660 100644
--- a/src/core/frontend/emu_window.h
+++ b/src/core/frontend/emu_window.h
@@ -166,7 +166,7 @@ private:
166 /** 166 /**
167 * Clip the provided coordinates to be inside the touchscreen area. 167 * Clip the provided coordinates to be inside the touchscreen area.
168 */ 168 */
169 std::tuple<unsigned, unsigned> ClipToTouchScreen(unsigned new_x, unsigned new_y); 169 std::tuple<unsigned, unsigned> ClipToTouchScreen(unsigned new_x, unsigned new_y) const;
170}; 170};
171 171
172} // namespace Core::Frontend 172} // namespace Core::Frontend
diff --git a/src/core/frontend/framebuffer_layout.cpp b/src/core/frontend/framebuffer_layout.cpp
index f8662d193..a1357179f 100644
--- a/src/core/frontend/framebuffer_layout.cpp
+++ b/src/core/frontend/framebuffer_layout.cpp
@@ -12,12 +12,12 @@ namespace Layout {
12 12
13// Finds the largest size subrectangle contained in window area that is confined to the aspect ratio 13// Finds the largest size subrectangle contained in window area that is confined to the aspect ratio
14template <class T> 14template <class T>
15static MathUtil::Rectangle<T> maxRectangle(MathUtil::Rectangle<T> window_area, 15static Common::Rectangle<T> MaxRectangle(Common::Rectangle<T> window_area,
16 float screen_aspect_ratio) { 16 float screen_aspect_ratio) {
17 float scale = std::min(static_cast<float>(window_area.GetWidth()), 17 float scale = std::min(static_cast<float>(window_area.GetWidth()),
18 window_area.GetHeight() / screen_aspect_ratio); 18 window_area.GetHeight() / screen_aspect_ratio);
19 return MathUtil::Rectangle<T>{0, 0, static_cast<T>(std::round(scale)), 19 return Common::Rectangle<T>{0, 0, static_cast<T>(std::round(scale)),
20 static_cast<T>(std::round(scale * screen_aspect_ratio))}; 20 static_cast<T>(std::round(scale * screen_aspect_ratio))};
21} 21}
22 22
23FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) { 23FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) {
@@ -29,8 +29,8 @@ FramebufferLayout DefaultFrameLayout(unsigned width, unsigned height) {
29 29
30 const float emulation_aspect_ratio{static_cast<float>(ScreenUndocked::Height) / 30 const float emulation_aspect_ratio{static_cast<float>(ScreenUndocked::Height) /
31 ScreenUndocked::Width}; 31 ScreenUndocked::Width};
32 MathUtil::Rectangle<unsigned> screen_window_area{0, 0, width, height}; 32 Common::Rectangle<unsigned> screen_window_area{0, 0, width, height};
33 MathUtil::Rectangle<unsigned> screen = maxRectangle(screen_window_area, emulation_aspect_ratio); 33 Common::Rectangle<unsigned> screen = MaxRectangle(screen_window_area, emulation_aspect_ratio);
34 34
35 float window_aspect_ratio = static_cast<float>(height) / width; 35 float window_aspect_ratio = static_cast<float>(height) / width;
36 36
diff --git a/src/core/frontend/framebuffer_layout.h b/src/core/frontend/framebuffer_layout.h
index e06647794..c2c63d08c 100644
--- a/src/core/frontend/framebuffer_layout.h
+++ b/src/core/frontend/framebuffer_layout.h
@@ -16,7 +16,7 @@ struct FramebufferLayout {
16 unsigned width{ScreenUndocked::Width}; 16 unsigned width{ScreenUndocked::Width};
17 unsigned height{ScreenUndocked::Height}; 17 unsigned height{ScreenUndocked::Height};
18 18
19 MathUtil::Rectangle<unsigned> screen; 19 Common::Rectangle<unsigned> screen;
20 20
21 /** 21 /**
22 * Returns the ration of pixel size of the screen, compared to the native size of the undocked 22 * Returns the ration of pixel size of the screen, compared to the native size of the undocked
diff --git a/src/core/frontend/input.h b/src/core/frontend/input.h
index 16fdcd376..7c11d7546 100644
--- a/src/core/frontend/input.h
+++ b/src/core/frontend/input.h
@@ -124,7 +124,7 @@ using AnalogDevice = InputDevice<std::tuple<float, float>>;
124 * Orientation is determined by right-hand rule. 124 * Orientation is determined by right-hand rule.
125 * Units: deg/sec 125 * Units: deg/sec
126 */ 126 */
127using MotionDevice = InputDevice<std::tuple<Math::Vec3<float>, Math::Vec3<float>>>; 127using MotionDevice = InputDevice<std::tuple<Common::Vec3<float>, Common::Vec3<float>>>;
128 128
129/** 129/**
130 * A touch device is an input device that returns a tuple of two floats and a bool. The floats are 130 * A touch device is an input device that returns a tuple of two floats and a bool. The floats are
diff --git a/src/core/hle/kernel/errors.h b/src/core/hle/kernel/errors.h
index d17eb0cb6..8097b3863 100644
--- a/src/core/hle/kernel/errors.h
+++ b/src/core/hle/kernel/errors.h
@@ -14,6 +14,7 @@ constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED{ErrorModule::Kernel, 7};
14constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14}; 14constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14};
15constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101}; 15constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101};
16constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102}; 16constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102};
17constexpr ResultCode ERR_OUT_OF_MEMORY{ErrorModule::Kernel, 104};
17constexpr ResultCode ERR_HANDLE_TABLE_FULL{ErrorModule::Kernel, 105}; 18constexpr ResultCode ERR_HANDLE_TABLE_FULL{ErrorModule::Kernel, 105};
18constexpr ResultCode ERR_INVALID_ADDRESS_STATE{ErrorModule::Kernel, 106}; 19constexpr ResultCode ERR_INVALID_ADDRESS_STATE{ErrorModule::Kernel, 106};
19constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS{ErrorModule::Kernel, 108}; 20constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS{ErrorModule::Kernel, 108};
diff --git a/src/core/hle/kernel/handle_table.cpp b/src/core/hle/kernel/handle_table.cpp
index c8acde5b1..bdfaa977f 100644
--- a/src/core/hle/kernel/handle_table.cpp
+++ b/src/core/hle/kernel/handle_table.cpp
@@ -14,32 +14,47 @@
14namespace Kernel { 14namespace Kernel {
15namespace { 15namespace {
16constexpr u16 GetSlot(Handle handle) { 16constexpr u16 GetSlot(Handle handle) {
17 return handle >> 15; 17 return static_cast<u16>(handle >> 15);
18} 18}
19 19
20constexpr u16 GetGeneration(Handle handle) { 20constexpr u16 GetGeneration(Handle handle) {
21 return handle & 0x7FFF; 21 return static_cast<u16>(handle & 0x7FFF);
22} 22}
23} // Anonymous namespace 23} // Anonymous namespace
24 24
25HandleTable::HandleTable() { 25HandleTable::HandleTable() {
26 next_generation = 1;
27 Clear(); 26 Clear();
28} 27}
29 28
30HandleTable::~HandleTable() = default; 29HandleTable::~HandleTable() = default;
31 30
31ResultCode HandleTable::SetSize(s32 handle_table_size) {
32 if (static_cast<u32>(handle_table_size) > MAX_COUNT) {
33 return ERR_OUT_OF_MEMORY;
34 }
35
36 // Values less than or equal to zero indicate to use the maximum allowable
37 // size for the handle table in the actual kernel, so we ignore the given
38 // value in that case, since we assume this by default unless this function
39 // is called.
40 if (handle_table_size > 0) {
41 table_size = static_cast<u16>(handle_table_size);
42 }
43
44 return RESULT_SUCCESS;
45}
46
32ResultVal<Handle> HandleTable::Create(SharedPtr<Object> obj) { 47ResultVal<Handle> HandleTable::Create(SharedPtr<Object> obj) {
33 DEBUG_ASSERT(obj != nullptr); 48 DEBUG_ASSERT(obj != nullptr);
34 49
35 u16 slot = next_free_slot; 50 const u16 slot = next_free_slot;
36 if (slot >= generations.size()) { 51 if (slot >= table_size) {
37 LOG_ERROR(Kernel, "Unable to allocate Handle, too many slots in use."); 52 LOG_ERROR(Kernel, "Unable to allocate Handle, too many slots in use.");
38 return ERR_HANDLE_TABLE_FULL; 53 return ERR_HANDLE_TABLE_FULL;
39 } 54 }
40 next_free_slot = generations[slot]; 55 next_free_slot = generations[slot];
41 56
42 u16 generation = next_generation++; 57 const u16 generation = next_generation++;
43 58
44 // Overflow count so it fits in the 15 bits dedicated to the generation in the handle. 59 // Overflow count so it fits in the 15 bits dedicated to the generation in the handle.
45 // Horizon OS uses zero to represent an invalid handle, so skip to 1. 60 // Horizon OS uses zero to represent an invalid handle, so skip to 1.
@@ -64,10 +79,11 @@ ResultVal<Handle> HandleTable::Duplicate(Handle handle) {
64} 79}
65 80
66ResultCode HandleTable::Close(Handle handle) { 81ResultCode HandleTable::Close(Handle handle) {
67 if (!IsValid(handle)) 82 if (!IsValid(handle)) {
68 return ERR_INVALID_HANDLE; 83 return ERR_INVALID_HANDLE;
84 }
69 85
70 u16 slot = GetSlot(handle); 86 const u16 slot = GetSlot(handle);
71 87
72 objects[slot] = nullptr; 88 objects[slot] = nullptr;
73 89
@@ -77,10 +93,10 @@ ResultCode HandleTable::Close(Handle handle) {
77} 93}
78 94
79bool HandleTable::IsValid(Handle handle) const { 95bool HandleTable::IsValid(Handle handle) const {
80 std::size_t slot = GetSlot(handle); 96 const std::size_t slot = GetSlot(handle);
81 u16 generation = GetGeneration(handle); 97 const u16 generation = GetGeneration(handle);
82 98
83 return slot < MAX_COUNT && objects[slot] != nullptr && generations[slot] == generation; 99 return slot < table_size && objects[slot] != nullptr && generations[slot] == generation;
84} 100}
85 101
86SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const { 102SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const {
@@ -97,7 +113,7 @@ SharedPtr<Object> HandleTable::GetGeneric(Handle handle) const {
97} 113}
98 114
99void HandleTable::Clear() { 115void HandleTable::Clear() {
100 for (u16 i = 0; i < MAX_COUNT; ++i) { 116 for (u16 i = 0; i < table_size; ++i) {
101 generations[i] = i + 1; 117 generations[i] = i + 1;
102 objects[i] = nullptr; 118 objects[i] = nullptr;
103 } 119 }
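For reference, the handle layout implied by GetSlot()/GetGeneration() above: the slot index occupies the upper bits and the 15-bit generation the lower bits. A small sketch (the MakeHandle helper is hypothetical; the table composes handles inline):

#include <cstdint>

// Compose a handle the way GetSlot()/GetGeneration() expect to decompose it.
constexpr std::uint32_t MakeHandle(std::uint16_t slot, std::uint16_t generation) {
    return (static_cast<std::uint32_t>(slot) << 15) | (generation & 0x7FFF);
}

static_assert(MakeHandle(3, 42) >> 15 == 3, "slot lives in bits 15 and up");
static_assert((MakeHandle(3, 42) & 0x7FFF) == 42, "generation lives in the low 15 bits");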
diff --git a/src/core/hle/kernel/handle_table.h b/src/core/hle/kernel/handle_table.h
index 89a3bc740..44901391b 100644
--- a/src/core/hle/kernel/handle_table.h
+++ b/src/core/hle/kernel/handle_table.h
@@ -50,6 +50,20 @@ public:
50 ~HandleTable(); 50 ~HandleTable();
51 51
52 /** 52 /**
53 * Sets the number of handles that may be in use at one time
54 * for this handle table.
55 *
56 * @param handle_table_size The desired size to limit the handle table to.
57 *
58 * @returns an error code indicating if initialization was successful.
59 * If initialization was not successful, then ERR_OUT_OF_MEMORY
60 * will be returned.
61 *
62 * @pre handle_table_size must be within the range [0, 1024]
63 */
64 ResultCode SetSize(s32 handle_table_size);
65
66 /**
53 * Allocates a handle for the given object. 67 * Allocates a handle for the given object.
54 * @return The created Handle or one of the following errors: 68 * @return The created Handle or one of the following errors:
55 * - `ERR_HANDLE_TABLE_FULL`: the maximum number of handles has been exceeded. 69 * - `ERR_HANDLE_TABLE_FULL`: the maximum number of handles has been exceeded.
@@ -104,13 +118,20 @@ private:
104 std::array<u16, MAX_COUNT> generations; 118 std::array<u16, MAX_COUNT> generations;
105 119
106 /** 120 /**
121 * The limited size of the handle table. This can be specified by process
122 * capabilities in order to restrict the overall number of handles that
123 * can be created in a process instance
124 */
125 u16 table_size = static_cast<u16>(MAX_COUNT);
126
127 /**
107 * Global counter of the number of created handles. Stored in `generations` when a handle is 128 * Global counter of the number of created handles. Stored in `generations` when a handle is
108 * created, and wraps around to 1 when it hits 0x8000. 129 * created, and wraps around to 1 when it hits 0x8000.
109 */ 130 */
110 u16 next_generation; 131 u16 next_generation = 1;
111 132
112 /// Head of the free slots linked list. 133 /// Head of the free slots linked list.
113 u16 next_free_slot; 134 u16 next_free_slot = 0;
114}; 135};
115 136
116} // namespace Kernel 137} // namespace Kernel
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index c5aa19afa..8009150e0 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -99,7 +99,13 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
99 vm_manager.Reset(metadata.GetAddressSpaceType()); 99 vm_manager.Reset(metadata.GetAddressSpaceType());
100 100
101 const auto& caps = metadata.GetKernelCapabilities(); 101 const auto& caps = metadata.GetKernelCapabilities();
102 return capabilities.InitializeForUserProcess(caps.data(), caps.size(), vm_manager); 102 const auto capability_init_result =
103 capabilities.InitializeForUserProcess(caps.data(), caps.size(), vm_manager);
104 if (capability_init_result.IsError()) {
105 return capability_init_result;
106 }
107
108 return handle_table.SetSize(capabilities.GetHandleTableSize());
103} 109}
104 110
105void Process::Run(VAddr entry_point, s32 main_thread_priority, u32 stack_size) { 111void Process::Run(VAddr entry_point, s32 main_thread_priority, u32 stack_size) {
diff --git a/src/core/hle/kernel/process_capability.cpp b/src/core/hle/kernel/process_capability.cpp
index 3a2164b25..583e35b79 100644
--- a/src/core/hle/kernel/process_capability.cpp
+++ b/src/core/hle/kernel/process_capability.cpp
@@ -96,7 +96,7 @@ void ProcessCapabilities::InitializeForMetadatalessProcess() {
96 interrupt_capabilities.set(); 96 interrupt_capabilities.set();
97 97
98 // Allow using the maximum possible amount of handles 98 // Allow using the maximum possible amount of handles
99 handle_table_size = static_cast<u32>(HandleTable::MAX_COUNT); 99 handle_table_size = static_cast<s32>(HandleTable::MAX_COUNT);
100 100
101 // Allow all debugging capabilities. 101 // Allow all debugging capabilities.
102 is_debuggable = true; 102 is_debuggable = true;
@@ -337,7 +337,7 @@ ResultCode ProcessCapabilities::HandleHandleTableFlags(u32 flags) {
337 return ERR_RESERVED_VALUE; 337 return ERR_RESERVED_VALUE;
338 } 338 }
339 339
340 handle_table_size = (flags >> 16) & 0x3FF; 340 handle_table_size = static_cast<s32>((flags >> 16) & 0x3FF);
341 return RESULT_SUCCESS; 341 return RESULT_SUCCESS;
342} 342}
343 343
diff --git a/src/core/hle/kernel/process_capability.h b/src/core/hle/kernel/process_capability.h
index fbc8812a3..5cdd80747 100644
--- a/src/core/hle/kernel/process_capability.h
+++ b/src/core/hle/kernel/process_capability.h
@@ -156,7 +156,7 @@ public:
156 } 156 }
157 157
158 /// Gets the number of total allowable handles for the process' handle table. 158 /// Gets the number of total allowable handles for the process' handle table.
159 u32 GetHandleTableSize() const { 159 s32 GetHandleTableSize() const {
160 return handle_table_size; 160 return handle_table_size;
161 } 161 }
162 162
@@ -252,7 +252,7 @@ private:
252 u64 core_mask = 0; 252 u64 core_mask = 0;
253 u64 priority_mask = 0; 253 u64 priority_mask = 0;
254 254
255 u32 handle_table_size = 0; 255 s32 handle_table_size = 0;
256 u32 kernel_version = 0; 256 u32 kernel_version = 0;
257 257
258 ProgramType program_type = ProgramType::SysModule; 258 ProgramType program_type = ProgramType::SysModule;
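The capability parsing above pulls the handle table size out of bits 16-25 of the descriptor flags, which also makes the switch to a signed s32 safe: the 10-bit field tops out at 0x3FF, well inside the positive range. A small sketch of that decode (hypothetical helper name):

#include <cstdint>

// Mirror of `(flags >> 16) & 0x3FF` in HandleHandleTableFlags().
constexpr std::int32_t HandleTableSizeFromFlags(std::uint32_t flags) {
    return static_cast<std::int32_t>((flags >> 16) & 0x3FF);
}

static_assert(HandleTableSizeFromFlags(0x02000000) == 0x200, "bits 16-25 hold the size");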
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index 7e0cc64a8..49648394c 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -262,20 +262,20 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
262 LOG_DEBUG(Service_Audio, "called"); 262 LOG_DEBUG(Service_Audio, "called");
263 263
264 u64 buffer_sz = Common::AlignUp(4 * params.mix_buffer_count, 0x40); 264 u64 buffer_sz = Common::AlignUp(4 * params.mix_buffer_count, 0x40);
265 buffer_sz += params.unknown_c * 1024; 265 buffer_sz += params.submix_count * 1024;
266 buffer_sz += 0x940 * (params.unknown_c + 1); 266 buffer_sz += 0x940 * (params.submix_count + 1);
267 buffer_sz += 0x3F0 * params.voice_count; 267 buffer_sz += 0x3F0 * params.voice_count;
268 buffer_sz += Common::AlignUp(8 * (params.unknown_c + 1), 0x10); 268 buffer_sz += Common::AlignUp(8 * (params.submix_count + 1), 0x10);
269 buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10); 269 buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10);
270 buffer_sz += 270 buffer_sz += Common::AlignUp(
271 Common::AlignUp((0x3C0 * (params.sink_count + params.unknown_c) + 4 * params.sample_count) * 271 (0x3C0 * (params.sink_count + params.submix_count) + 4 * params.sample_count) *
272 (params.mix_buffer_count + 6), 272 (params.mix_buffer_count + 6),
273 0x40); 273 0x40);
274 274
275 if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) { 275 if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
276 u32 count = params.unknown_c + 1; 276 const u32 count = params.submix_count + 1;
277 u64 node_count = Common::AlignUp(count, 0x40); 277 u64 node_count = Common::AlignUp(count, 0x40);
278 u64 node_state_buffer_sz = 278 const u64 node_state_buffer_sz =
279 4 * (node_count * node_count) + 0xC * node_count + 2 * (node_count / 8); 279 4 * (node_count * node_count) + 0xC * node_count + 2 * (node_count / 8);
280 u64 edge_matrix_buffer_sz = 0; 280 u64 edge_matrix_buffer_sz = 0;
281 node_count = Common::AlignUp(count * count, 0x40); 281 node_count = Common::AlignUp(count * count, 0x40);
@@ -289,19 +289,19 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
289 289
290 buffer_sz += 0x20 * (params.effect_count + 4 * params.voice_count) + 0x50; 290 buffer_sz += 0x20 * (params.effect_count + 4 * params.voice_count) + 0x50;
291 if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) { 291 if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
292 buffer_sz += 0xE0 * params.unknown_2c; 292 buffer_sz += 0xE0 * params.num_splitter_send_channels;
293 buffer_sz += 0x20 * params.splitter_count; 293 buffer_sz += 0x20 * params.splitter_count;
294 buffer_sz += Common::AlignUp(4 * params.unknown_2c, 0x10); 294 buffer_sz += Common::AlignUp(4 * params.num_splitter_send_channels, 0x10);
295 } 295 }
296 buffer_sz = Common::AlignUp(buffer_sz, 0x40) + 0x170 * params.sink_count; 296 buffer_sz = Common::AlignUp(buffer_sz, 0x40) + 0x170 * params.sink_count;
297 u64 output_sz = buffer_sz + 0x280 * params.sink_count + 0x4B0 * params.effect_count + 297 u64 output_sz = buffer_sz + 0x280 * params.sink_count + 0x4B0 * params.effect_count +
298 ((params.voice_count * 256) | 0x40); 298 ((params.voice_count * 256) | 0x40);
299 299
300 if (params.unknown_1c >= 1) { 300 if (params.performance_frame_count >= 1) {
301 output_sz = Common::AlignUp(((16 * params.sink_count + 16 * params.effect_count + 301 output_sz = Common::AlignUp(((16 * params.sink_count + 16 * params.effect_count +
302 16 * params.voice_count + 16) + 302 16 * params.voice_count + 16) +
303 0x658) * 303 0x658) *
304 (params.unknown_1c + 1) + 304 (params.performance_frame_count + 1) +
305 0xc0, 305 0xc0,
306 0x40) + 306 0x40) +
307 output_sz; 307 output_sz;
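The work-buffer size computation above leans heavily on Common::AlignUp. A standalone sketch of what that helper is assumed to do for power-of-two alignments (the real implementation lives in the project's common headers):

#include <cstdint>

// Round `value` up to the next multiple of `align`; align must be a power of two.
constexpr std::uint64_t AlignUpSketch(std::uint64_t value, std::uint64_t align) {
    return (value + align - 1) & ~(align - 1);
}

static_assert(AlignUpSketch(4 * 24, 0x40) == 0x80, "e.g. 24 mix buffers round up to 0x80 bytes");
static_assert(AlignUpSketch(0x40, 0x40) == 0x40, "already aligned values are unchanged");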
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
index 21ccfe1f8..dbe7ee6e8 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -23,7 +23,7 @@ u32 nvdisp_disp0::ioctl(Ioctl command, const std::vector<u8>& input, std::vector
23 23
24void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, 24void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height,
25 u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform, 25 u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform,
26 const MathUtil::Rectangle<int>& crop_rect) { 26 const Common::Rectangle<int>& crop_rect) {
27 VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle); 27 VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle);
28 LOG_TRACE(Service, 28 LOG_TRACE(Service,
29 "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}", 29 "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
index a45086e45..ace71169f 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
@@ -25,7 +25,7 @@ public:
25 /// Performs a screen flip, drawing the buffer pointed to by the handle. 25 /// Performs a screen flip, drawing the buffer pointed to by the handle.
26 void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride, 26 void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride,
27 NVFlinger::BufferQueue::BufferTransformFlags transform, 27 NVFlinger::BufferQueue::BufferTransformFlags transform,
28 const MathUtil::Rectangle<int>& crop_rect); 28 const Common::Rectangle<int>& crop_rect);
29 29
30private: 30private:
31 std::shared_ptr<nvmap> nvmap_dev; 31 std::shared_ptr<nvmap> nvmap_dev;
diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp
index fc07d9bb8..4d150fc71 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue.cpp
@@ -63,7 +63,7 @@ const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const {
63} 63}
64 64
65void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform, 65void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform,
66 const MathUtil::Rectangle<int>& crop_rect) { 66 const Common::Rectangle<int>& crop_rect) {
67 auto itr = std::find_if(queue.begin(), queue.end(), 67 auto itr = std::find_if(queue.begin(), queue.end(),
68 [&](const Buffer& buffer) { return buffer.slot == slot; }); 68 [&](const Buffer& buffer) { return buffer.slot == slot; });
69 ASSERT(itr != queue.end()); 69 ASSERT(itr != queue.end());
diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h
index ab90d591e..e1ccb6171 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.h
+++ b/src/core/hle/service/nvflinger/buffer_queue.h
@@ -67,14 +67,14 @@ public:
67 Status status = Status::Free; 67 Status status = Status::Free;
68 IGBPBuffer igbp_buffer; 68 IGBPBuffer igbp_buffer;
69 BufferTransformFlags transform; 69 BufferTransformFlags transform;
70 MathUtil::Rectangle<int> crop_rect; 70 Common::Rectangle<int> crop_rect;
71 }; 71 };
72 72
73 void SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer); 73 void SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer);
74 std::optional<u32> DequeueBuffer(u32 width, u32 height); 74 std::optional<u32> DequeueBuffer(u32 width, u32 height);
75 const IGBPBuffer& RequestBuffer(u32 slot) const; 75 const IGBPBuffer& RequestBuffer(u32 slot) const;
76 void QueueBuffer(u32 slot, BufferTransformFlags transform, 76 void QueueBuffer(u32 slot, BufferTransformFlags transform,
77 const MathUtil::Rectangle<int>& crop_rect); 77 const Common::Rectangle<int>& crop_rect);
78 std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer(); 78 std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer();
79 void ReleaseBuffer(u32 slot); 79 void ReleaseBuffer(u32 slot);
80 u32 Query(QueryType type); 80 u32 Query(QueryType type);
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index b5d452db1..56f31e2ac 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -28,9 +28,13 @@ namespace Service::NVFlinger {
28constexpr std::size_t SCREEN_REFRESH_RATE = 60; 28constexpr std::size_t SCREEN_REFRESH_RATE = 60;
29constexpr u64 frame_ticks = static_cast<u64>(Core::Timing::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE); 29constexpr u64 frame_ticks = static_cast<u64>(Core::Timing::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE);
30 30
31NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) 31NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) : core_timing{core_timing} {
32 : displays{{0, "Default"}, {1, "External"}, {2, "Edid"}, {3, "Internal"}, {4, "Null"}}, 32 displays.emplace_back(0, "Default");
33 core_timing{core_timing} { 33 displays.emplace_back(1, "External");
34 displays.emplace_back(2, "Edid");
35 displays.emplace_back(3, "Internal");
36 displays.emplace_back(4, "Null");
37
34 // Schedule the screen composition events 38 // Schedule the screen composition events
35 composition_event = 39 composition_event =
36 core_timing.RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) { 40 core_timing.RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) {
@@ -55,13 +59,14 @@ std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) {
55 // TODO(Subv): Currently we only support the Default display. 59 // TODO(Subv): Currently we only support the Default display.
56 ASSERT(name == "Default"); 60 ASSERT(name == "Default");
57 61
58 const auto itr = std::find_if(displays.begin(), displays.end(), 62 const auto itr =
59 [&](const VI::Display& display) { return display.name == name; }); 63 std::find_if(displays.begin(), displays.end(),
64 [&](const VI::Display& display) { return display.GetName() == name; });
60 if (itr == displays.end()) { 65 if (itr == displays.end()) {
61 return {}; 66 return {};
62 } 67 }
63 68
64 return itr->id; 69 return itr->GetID();
65} 70}
66 71
67std::optional<u64> NVFlinger::CreateLayer(u64 display_id) { 72std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
@@ -71,13 +76,10 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
71 return {}; 76 return {};
72 } 77 }
73 78
74 ASSERT_MSG(display->layers.empty(), "Only one layer is supported per display at the moment");
75
76 const u64 layer_id = next_layer_id++; 79 const u64 layer_id = next_layer_id++;
77 const u32 buffer_queue_id = next_buffer_queue_id++; 80 const u32 buffer_queue_id = next_buffer_queue_id++;
78 auto buffer_queue = std::make_shared<BufferQueue>(buffer_queue_id, layer_id); 81 buffer_queues.emplace_back(buffer_queue_id, layer_id);
79 display->layers.emplace_back(layer_id, buffer_queue); 82 display->CreateLayer(layer_id, buffer_queues.back());
80 buffer_queues.emplace_back(std::move(buffer_queue));
81 return layer_id; 83 return layer_id;
82} 84}
83 85
@@ -88,7 +90,7 @@ std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) co
88 return {}; 90 return {};
89 } 91 }
90 92
91 return layer->buffer_queue->GetId(); 93 return layer->GetBufferQueue().GetId();
92} 94}
93 95
94Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_id) const { 96Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_id) const {
@@ -98,12 +100,20 @@ Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_i
98 return nullptr; 100 return nullptr;
99 } 101 }
100 102
101 return display->vsync_event.readable; 103 return display->GetVSyncEvent();
102} 104}
103 105
104std::shared_ptr<BufferQueue> NVFlinger::FindBufferQueue(u32 id) const { 106BufferQueue& NVFlinger::FindBufferQueue(u32 id) {
105 const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(), 107 const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
106 [&](const auto& queue) { return queue->GetId() == id; }); 108 [id](const auto& queue) { return queue.GetId() == id; });
109
110 ASSERT(itr != buffer_queues.end());
111 return *itr;
112}
113
114const BufferQueue& NVFlinger::FindBufferQueue(u32 id) const {
115 const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(),
116 [id](const auto& queue) { return queue.GetId() == id; });
107 117
108 ASSERT(itr != buffer_queues.end()); 118 ASSERT(itr != buffer_queues.end());
109 return *itr; 119 return *itr;
@@ -112,7 +122,7 @@ std::shared_ptr<BufferQueue> NVFlinger::FindBufferQueue(u32 id) const {
112VI::Display* NVFlinger::FindDisplay(u64 display_id) { 122VI::Display* NVFlinger::FindDisplay(u64 display_id) {
113 const auto itr = 123 const auto itr =
114 std::find_if(displays.begin(), displays.end(), 124 std::find_if(displays.begin(), displays.end(),
115 [&](const VI::Display& display) { return display.id == display_id; }); 125 [&](const VI::Display& display) { return display.GetID() == display_id; });
116 126
117 if (itr == displays.end()) { 127 if (itr == displays.end()) {
118 return nullptr; 128 return nullptr;
@@ -124,7 +134,7 @@ VI::Display* NVFlinger::FindDisplay(u64 display_id) {
124const VI::Display* NVFlinger::FindDisplay(u64 display_id) const { 134const VI::Display* NVFlinger::FindDisplay(u64 display_id) const {
125 const auto itr = 135 const auto itr =
126 std::find_if(displays.begin(), displays.end(), 136 std::find_if(displays.begin(), displays.end(),
127 [&](const VI::Display& display) { return display.id == display_id; }); 137 [&](const VI::Display& display) { return display.GetID() == display_id; });
128 138
129 if (itr == displays.end()) { 139 if (itr == displays.end()) {
130 return nullptr; 140 return nullptr;
@@ -140,14 +150,7 @@ VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) {
140 return nullptr; 150 return nullptr;
141 } 151 }
142 152
143 const auto itr = std::find_if(display->layers.begin(), display->layers.end(), 153 return display->FindLayer(layer_id);
144 [&](const VI::Layer& layer) { return layer.id == layer_id; });
145
146 if (itr == display->layers.end()) {
147 return nullptr;
148 }
149
150 return &*itr;
151} 154}
152 155
153const VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const { 156const VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const {
@@ -157,33 +160,24 @@ const VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const {
157 return nullptr; 160 return nullptr;
158 } 161 }
159 162
160 const auto itr = std::find_if(display->layers.begin(), display->layers.end(), 163 return display->FindLayer(layer_id);
161 [&](const VI::Layer& layer) { return layer.id == layer_id; });
162
163 if (itr == display->layers.end()) {
164 return nullptr;
165 }
166
167 return &*itr;
168} 164}
169 165
170void NVFlinger::Compose() { 166void NVFlinger::Compose() {
171 for (auto& display : displays) { 167 for (auto& display : displays) {
172 // Trigger vsync for this display at the end of drawing 168 // Trigger vsync for this display at the end of drawing
173 SCOPE_EXIT({ display.vsync_event.writable->Signal(); }); 169 SCOPE_EXIT({ display.SignalVSyncEvent(); });
174 170
175 // Don't do anything for displays without layers. 171 // Don't do anything for displays without layers.
176 if (display.layers.empty()) 172 if (!display.HasLayers())
177 continue; 173 continue;
178 174
179 // TODO(Subv): Support more than 1 layer. 175 // TODO(Subv): Support more than 1 layer.
180 ASSERT_MSG(display.layers.size() == 1, "Max 1 layer per display is supported"); 176 VI::Layer& layer = display.GetLayer(0);
181 177 auto& buffer_queue = layer.GetBufferQueue();
182 VI::Layer& layer = display.layers[0];
183 auto& buffer_queue = layer.buffer_queue;
184 178
185 // Search for a queued buffer and acquire it 179 // Search for a queued buffer and acquire it
186 auto buffer = buffer_queue->AcquireBuffer(); 180 auto buffer = buffer_queue.AcquireBuffer();
187 181
188 MicroProfileFlip(); 182 MicroProfileFlip();
189 183
@@ -208,7 +202,7 @@ void NVFlinger::Compose() {
208 igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride, 202 igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride,
209 buffer->get().transform, buffer->get().crop_rect); 203 buffer->get().transform, buffer->get().crop_rect);
210 204
211 buffer_queue->ReleaseBuffer(buffer->get().slot); 205 buffer_queue.ReleaseBuffer(buffer->get().slot);
212 } 206 }
213} 207}
214 208
diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h
index 2e000af91..c0a83fffb 100644
--- a/src/core/hle/service/nvflinger/nvflinger.h
+++ b/src/core/hle/service/nvflinger/nvflinger.h
@@ -28,8 +28,8 @@ class Module;
28} // namespace Service::Nvidia 28} // namespace Service::Nvidia
29 29
30namespace Service::VI { 30namespace Service::VI {
31struct Display; 31class Display;
32struct Layer; 32class Layer;
33} // namespace Service::VI 33} // namespace Service::VI
34 34
35namespace Service::NVFlinger { 35namespace Service::NVFlinger {
@@ -65,7 +65,10 @@ public:
65 Kernel::SharedPtr<Kernel::ReadableEvent> FindVsyncEvent(u64 display_id) const; 65 Kernel::SharedPtr<Kernel::ReadableEvent> FindVsyncEvent(u64 display_id) const;
66 66
67 /// Obtains a buffer queue identified by the ID. 67 /// Obtains a buffer queue identified by the ID.
68 std::shared_ptr<BufferQueue> FindBufferQueue(u32 id) const; 68 BufferQueue& FindBufferQueue(u32 id);
69
70 /// Obtains a buffer queue identified by the ID.
71 const BufferQueue& FindBufferQueue(u32 id) const;
69 72
70 /// Performs a composition request to the emulated nvidia GPU and triggers the vsync events when 73 /// Performs a composition request to the emulated nvidia GPU and triggers the vsync events when
71 /// finished. 74 /// finished.
@@ -87,7 +90,7 @@ private:
87 std::shared_ptr<Nvidia::Module> nvdrv; 90 std::shared_ptr<Nvidia::Module> nvdrv;
88 91
89 std::vector<VI::Display> displays; 92 std::vector<VI::Display> displays;
90 std::vector<std::shared_ptr<BufferQueue>> buffer_queues; 93 std::vector<BufferQueue> buffer_queues;
91 94
92 /// Id to use for the next layer that is created, this counter is shared among all displays. 95 /// Id to use for the next layer that is created, this counter is shared among all displays.
93 u64 next_layer_id = 1; 96 u64 next_layer_id = 1;
diff --git a/src/core/hle/service/vi/display/vi_display.cpp b/src/core/hle/service/vi/display/vi_display.cpp
index a108e468f..01d80311b 100644
--- a/src/core/hle/service/vi/display/vi_display.cpp
+++ b/src/core/hle/service/vi/display/vi_display.cpp
@@ -2,8 +2,12 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
6#include <utility>
7
5#include <fmt/format.h> 8#include <fmt/format.h>
6 9
10#include "common/assert.h"
7#include "core/core.h" 11#include "core/core.h"
8#include "core/hle/kernel/readable_event.h" 12#include "core/hle/kernel/readable_event.h"
9#include "core/hle/service/vi/display/vi_display.h" 13#include "core/hle/service/vi/display/vi_display.h"
@@ -19,4 +23,49 @@ Display::Display(u64 id, std::string name) : id{id}, name{std::move(name)} {
19 23
20Display::~Display() = default; 24Display::~Display() = default;
21 25
26Layer& Display::GetLayer(std::size_t index) {
27 return layers.at(index);
28}
29
30const Layer& Display::GetLayer(std::size_t index) const {
31 return layers.at(index);
32}
33
34Kernel::SharedPtr<Kernel::ReadableEvent> Display::GetVSyncEvent() const {
35 return vsync_event.readable;
36}
37
38void Display::SignalVSyncEvent() {
39 vsync_event.writable->Signal();
40}
41
42void Display::CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue) {
43 // TODO(Subv): Support more than 1 layer.
44 ASSERT_MSG(layers.empty(), "Only one layer is supported per display at the moment");
45
46 layers.emplace_back(id, buffer_queue);
47}
48
49Layer* Display::FindLayer(u64 id) {
50 const auto itr = std::find_if(layers.begin(), layers.end(),
51 [id](const VI::Layer& layer) { return layer.GetID() == id; });
52
53 if (itr == layers.end()) {
54 return nullptr;
55 }
56
57 return &*itr;
58}
59
60const Layer* Display::FindLayer(u64 id) const {
61 const auto itr = std::find_if(layers.begin(), layers.end(),
62 [id](const VI::Layer& layer) { return layer.GetID() == id; });
63
64 if (itr == layers.end()) {
65 return nullptr;
66 }
67
68 return &*itr;
69}
70
22} // namespace Service::VI 71} // namespace Service::VI
diff --git a/src/core/hle/service/vi/display/vi_display.h b/src/core/hle/service/vi/display/vi_display.h
index df44db306..2acd46ff8 100644
--- a/src/core/hle/service/vi/display/vi_display.h
+++ b/src/core/hle/service/vi/display/vi_display.h
@@ -10,14 +10,84 @@
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "core/hle/kernel/writable_event.h" 11#include "core/hle/kernel/writable_event.h"
12 12
13namespace Service::NVFlinger {
14class BufferQueue;
15}
16
13namespace Service::VI { 17namespace Service::VI {
14 18
15struct Layer; 19class Layer;
16 20
17struct Display { 21/// Represents a single display type
22class Display {
23public:
24 /// Constructs a display with a given unique ID and name.
25 ///
26 /// @param id The unique ID for this display.
27 /// @param name The name for this display.
28 ///
18 Display(u64 id, std::string name); 29 Display(u64 id, std::string name);
19 ~Display(); 30 ~Display();
20 31
32 Display(const Display&) = delete;
33 Display& operator=(const Display&) = delete;
34
35 Display(Display&&) = default;
36 Display& operator=(Display&&) = default;
37
38 /// Gets the unique ID assigned to this display.
39 u64 GetID() const {
40 return id;
41 }
42
43 /// Gets the name of this display
44 const std::string& GetName() const {
45 return name;
46 }
47
48 /// Whether or not this display has any layers added to it.
49 bool HasLayers() const {
50 return !layers.empty();
51 }
52
53 /// Gets a layer for this display based off an index.
54 Layer& GetLayer(std::size_t index);
55
56 /// Gets a layer for this display based off an index.
57 const Layer& GetLayer(std::size_t index) const;
58
59 /// Gets the readable vsync event.
60 Kernel::SharedPtr<Kernel::ReadableEvent> GetVSyncEvent() const;
61
62 /// Signals the internal vsync event.
63 void SignalVSyncEvent();
64
65 /// Creates and adds a layer to this display with the given ID.
66 ///
67 /// @param id The ID to assign to the created layer.
68 /// @param buffer_queue The buffer queue for the layer instance to use.
69 ///
70 void CreateLayer(u64 id, NVFlinger::BufferQueue& buffer_queue);
71
72 /// Attempts to find a layer with the given ID.
73 ///
74 /// @param id The layer ID.
75 ///
76 /// @returns If found, the Layer instance with the given ID.
77 /// If not found, then nullptr is returned.
78 ///
79 Layer* FindLayer(u64 id);
80
81 /// Attempts to find a layer with the given ID.
82 ///
83 /// @param id The layer ID.
84 ///
85 /// @returns If found, the Layer instance with the given ID.
86 /// If not found, then nullptr is returned.
87 ///
88 const Layer* FindLayer(u64 id) const;
89
90private:
21 u64 id; 91 u64 id;
22 std::string name; 92 std::string name;
23 93
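
Display now deletes its copy operations and defaults its move operations so it can still live by value inside NVFlinger's std::vector<VI::Display>. A small self-contained sketch of that move-only-in-a-vector pattern is below; Widget is an illustrative stand-in, not the real Display class.

    // Sketch: a move-only type stored by value in a std::vector.
    #include <string>
    #include <utility>
    #include <vector>

    class Widget {
    public:
        explicit Widget(std::string name) : name{std::move(name)} {}

        Widget(const Widget&) = delete;
        Widget& operator=(const Widget&) = delete;

        Widget(Widget&&) = default;
        Widget& operator=(Widget&&) = default;

        const std::string& GetName() const {
            return name;
        }

    private:
        std::string name;
    };

    int main() {
        std::vector<Widget> widgets;
        widgets.emplace_back("first");  // constructed in place, no copies
        widgets.emplace_back("second"); // vector growth relocates via the move constructor
        return widgets.size() == 2 ? 0 : 1;
    }
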
diff --git a/src/core/hle/service/vi/layer/vi_layer.cpp b/src/core/hle/service/vi/layer/vi_layer.cpp
index 3a83e5b95..954225c26 100644
--- a/src/core/hle/service/vi/layer/vi_layer.cpp
+++ b/src/core/hle/service/vi/layer/vi_layer.cpp
@@ -6,8 +6,7 @@
6 6
7namespace Service::VI { 7namespace Service::VI {
8 8
9Layer::Layer(u64 id, std::shared_ptr<NVFlinger::BufferQueue> queue) 9Layer::Layer(u64 id, NVFlinger::BufferQueue& queue) : id{id}, buffer_queue{queue} {}
10 : id{id}, buffer_queue{std::move(queue)} {}
11 10
12Layer::~Layer() = default; 11Layer::~Layer() = default;
13 12
diff --git a/src/core/hle/service/vi/layer/vi_layer.h b/src/core/hle/service/vi/layer/vi_layer.h
index df328e09f..c6bfd01f6 100644
--- a/src/core/hle/service/vi/layer/vi_layer.h
+++ b/src/core/hle/service/vi/layer/vi_layer.h
@@ -4,8 +4,6 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <memory>
8
9#include "common/common_types.h" 7#include "common/common_types.h"
10 8
11namespace Service::NVFlinger { 9namespace Service::NVFlinger {
@@ -14,12 +12,41 @@ class BufferQueue;
14 12
15namespace Service::VI { 13namespace Service::VI {
16 14
17struct Layer { 15/// Represents a single display layer.
18 Layer(u64 id, std::shared_ptr<NVFlinger::BufferQueue> queue); 16class Layer {
17public:
18 /// Constructs a layer with a given ID and buffer queue.
19 ///
20 /// @param id The ID to assign to this layer.
21 /// @param queue The buffer queue for this layer to use.
22 ///
23 Layer(u64 id, NVFlinger::BufferQueue& queue);
19 ~Layer(); 24 ~Layer();
20 25
26 Layer(const Layer&) = delete;
27 Layer& operator=(const Layer&) = delete;
28
29 Layer(Layer&&) = default;
30 Layer& operator=(Layer&&) = delete;
31
32 /// Gets the ID for this layer.
33 u64 GetID() const {
34 return id;
35 }
36
37 /// Gets a reference to the buffer queue this layer is using.
38 NVFlinger::BufferQueue& GetBufferQueue() {
39 return buffer_queue;
40 }
41
42 /// Gets a const reference to the buffer queue this layer is using.
43 const NVFlinger::BufferQueue& GetBufferQueue() const {
44 return buffer_queue;
45 }
46
47private:
21 u64 id; 48 u64 id;
22 std::shared_ptr<NVFlinger::BufferQueue> buffer_queue; 49 NVFlinger::BufferQueue& buffer_queue;
23}; 50};
24 51
25} // namespace Service::VI 52} // namespace Service::VI
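
Layer now holds its BufferQueue as a plain reference rather than a shared_ptr, which is also why its copy operations are deleted and move assignment stays deleted: a reference member can never be rebound after construction. A rough sketch of the same shape, with Consumer and Queue as stand-ins for the real types:

    // Sketch: a class with a reference member and the special members it allows.
    #include <cstdint>
    #include <queue>

    using Queue = std::queue<std::uint32_t>;

    class Consumer {
    public:
        explicit Consumer(Queue& queue) : queue{queue} {}

        Consumer(const Consumer&) = delete;
        Consumer& operator=(const Consumer&) = delete;

        // Move construction can copy the reference, but assignment cannot
        // rebind it, so it remains deleted, mirroring Layer.
        Consumer(Consumer&&) = default;
        Consumer& operator=(Consumer&&) = delete;

        Queue& GetQueue() {
            return queue;
        }

        const Queue& GetQueue() const {
            return queue;
        }

    private:
        Queue& queue;
    };
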
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp
index a317a2885..a975767bb 100644
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -420,7 +420,7 @@ public:
420 u32_le fence_is_valid; 420 u32_le fence_is_valid;
421 std::array<Fence, 2> fences; 421 std::array<Fence, 2> fences;
422 422
423 MathUtil::Rectangle<int> GetCropRect() const { 423 Common::Rectangle<int> GetCropRect() const {
424 return {crop_left, crop_top, crop_right, crop_bottom}; 424 return {crop_left, crop_top, crop_right, crop_bottom};
425 } 425 }
426 }; 426 };
@@ -525,7 +525,7 @@ private:
525 LOG_DEBUG(Service_VI, "called. id=0x{:08X} transaction={:X}, flags=0x{:08X}", id, 525 LOG_DEBUG(Service_VI, "called. id=0x{:08X} transaction={:X}, flags=0x{:08X}", id,
526 static_cast<u32>(transaction), flags); 526 static_cast<u32>(transaction), flags);
527 527
528 auto buffer_queue = nv_flinger->FindBufferQueue(id); 528 auto& buffer_queue = nv_flinger->FindBufferQueue(id);
529 529
530 if (transaction == TransactionId::Connect) { 530 if (transaction == TransactionId::Connect) {
531 IGBPConnectRequestParcel request{ctx.ReadBuffer()}; 531 IGBPConnectRequestParcel request{ctx.ReadBuffer()};
@@ -538,7 +538,7 @@ private:
538 } else if (transaction == TransactionId::SetPreallocatedBuffer) { 538 } else if (transaction == TransactionId::SetPreallocatedBuffer) {
539 IGBPSetPreallocatedBufferRequestParcel request{ctx.ReadBuffer()}; 539 IGBPSetPreallocatedBufferRequestParcel request{ctx.ReadBuffer()};
540 540
541 buffer_queue->SetPreallocatedBuffer(request.data.slot, request.buffer); 541 buffer_queue.SetPreallocatedBuffer(request.data.slot, request.buffer);
542 542
543 IGBPSetPreallocatedBufferResponseParcel response{}; 543 IGBPSetPreallocatedBufferResponseParcel response{};
544 ctx.WriteBuffer(response.Serialize()); 544 ctx.WriteBuffer(response.Serialize());
@@ -546,7 +546,7 @@ private:
546 IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()}; 546 IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()};
547 const u32 width{request.data.width}; 547 const u32 width{request.data.width};
548 const u32 height{request.data.height}; 548 const u32 height{request.data.height};
549 std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height); 549 std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height);
550 550
551 if (slot) { 551 if (slot) {
552 // Buffer is available 552 // Buffer is available
@@ -559,8 +559,8 @@ private:
559 [=](Kernel::SharedPtr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx, 559 [=](Kernel::SharedPtr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx,
560 Kernel::ThreadWakeupReason reason) { 560 Kernel::ThreadWakeupReason reason) {
561 // Repeat TransactParcel DequeueBuffer when a buffer is available 561 // Repeat TransactParcel DequeueBuffer when a buffer is available
562 auto buffer_queue = nv_flinger->FindBufferQueue(id); 562 auto& buffer_queue = nv_flinger->FindBufferQueue(id);
563 std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height); 563 std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height);
564 ASSERT_MSG(slot != std::nullopt, "Could not dequeue buffer."); 564 ASSERT_MSG(slot != std::nullopt, "Could not dequeue buffer.");
565 565
566 IGBPDequeueBufferResponseParcel response{*slot}; 566 IGBPDequeueBufferResponseParcel response{*slot};
@@ -568,28 +568,28 @@ private:
568 IPC::ResponseBuilder rb{ctx, 2}; 568 IPC::ResponseBuilder rb{ctx, 2};
569 rb.Push(RESULT_SUCCESS); 569 rb.Push(RESULT_SUCCESS);
570 }, 570 },
571 buffer_queue->GetWritableBufferWaitEvent()); 571 buffer_queue.GetWritableBufferWaitEvent());
572 } 572 }
573 } else if (transaction == TransactionId::RequestBuffer) { 573 } else if (transaction == TransactionId::RequestBuffer) {
574 IGBPRequestBufferRequestParcel request{ctx.ReadBuffer()}; 574 IGBPRequestBufferRequestParcel request{ctx.ReadBuffer()};
575 575
576 auto& buffer = buffer_queue->RequestBuffer(request.slot); 576 auto& buffer = buffer_queue.RequestBuffer(request.slot);
577 577
578 IGBPRequestBufferResponseParcel response{buffer}; 578 IGBPRequestBufferResponseParcel response{buffer};
579 ctx.WriteBuffer(response.Serialize()); 579 ctx.WriteBuffer(response.Serialize());
580 } else if (transaction == TransactionId::QueueBuffer) { 580 } else if (transaction == TransactionId::QueueBuffer) {
581 IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()}; 581 IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()};
582 582
583 buffer_queue->QueueBuffer(request.data.slot, request.data.transform, 583 buffer_queue.QueueBuffer(request.data.slot, request.data.transform,
584 request.data.GetCropRect()); 584 request.data.GetCropRect());
585 585
586 IGBPQueueBufferResponseParcel response{1280, 720}; 586 IGBPQueueBufferResponseParcel response{1280, 720};
587 ctx.WriteBuffer(response.Serialize()); 587 ctx.WriteBuffer(response.Serialize());
588 } else if (transaction == TransactionId::Query) { 588 } else if (transaction == TransactionId::Query) {
589 IGBPQueryRequestParcel request{ctx.ReadBuffer()}; 589 IGBPQueryRequestParcel request{ctx.ReadBuffer()};
590 590
591 u32 value = 591 const u32 value =
592 buffer_queue->Query(static_cast<NVFlinger::BufferQueue::QueryType>(request.type)); 592 buffer_queue.Query(static_cast<NVFlinger::BufferQueue::QueryType>(request.type));
593 593
594 IGBPQueryResponseParcel response{value}; 594 IGBPQueryResponseParcel response{value};
595 ctx.WriteBuffer(response.Serialize()); 595 ctx.WriteBuffer(response.Serialize());
@@ -629,12 +629,12 @@ private:
629 629
630 LOG_WARNING(Service_VI, "(STUBBED) called id={}, unknown={:08X}", id, unknown); 630 LOG_WARNING(Service_VI, "(STUBBED) called id={}, unknown={:08X}", id, unknown);
631 631
632 const auto buffer_queue = nv_flinger->FindBufferQueue(id); 632 const auto& buffer_queue = nv_flinger->FindBufferQueue(id);
633 633
634 // TODO(Subv): Find out what this actually is. 634 // TODO(Subv): Find out what this actually is.
635 IPC::ResponseBuilder rb{ctx, 2, 1}; 635 IPC::ResponseBuilder rb{ctx, 2, 1};
636 rb.Push(RESULT_SUCCESS); 636 rb.Push(RESULT_SUCCESS);
637 rb.PushCopyObjects(buffer_queue->GetBufferWaitEvent()); 637 rb.PushCopyObjects(buffer_queue.GetBufferWaitEvent());
638 } 638 }
639 639
640 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger; 640 std::shared_ptr<NVFlinger::NVFlinger> nv_flinger;
@@ -752,6 +752,7 @@ public:
752 {1102, nullptr, "GetDisplayResolution"}, 752 {1102, nullptr, "GetDisplayResolution"},
753 {2010, &IManagerDisplayService::CreateManagedLayer, "CreateManagedLayer"}, 753 {2010, &IManagerDisplayService::CreateManagedLayer, "CreateManagedLayer"},
754 {2011, nullptr, "DestroyManagedLayer"}, 754 {2011, nullptr, "DestroyManagedLayer"},
755 {2012, nullptr, "CreateStrayLayer"},
755 {2050, nullptr, "CreateIndirectLayer"}, 756 {2050, nullptr, "CreateIndirectLayer"},
756 {2051, nullptr, "DestroyIndirectLayer"}, 757 {2051, nullptr, "DestroyIndirectLayer"},
757 {2052, nullptr, "CreateIndirectProducerEndPoint"}, 758 {2052, nullptr, "CreateIndirectProducerEndPoint"},
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index e9166dbd9..f809567b6 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -71,15 +71,20 @@ static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, Pa
71 FlushMode::FlushAndInvalidate); 71 FlushMode::FlushAndInvalidate);
72 72
73 VAddr end = base + size; 73 VAddr end = base + size;
74 while (base != end) { 74 ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
75 ASSERT_MSG(base < page_table.pointers.size(), "out of range mapping at {:016X}", base); 75 base + page_table.pointers.size());
76 76
77 page_table.attributes[base] = type; 77 std::fill(page_table.attributes.begin() + base, page_table.attributes.begin() + end, type);
78 page_table.pointers[base] = memory;
79 78
80 base += 1; 79 if (memory == nullptr) {
81 if (memory != nullptr) 80 std::fill(page_table.pointers.begin() + base, page_table.pointers.begin() + end, memory);
81 } else {
82 while (base != end) {
83 page_table.pointers[base] = memory;
84
85 base += 1;
82 memory += PAGE_SIZE; 86 memory += PAGE_SIZE;
87 }
83 } 88 }
84} 89}
85 90
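
The MapPages rewrite replaces the per-page loop with a bulk std::fill over the attribute range, keeping a loop only when a real backing pointer has to advance by PAGE_SIZE per page. A simplified standalone sketch of that approach follows; the PageTable layout and constants here are stand-ins, not the real core/memory types.

    // Sketch: range-fill page mapping with a loop only for advancing pointers.
    #include <algorithm>
    #include <array>
    #include <cstddef>
    #include <cstdint>

    constexpr std::size_t PAGE_BITS = 12;
    constexpr std::size_t PAGE_SIZE = std::size_t{1} << PAGE_BITS;
    constexpr std::size_t NUM_PAGES = 1024;

    enum class PageType : std::uint8_t { Unmapped, Memory, Special };

    struct PageTable {
        std::array<std::uint8_t*, NUM_PAGES> pointers{};
        std::array<PageType, NUM_PAGES> attributes{};
    };

    void MapPages(PageTable& table, std::size_t base, std::size_t count, std::uint8_t* memory,
                  PageType type) {
        const std::size_t end = base + count;
        if (end > table.pointers.size()) {
            return; // stand-in for the ASSERT_MSG bounds check in the real code
        }

        std::fill(table.attributes.begin() + base, table.attributes.begin() + end, type);

        if (memory == nullptr) {
            // Every page shares the same (null) pointer, so one fill suffices.
            std::fill(table.pointers.begin() + base, table.pointers.begin() + end, memory);
        } else {
            // Each page points PAGE_SIZE further into the backing allocation.
            for (std::size_t page = base; page != end; ++page) {
                table.pointers[page] = memory;
                memory += PAGE_SIZE;
            }
        }
    }
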
diff --git a/src/input_common/motion_emu.cpp b/src/input_common/motion_emu.cpp
index 9570c060e..6d96d4019 100644
--- a/src/input_common/motion_emu.cpp
+++ b/src/input_common/motion_emu.cpp
@@ -32,12 +32,12 @@ public:
32 } 32 }
33 33
34 void BeginTilt(int x, int y) { 34 void BeginTilt(int x, int y) {
35 mouse_origin = Math::MakeVec(x, y); 35 mouse_origin = Common::MakeVec(x, y);
36 is_tilting = true; 36 is_tilting = true;
37 } 37 }
38 38
39 void Tilt(int x, int y) { 39 void Tilt(int x, int y) {
40 auto mouse_move = Math::MakeVec(x, y) - mouse_origin; 40 auto mouse_move = Common::MakeVec(x, y) - mouse_origin;
41 if (is_tilting) { 41 if (is_tilting) {
42 std::lock_guard<std::mutex> guard(tilt_mutex); 42 std::lock_guard<std::mutex> guard(tilt_mutex);
43 if (mouse_move.x == 0 && mouse_move.y == 0) { 43 if (mouse_move.x == 0 && mouse_move.y == 0) {
@@ -45,7 +45,7 @@ public:
45 } else { 45 } else {
46 tilt_direction = mouse_move.Cast<float>(); 46 tilt_direction = mouse_move.Cast<float>();
47 tilt_angle = 47 tilt_angle =
48 std::clamp(tilt_direction.Normalize() * sensitivity, 0.0f, MathUtil::PI * 0.5f); 48 std::clamp(tilt_direction.Normalize() * sensitivity, 0.0f, Common::PI * 0.5f);
49 } 49 }
50 } 50 }
51 } 51 }
@@ -56,7 +56,7 @@ public:
56 is_tilting = false; 56 is_tilting = false;
57 } 57 }
58 58
59 std::tuple<Math::Vec3<float>, Math::Vec3<float>> GetStatus() { 59 std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() {
60 std::lock_guard<std::mutex> guard(status_mutex); 60 std::lock_guard<std::mutex> guard(status_mutex);
61 return status; 61 return status;
62 } 62 }
@@ -66,17 +66,17 @@ private:
66 const std::chrono::steady_clock::duration update_duration; 66 const std::chrono::steady_clock::duration update_duration;
67 const float sensitivity; 67 const float sensitivity;
68 68
69 Math::Vec2<int> mouse_origin; 69 Common::Vec2<int> mouse_origin;
70 70
71 std::mutex tilt_mutex; 71 std::mutex tilt_mutex;
72 Math::Vec2<float> tilt_direction; 72 Common::Vec2<float> tilt_direction;
73 float tilt_angle = 0; 73 float tilt_angle = 0;
74 74
75 bool is_tilting = false; 75 bool is_tilting = false;
76 76
77 Common::Event shutdown_event; 77 Common::Event shutdown_event;
78 78
79 std::tuple<Math::Vec3<float>, Math::Vec3<float>> status; 79 std::tuple<Common::Vec3<float>, Common::Vec3<float>> status;
80 std::mutex status_mutex; 80 std::mutex status_mutex;
81 81
82 // Note: always keep the thread declaration at the end so that other objects are initialized 82 // Note: always keep the thread declaration at the end so that other objects are initialized
@@ -85,8 +85,8 @@ private:
85 85
86 void MotionEmuThread() { 86 void MotionEmuThread() {
87 auto update_time = std::chrono::steady_clock::now(); 87 auto update_time = std::chrono::steady_clock::now();
88 Math::Quaternion<float> q = MakeQuaternion(Math::Vec3<float>(), 0); 88 Common::Quaternion<float> q = Common::MakeQuaternion(Common::Vec3<float>(), 0);
89 Math::Quaternion<float> old_q; 89 Common::Quaternion<float> old_q;
90 90
91 while (!shutdown_event.WaitUntil(update_time)) { 91 while (!shutdown_event.WaitUntil(update_time)) {
92 update_time += update_duration; 92 update_time += update_duration;
@@ -96,18 +96,18 @@ private:
96 std::lock_guard<std::mutex> guard(tilt_mutex); 96 std::lock_guard<std::mutex> guard(tilt_mutex);
97 97
98 // Find the quaternion describing current 3DS tilting 98 // Find the quaternion describing current 3DS tilting
99 q = MakeQuaternion(Math::MakeVec(-tilt_direction.y, 0.0f, tilt_direction.x), 99 q = Common::MakeQuaternion(
100 tilt_angle); 100 Common::MakeVec(-tilt_direction.y, 0.0f, tilt_direction.x), tilt_angle);
101 } 101 }
102 102
103 auto inv_q = q.Inverse(); 103 auto inv_q = q.Inverse();
104 104
105 // Set the gravity vector in world space 105 // Set the gravity vector in world space
106 auto gravity = Math::MakeVec(0.0f, -1.0f, 0.0f); 106 auto gravity = Common::MakeVec(0.0f, -1.0f, 0.0f);
107 107
108 // Find the angular rate vector in world space 108 // Find the angular rate vector in world space
109 auto angular_rate = ((q - old_q) * inv_q).xyz * 2; 109 auto angular_rate = ((q - old_q) * inv_q).xyz * 2;
110 angular_rate *= 1000 / update_millisecond / MathUtil::PI * 180; 110 angular_rate *= 1000 / update_millisecond / Common::PI * 180;
111 111
112 // Transform the two vectors from world space to 3DS space 112 // Transform the two vectors from world space to 3DS space
113 gravity = QuaternionRotate(inv_q, gravity); 113 gravity = QuaternionRotate(inv_q, gravity);
@@ -131,7 +131,7 @@ public:
131 device = std::make_shared<MotionEmuDevice>(update_millisecond, sensitivity); 131 device = std::make_shared<MotionEmuDevice>(update_millisecond, sensitivity);
132 } 132 }
133 133
134 std::tuple<Math::Vec3<float>, Math::Vec3<float>> GetStatus() const override { 134 std::tuple<Common::Vec3<float>, Common::Vec3<float>> GetStatus() const override {
135 return device->GetStatus(); 135 return device->GetStatus();
136 } 136 }
137 137
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index eb9bf1878..669541b4b 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -33,18 +33,36 @@ void DmaPusher::DispatchCalls() {
33} 33}
34 34
35bool DmaPusher::Step() { 35bool DmaPusher::Step() {
36 if (dma_get != dma_put) { 36 if (!ib_enable || dma_pushbuffer.empty()) {
37 // Push buffer non-empty, read a word 37 // pushbuffer empty and IB empty or nonexistent - nothing to do
38 const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get); 38 return false;
39 ASSERT_MSG(address, "Invalid GPU address"); 39 }
40 40
41 const CommandHeader command_header{Memory::Read32(*address)}; 41 const CommandList& command_list{dma_pushbuffer.front()};
42 const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]};
43 GPUVAddr dma_get = command_list_header.addr;
44 GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32);
45 bool non_main = command_list_header.is_non_main;
42 46
43 dma_get += sizeof(u32); 47 if (dma_pushbuffer_subindex >= command_list.size()) {
48 // We've gone through the current list, remove it from the queue
49 dma_pushbuffer.pop();
50 dma_pushbuffer_subindex = 0;
51 }
44 52
45 if (!non_main) { 53 if (command_list_header.size == 0) {
46 dma_mget = dma_get; 54 return true;
47 } 55 }
56
57 // Push buffer non-empty, read a word
58 const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get);
59 ASSERT_MSG(address, "Invalid GPU address");
60
61 command_headers.resize(command_list_header.size);
62
63 Memory::ReadBlock(*address, command_headers.data(), command_list_header.size * sizeof(u32));
64
65 for (const CommandHeader& command_header : command_headers) {
48 66
49 // now, see if we're in the middle of a command 67 // now, see if we're in the middle of a command
50 if (dma_state.length_pending) { 68 if (dma_state.length_pending) {
@@ -91,22 +109,11 @@ bool DmaPusher::Step() {
91 break; 109 break;
92 } 110 }
93 } 111 }
94 } else if (ib_enable && !dma_pushbuffer.empty()) { 112 }
95 // Current pushbuffer empty, but we have more IB entries to read 113
96 const CommandList& command_list{dma_pushbuffer.front()}; 114 if (!non_main) {
97 const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]}; 115 // TODO (degasus): This is dead code, as dma_mget is never read.
98 dma_get = command_list_header.addr; 116 dma_mget = dma_put;
99 dma_put = dma_get + command_list_header.size * sizeof(u32);
100 non_main = command_list_header.is_non_main;
101
102 if (dma_pushbuffer_subindex >= command_list.size()) {
103 // We've gone through the current list, remove it from the queue
104 dma_pushbuffer.pop();
105 dma_pushbuffer_subindex = 0;
106 }
107 } else {
108 // Otherwise, pushbuffer empty and IB empty or nonexistent - nothing to do
109 return {};
110 } 117 }
111 118
112 return true; 119 return true;
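
The reworked Step() fetches an entire command list with one Memory::ReadBlock into the reusable command_headers vector and walks it in host memory, instead of issuing a Read32 per word. A rough sketch of that batched-fetch shape is below; ReadBlock and CommandHeader are simplified stand-ins for the real Memory::ReadBlock and Tegra::CommandHeader.

    // Sketch: copy a whole command list at once, then iterate it locally.
    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <vector>

    struct CommandHeader {
        std::uint32_t raw;
    };

    void ReadBlock(const void* src, void* dest, std::size_t size) {
        std::memcpy(dest, src, size);
    }

    void ProcessCommandList(const std::uint32_t* guest_memory, std::size_t word_count,
                            std::vector<CommandHeader>& command_headers) {
        // Reuse the same buffer across calls to avoid repeated allocations.
        command_headers.resize(word_count);
        ReadBlock(guest_memory, command_headers.data(), word_count * sizeof(std::uint32_t));

        for (const CommandHeader& header : command_headers) {
            // ... decode method/subchannel/count from header.raw and dispatch ...
            (void)header;
        }
    }
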
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index 1097e5c49..27a36348c 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -75,6 +75,8 @@ private:
75 75
76 GPU& gpu; 76 GPU& gpu;
77 77
78 std::vector<CommandHeader> command_headers; ///< Buffer for list of commands fetched at once
79
78 std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed 80 std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed
79 std::size_t dma_pushbuffer_subindex{}; ///< Index within a command list within the pushbuffer 81 std::size_t dma_pushbuffer_subindex{}; ///< Index within a command list within the pushbuffer
80 82
@@ -89,11 +91,8 @@ private:
89 DmaState dma_state{}; 91 DmaState dma_state{};
90 bool dma_increment_once{}; 92 bool dma_increment_once{};
91 93
92 GPUVAddr dma_put{}; ///< pushbuffer current end address
93 GPUVAddr dma_get{}; ///< pushbuffer current read address
94 GPUVAddr dma_mget{}; ///< main pushbuffer last read address 94 GPUVAddr dma_mget{}; ///< main pushbuffer last read address
95 bool ib_enable{true}; ///< IB mode enabled 95 bool ib_enable{true}; ///< IB mode enabled
96 bool non_main{}; ///< non-main pushbuffer active
97}; 96};
98 97
99} // namespace Tegra 98} // namespace Tegra
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index ec1a57226..540dcc52c 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -44,10 +44,10 @@ void Fermi2D::HandleSurfaceCopy() {
44 const u32 src_blit_y2{ 44 const u32 src_blit_y2{
45 static_cast<u32>((regs.blit_src_y + (regs.blit_dst_height * regs.blit_dv_dy)) >> 32)}; 45 static_cast<u32>((regs.blit_src_y + (regs.blit_dst_height * regs.blit_dv_dy)) >> 32)};
46 46
47 const MathUtil::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2}; 47 const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
48 const MathUtil::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, 48 const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y,
49 regs.blit_dst_x + regs.blit_dst_width, 49 regs.blit_dst_x + regs.blit_dst_width,
50 regs.blit_dst_y + regs.blit_dst_height}; 50 regs.blit_dst_y + regs.blit_dst_height};
51 51
52 if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, src_rect, dst_rect)) { 52 if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, src_rect, dst_rect)) {
53 UNIMPLEMENTED(); 53 UNIMPLEMENTED();
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 2d2136067..144e7fa82 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -107,21 +107,23 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
107void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { 107void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
108 auto debug_context = system.GetGPUDebugContext(); 108 auto debug_context = system.GetGPUDebugContext();
109 109
110 const u32 method = method_call.method;
111
110 // It is an error to write to a register other than the current macro's ARG register before it 112 // It is an error to write to a register other than the current macro's ARG register before it
111 // has finished execution. 113 // has finished execution.
112 if (executing_macro != 0) { 114 if (executing_macro != 0) {
113 ASSERT(method_call.method == executing_macro + 1); 115 ASSERT(method == executing_macro + 1);
114 } 116 }
115 117
116 // Methods after 0xE00 are special, they're actually triggers for some microcode that was 118 // Methods after 0xE00 are special, they're actually triggers for some microcode that was
117 // uploaded to the GPU during initialization. 119 // uploaded to the GPU during initialization.
118 if (method_call.method >= MacroRegistersStart) { 120 if (method >= MacroRegistersStart) {
119 // We're trying to execute a macro 121 // We're trying to execute a macro
120 if (executing_macro == 0) { 122 if (executing_macro == 0) {
121 // A macro call must begin by writing the macro method's register, not its argument. 123 // A macro call must begin by writing the macro method's register, not its argument.
122 ASSERT_MSG((method_call.method % 2) == 0, 124 ASSERT_MSG((method % 2) == 0,
123 "Can't start macro execution by writing to the ARGS register"); 125 "Can't start macro execution by writing to the ARGS register");
124 executing_macro = method_call.method; 126 executing_macro = method;
125 } 127 }
126 128
127 macro_params.push_back(method_call.argument); 129 macro_params.push_back(method_call.argument);
@@ -133,66 +135,62 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
133 return; 135 return;
134 } 136 }
135 137
136 ASSERT_MSG(method_call.method < Regs::NUM_REGS, 138 ASSERT_MSG(method < Regs::NUM_REGS,
137 "Invalid Maxwell3D register, increase the size of the Regs structure"); 139 "Invalid Maxwell3D register, increase the size of the Regs structure");
138 140
139 if (debug_context) { 141 if (debug_context) {
140 debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr); 142 debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr);
141 } 143 }
142 144
143 if (regs.reg_array[method_call.method] != method_call.argument) { 145 if (regs.reg_array[method] != method_call.argument) {
144 regs.reg_array[method_call.method] = method_call.argument; 146 regs.reg_array[method] = method_call.argument;
145 // Color buffers 147 // Color buffers
146 constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt); 148 constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt);
147 constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); 149 constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
148 if (method_call.method >= first_rt_reg && 150 if (method >= first_rt_reg &&
149 method_call.method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) { 151 method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) {
150 const std::size_t rt_index = (method_call.method - first_rt_reg) / registers_per_rt; 152 const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt;
151 dirty_flags.color_buffer |= 1u << static_cast<u32>(rt_index); 153 dirty_flags.color_buffer.set(rt_index);
152 } 154 }
153 155
154 // Zeta buffer 156 // Zeta buffer
155 constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32); 157 constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
156 if (method_call.method == MAXWELL3D_REG_INDEX(zeta_enable) || 158 if (method == MAXWELL3D_REG_INDEX(zeta_enable) ||
157 method_call.method == MAXWELL3D_REG_INDEX(zeta_width) || 159 method == MAXWELL3D_REG_INDEX(zeta_width) ||
158 method_call.method == MAXWELL3D_REG_INDEX(zeta_height) || 160 method == MAXWELL3D_REG_INDEX(zeta_height) ||
159 (method_call.method >= MAXWELL3D_REG_INDEX(zeta) && 161 (method >= MAXWELL3D_REG_INDEX(zeta) &&
160 method_call.method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) { 162 method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
161 dirty_flags.zeta_buffer = true; 163 dirty_flags.zeta_buffer = true;
162 } 164 }
163 165
164 // Shader 166 // Shader
165 constexpr u32 shader_registers_count = 167 constexpr u32 shader_registers_count =
166 sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32); 168 sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
167 if (method_call.method >= MAXWELL3D_REG_INDEX(shader_config[0]) && 169 if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
168 method_call.method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) { 170 method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
169 dirty_flags.shaders = true; 171 dirty_flags.shaders = true;
170 } 172 }
171 173
172 // Vertex format 174 // Vertex format
173 if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && 175 if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
174 method_call.method < 176 method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
175 MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
176 dirty_flags.vertex_attrib_format = true; 177 dirty_flags.vertex_attrib_format = true;
177 } 178 }
178 179
179 // Vertex buffer 180 // Vertex buffer
180 if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array) && 181 if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
181 method_call.method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) { 182 method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
182 dirty_flags.vertex_array |= 183 dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
183 1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2); 184 } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
184 } else if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array_limit) && 185 method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
185 method_call.method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) { 186 dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
186 dirty_flags.vertex_array |= 187 } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
187 1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1); 188 method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
188 } else if (method_call.method >= MAXWELL3D_REG_INDEX(instanced_arrays) && 189 dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays));
189 method_call.method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
190 dirty_flags.vertex_array |=
191 1u << (method_call.method - MAXWELL3D_REG_INDEX(instanced_arrays));
192 } 190 }
193 } 191 }
194 192
195 switch (method_call.method) { 193 switch (method) {
196 case MAXWELL3D_REG_INDEX(macros.data): { 194 case MAXWELL3D_REG_INDEX(macros.data): {
197 ProcessMacroUpload(method_call.argument); 195 ProcessMacroUpload(method_call.argument);
198 break; 196 break;
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 0e3873ffd..7fbf1026e 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -5,8 +5,10 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <bitset>
8#include <unordered_map> 9#include <unordered_map>
9#include <vector> 10#include <vector>
11
10#include "common/assert.h" 12#include "common/assert.h"
11#include "common/bit_field.h" 13#include "common/bit_field.h"
12#include "common/common_funcs.h" 14#include "common/common_funcs.h"
@@ -503,7 +505,7 @@ public:
503 f32 translate_z; 505 f32 translate_z;
504 INSERT_PADDING_WORDS(2); 506 INSERT_PADDING_WORDS(2);
505 507
506 MathUtil::Rectangle<s32> GetRect() const { 508 Common::Rectangle<s32> GetRect() const {
507 return { 509 return {
508 GetX(), // left 510 GetX(), // left
509 GetY() + GetHeight(), // top 511 GetY() + GetHeight(), // top
@@ -1094,19 +1096,18 @@ public:
1094 MemoryManager& memory_manager; 1096 MemoryManager& memory_manager;
1095 1097
1096 struct DirtyFlags { 1098 struct DirtyFlags {
1097 u8 color_buffer = 0xFF; 1099 std::bitset<8> color_buffer{0xFF};
1098 bool zeta_buffer = true; 1100 std::bitset<32> vertex_array{0xFFFFFFFF};
1099
1100 bool shaders = true;
1101 1101
1102 bool vertex_attrib_format = true; 1102 bool vertex_attrib_format = true;
1103 u32 vertex_array = 0xFFFFFFFF; 1103 bool zeta_buffer = true;
1104 bool shaders = true;
1104 1105
1105 void OnMemoryWrite() { 1106 void OnMemoryWrite() {
1106 color_buffer = 0xFF;
1107 zeta_buffer = true; 1107 zeta_buffer = true;
1108 shaders = true; 1108 shaders = true;
1109 vertex_array = 0xFFFFFFFF; 1109 color_buffer.set();
1110 vertex_array.set();
1110 } 1111 }
1111 }; 1112 };
1112 1113
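
DirtyFlags now tracks color buffers and vertex arrays with std::bitset instead of raw u8/u32 masks, so callers can use set(), reset(index), none() and operator[] rather than hand-rolled shifts. A short sketch of the operations the later gl_rasterizer hunks rely on; the helper function around them is illustrative, not part of the commit.

    // Sketch: bitset-based dirty tracking.
    #include <bitset>
    #include <cstddef>

    struct DirtyFlags {
        std::bitset<8> color_buffer{0xFF};
        std::bitset<32> vertex_array{0xFFFFFFFF};

        void OnMemoryWrite() {
            color_buffer.set(); // mark every render target dirty
            vertex_array.set(); // mark every vertex array dirty
        }
    };

    bool NeedsColorBufferRefresh(DirtyFlags& flags, std::size_t index) {
        if (!flags.color_buffer[index]) {
            return false; // still clean, keep the cached surface
        }
        flags.color_buffer.reset(index);
        return true;
    }
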
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 1f425f90b..252592edd 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -376,9 +376,9 @@ enum class R2pMode : u64 {
376}; 376};
377 377
378enum class IpaInterpMode : u64 { 378enum class IpaInterpMode : u64 {
379 Linear = 0, 379 Pass = 0,
380 Perspective = 1, 380 Multiply = 1,
381 Flat = 2, 381 Constant = 2,
382 Sc = 3, 382 Sc = 3,
383}; 383};
384 384
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
index cf2b76ff6..e86a7f04a 100644
--- a/src/video_core/engines/shader_header.h
+++ b/src/video_core/engines/shader_header.h
@@ -16,6 +16,13 @@ enum class OutputTopology : u32 {
16 TriangleStrip = 7, 16 TriangleStrip = 7,
17}; 17};
18 18
19enum class AttributeUse : u8 {
20 Unused = 0,
21 Constant = 1,
22 Perspective = 2,
23 ScreenLinear = 3,
24};
25
19// Documentation in: 26// Documentation in:
20// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture 27// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture
21struct Header { 28struct Header {
@@ -84,9 +91,15 @@ struct Header {
84 } vtg; 91 } vtg;
85 92
86 struct { 93 struct {
87 INSERT_PADDING_BYTES(3); // ImapSystemValuesA 94 INSERT_PADDING_BYTES(3); // ImapSystemValuesA
88 INSERT_PADDING_BYTES(1); // ImapSystemValuesB 95 INSERT_PADDING_BYTES(1); // ImapSystemValuesB
89 INSERT_PADDING_BYTES(32); // ImapGenericVector[32] 96 union {
97 BitField<0, 2, AttributeUse> x;
98 BitField<2, 2, AttributeUse> y;
99 BitField<4, 2, AttributeUse> w;
100 BitField<6, 2, AttributeUse> z;
101 u8 raw;
102 } imap_generic_vector[32];
90 INSERT_PADDING_BYTES(2); // ImapColor 103 INSERT_PADDING_BYTES(2); // ImapColor
91 INSERT_PADDING_BYTES(2); // ImapSystemValuesC 104 INSERT_PADDING_BYTES(2); // ImapSystemValuesC
92 INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10] 105 INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10]
@@ -103,6 +116,28 @@ struct Header {
103 const u32 bit = render_target * 4 + component; 116 const u32 bit = render_target * 4 + component;
104 return omap.target & (1 << bit); 117 return omap.target & (1 << bit);
105 } 118 }
119 AttributeUse GetAttributeIndexUse(u32 attribute, u32 index) const {
120 return static_cast<AttributeUse>(
121 (imap_generic_vector[attribute].raw >> (index * 2)) & 0x03);
122 }
123 AttributeUse GetAttributeUse(u32 attribute) const {
124 AttributeUse result = AttributeUse::Unused;
125 for (u32 i = 0; i < 4; i++) {
126 const auto index = GetAttributeIndexUse(attribute, i);
127 if (index == AttributeUse::Unused) {
128 continue;
129 }
130 if (result == AttributeUse::Unused || result == index) {
131 result = index;
132 continue;
133 }
134 LOG_CRITICAL(HW_GPU, "Generic Attribute Conflict in Interpolation Mode");
135 if (index == AttributeUse::Perspective) {
136 result = index;
137 }
138 }
139 return result;
140 }
106 } ps; 141 } ps;
107 }; 142 };
108 143
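
The new imap_generic_vector entries pack four 2-bit AttributeUse values per byte, and GetAttributeIndexUse recovers them by shifting twice the component index. A tiny self-contained sketch of that decoding; the helper name and sample byte are illustrative only.

    // Sketch: extracting 2-bit attribute-use fields from one packed byte.
    #include <cstdint>

    enum class AttributeUse : std::uint8_t {
        Unused = 0,
        Constant = 1,
        Perspective = 2,
        ScreenLinear = 3,
    };

    constexpr AttributeUse ExtractComponentUse(std::uint8_t raw, std::uint32_t component) {
        return static_cast<AttributeUse>((raw >> (component * 2)) & 0x03);
    }

    // Byte layout: component 3 | component 2 | component 1 | component 0.
    static_assert(ExtractComponentUse(0b10'01'00'11, 0) == AttributeUse::ScreenLinear);
    static_assert(ExtractComponentUse(0b10'01'00'11, 1) == AttributeUse::Unused);
    static_assert(ExtractComponentUse(0b10'01'00'11, 2) == AttributeUse::Constant);
    static_assert(ExtractComponentUse(0b10'01'00'11, 3) == AttributeUse::Perspective);
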
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 0f5bfdcbf..6313702f2 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -100,7 +100,7 @@ struct FramebufferConfig {
100 100
101 using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags; 101 using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags;
102 TransformFlags transform_flags; 102 TransformFlags transform_flags;
103 MathUtil::Rectangle<int> crop_rect; 103 Common::Rectangle<int> crop_rect;
104}; 104};
105 105
106namespace Engines { 106namespace Engines {
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
index bcf0c15a4..a7bcf26fb 100644
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -129,6 +129,15 @@ protected:
129 return ++modified_ticks; 129 return ++modified_ticks;
130 } 130 }
131 131
132 /// Flushes the specified object, updating appropriate cache state as needed
133 void FlushObject(const T& object) {
134 if (!object->IsDirty()) {
135 return;
136 }
137 object->Flush();
138 object->MarkAsModified(false, *this);
139 }
140
132private: 141private:
133 /// Returns a list of cached objects from the specified memory region, ordered by access time 142 /// Returns a list of cached objects from the specified memory region, ordered by access time
134 std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) { 143 std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
@@ -154,15 +163,6 @@ private:
154 return objects; 163 return objects;
155 } 164 }
156 165
157 /// Flushes the specified object, updating appropriate cache state as needed
158 void FlushObject(const T& object) {
159 if (!object->IsDirty()) {
160 return;
161 }
162 object->Flush();
163 object->MarkAsModified(false, *this);
164 }
165
166 using ObjectSet = std::set<T>; 166 using ObjectSet = std::set<T>;
167 using ObjectCache = std::unordered_map<VAddr, T>; 167 using ObjectCache = std::unordered_map<VAddr, T>;
168 using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>; 168 using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
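
Moving FlushObject from the private to the protected section lets derived caches flush individual entries themselves, which the surface-reinterpretation path in the OpenGL cache later in this diff relies on. A minimal sketch of that access change with stand-in types (the simplified base omits the MarkAsModified bookkeeping of the real cache):

    // Sketch: a protected flush helper callable from a derived cache.
    #include <memory>

    class CachedObject {
    public:
        bool IsDirty() const { return dirty; }
        void Flush() { dirty = false; }
        void MarkDirty() { dirty = true; }

    private:
        bool dirty = false;
    };

    template <class T>
    class CacheBase {
    protected:
        // Accessible to derived caches, no longer only to the base class itself.
        void FlushObject(const std::shared_ptr<T>& object) {
            if (!object->IsDirty()) {
                return;
            }
            object->Flush();
        }
    };

    class DerivedCache : public CacheBase<CachedObject> {
    public:
        void Reinterpret(const std::shared_ptr<CachedObject>& object) {
            FlushObject(object); // legal now that the helper is protected
        }
    };
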
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index b2a223705..6a1dc9cf6 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -47,8 +47,8 @@ public:
47 /// Attempt to use a faster method to perform a surface copy 47 /// Attempt to use a faster method to perform a surface copy
48 virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 48 virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
49 const Tegra::Engines::Fermi2D::Regs::Surface& dst, 49 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
50 const MathUtil::Rectangle<u32>& src_rect, 50 const Common::Rectangle<u32>& src_rect,
51 const MathUtil::Rectangle<u32>& dst_rect) { 51 const Common::Rectangle<u32>& dst_rect) {
52 return false; 52 return false;
53 } 53 }
54 54
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 12d876120..c8c1d6911 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -102,8 +102,8 @@ struct FramebufferCacheKey {
102 102
103RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system, 103RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system,
104 ScreenInfo& info) 104 ScreenInfo& info)
105 : res_cache{*this}, shader_cache{*this, system}, emu_window{window}, screen_info{info}, 105 : res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, emu_window{window},
106 buffer_cache(*this, STREAM_BUFFER_SIZE), global_cache{*this} { 106 screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) {
107 // Create sampler objects 107 // Create sampler objects
108 for (std::size_t i = 0; i < texture_samplers.size(); ++i) { 108 for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
109 texture_samplers[i].Create(); 109 texture_samplers[i].Create();
@@ -200,7 +200,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
200 } 200 }
201 201
202 // Rebinding the VAO invalidates the vertex buffer bindings. 202 // Rebinding the VAO invalidates the vertex buffer bindings.
203 gpu.dirty_flags.vertex_array = 0xFFFFFFFF; 203 gpu.dirty_flags.vertex_array.set();
204 204
205 state.draw.vertex_array = vao_entry.handle; 205 state.draw.vertex_array = vao_entry.handle;
206 return vao_entry.handle; 206 return vao_entry.handle;
@@ -210,14 +210,14 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
210 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 210 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
211 const auto& regs = gpu.regs; 211 const auto& regs = gpu.regs;
212 212
213 if (!gpu.dirty_flags.vertex_array) 213 if (gpu.dirty_flags.vertex_array.none())
214 return; 214 return;
215 215
216 MICROPROFILE_SCOPE(OpenGL_VB); 216 MICROPROFILE_SCOPE(OpenGL_VB);
217 217
218 // Upload all guest vertex arrays sequentially to our buffer 218 // Upload all guest vertex arrays sequentially to our buffer
219 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { 219 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
220 if (~gpu.dirty_flags.vertex_array & (1u << index)) 220 if (!gpu.dirty_flags.vertex_array[index])
221 continue; 221 continue;
222 222
223 const auto& vertex_array = regs.vertex_array[index]; 223 const auto& vertex_array = regs.vertex_array[index];
@@ -244,7 +244,7 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
244 } 244 }
245 } 245 }
246 246
247 gpu.dirty_flags.vertex_array = 0; 247 gpu.dirty_flags.vertex_array.reset();
248} 248}
249 249
250DrawParameters RasterizerOpenGL::SetupDraw() { 250DrawParameters RasterizerOpenGL::SetupDraw() {
@@ -488,13 +488,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
488 OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents, 488 OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents,
489 std::optional<std::size_t> single_color_target) { 489 std::optional<std::size_t> single_color_target) {
490 MICROPROFILE_SCOPE(OpenGL_Framebuffer); 490 MICROPROFILE_SCOPE(OpenGL_Framebuffer);
491 const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 491 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
492 const auto& regs = gpu.regs; 492 const auto& regs = gpu.regs;
493 493
494 const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents, 494 const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents,
495 single_color_target}; 495 single_color_target};
496 if (fb_config_state == current_framebuffer_config_state && gpu.dirty_flags.color_buffer == 0 && 496 if (fb_config_state == current_framebuffer_config_state &&
497 !gpu.dirty_flags.zeta_buffer) { 497 gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) {
498 // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or 498 // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or
499 // single color targets). This is done because the guest registers may not change but the 499 // single color targets). This is done because the guest registers may not change but the
500 // host framebuffer may contain different attachments 500 // host framebuffer may contain different attachments
@@ -721,10 +721,10 @@ void RasterizerOpenGL::DrawArrays() {
721 // Add space for at least 18 constant buffers 721 // Add space for at least 18 constant buffers
722 buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment); 722 buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment);
723 723
724 bool invalidate = buffer_cache.Map(buffer_size); 724 const bool invalidate = buffer_cache.Map(buffer_size);
725 if (invalidate) { 725 if (invalidate) {
726 // As all cached buffers are invalidated, we need to recheck their state. 726 // As all cached buffers are invalidated, we need to recheck their state.
727 gpu.dirty_flags.vertex_array = 0xFFFFFFFF; 727 gpu.dirty_flags.vertex_array.set();
728 } 728 }
729 729
730 const GLuint vao = SetupVertexFormat(); 730 const GLuint vao = SetupVertexFormat();
@@ -738,9 +738,13 @@ void RasterizerOpenGL::DrawArrays() {
738 shader_program_manager->ApplyTo(state); 738 shader_program_manager->ApplyTo(state);
739 state.Apply(); 739 state.Apply();
740 740
741 res_cache.SignalPreDrawCall();
742
741 // Execute draw call 743 // Execute draw call
742 params.DispatchDraw(); 744 params.DispatchDraw();
743 745
746 res_cache.SignalPostDrawCall();
747
744 // Disable scissor test 748 // Disable scissor test
745 state.viewports[0].scissor.enabled = false; 749 state.viewports[0].scissor.enabled = false;
746 750
@@ -779,8 +783,8 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
779 783
780bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 784bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
781 const Tegra::Engines::Fermi2D::Regs::Surface& dst, 785 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
782 const MathUtil::Rectangle<u32>& src_rect, 786 const Common::Rectangle<u32>& src_rect,
783 const MathUtil::Rectangle<u32>& dst_rect) { 787 const Common::Rectangle<u32>& dst_rect) {
784 MICROPROFILE_SCOPE(OpenGL_Blits); 788 MICROPROFILE_SCOPE(OpenGL_Blits);
785 res_cache.FermiCopySurface(src, dst, src_rect, dst_rect); 789 res_cache.FermiCopySurface(src, dst, src_rect, dst_rect);
786 return true; 790 return true;
@@ -1034,7 +1038,7 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
1034 for (std::size_t i = 0; i < viewport_count; i++) { 1038 for (std::size_t i = 0; i < viewport_count; i++) {
1035 auto& viewport = current_state.viewports[i]; 1039 auto& viewport = current_state.viewports[i];
1036 const auto& src = regs.viewports[i]; 1040 const auto& src = regs.viewports[i];
1037 const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()}; 1041 const Common::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()};
1038 viewport.x = viewport_rect.left; 1042 viewport.x = viewport_rect.left;
1039 viewport.y = viewport_rect.bottom; 1043 viewport.y = viewport_rect.bottom;
1040 viewport.width = viewport_rect.GetWidth(); 1044 viewport.width = viewport_rect.GetWidth();
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 258d62259..2f0524f85 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -62,8 +62,8 @@ public:
62 void FlushAndInvalidateRegion(VAddr addr, u64 size) override; 62 void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
63 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 63 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
64 const Tegra::Engines::Fermi2D::Regs::Surface& dst, 64 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
65 const MathUtil::Rectangle<u32>& src_rect, 65 const Common::Rectangle<u32>& src_rect,
66 const MathUtil::Rectangle<u32>& dst_rect) override; 66 const Common::Rectangle<u32>& dst_rect) override;
67 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, 67 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
68 u32 pixel_stride) override; 68 u32 pixel_stride) override;
69 bool AccelerateDrawBatch(bool is_indexed) override; 69 bool AccelerateDrawBatch(bool is_indexed) override;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 74200914e..5fdf1164d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -3,6 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm> 5#include <algorithm>
6#include <optional>
6#include <glad/glad.h> 7#include <glad/glad.h>
7 8
8#include "common/alignment.h" 9#include "common/alignment.h"
@@ -399,7 +400,7 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
399 return format; 400 return format;
400} 401}
401 402
402MathUtil::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const { 403Common::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
403 u32 actual_height{std::max(1U, unaligned_height >> mip_level)}; 404 u32 actual_height{std::max(1U, unaligned_height >> mip_level)};
404 if (IsPixelFormatASTC(pixel_format)) { 405 if (IsPixelFormatASTC(pixel_format)) {
405 // ASTC formats must stop at the ATSC block size boundary 406 // ASTC formats must stop at the ATSC block size boundary
@@ -549,6 +550,8 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
549 // alternatives. This signals a bug on those functions. 550 // alternatives. This signals a bug on those functions.
550 const auto width = static_cast<GLsizei>(params.MipWidth(0)); 551 const auto width = static_cast<GLsizei>(params.MipWidth(0));
551 const auto height = static_cast<GLsizei>(params.MipHeight(0)); 552 const auto height = static_cast<GLsizei>(params.MipHeight(0));
553 memory_size = params.MemorySize();
554 reinterpreted = false;
552 555
553 const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type); 556 const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type);
554 gl_internal_format = format_tuple.internal_format; 557 gl_internal_format = format_tuple.internal_format;
@@ -962,30 +965,31 @@ Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool pre
962 auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()}; 965 auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
963 const auto& regs{gpu.regs}; 966 const auto& regs{gpu.regs};
964 967
965 if ((gpu.dirty_flags.color_buffer & (1u << static_cast<u32>(index))) == 0) { 968 if (!gpu.dirty_flags.color_buffer[index]) {
966 return last_color_buffers[index]; 969 return last_color_buffers[index];
967 } 970 }
968 gpu.dirty_flags.color_buffer &= ~(1u << static_cast<u32>(index)); 971 gpu.dirty_flags.color_buffer.reset(index);
969 972
970 ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); 973 ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
971 974
972 if (index >= regs.rt_control.count) { 975 if (index >= regs.rt_control.count) {
973 return last_color_buffers[index] = {}; 976 return current_color_buffers[index] = {};
974 } 977 }
975 978
976 if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { 979 if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
977 return last_color_buffers[index] = {}; 980 return current_color_buffers[index] = {};
978 } 981 }
979 982
980 const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)}; 983 const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)};
981 984
982 return last_color_buffers[index] = GetSurface(color_params, preserve_contents); 985 return current_color_buffers[index] = GetSurface(color_params, preserve_contents);
983} 986}
984 987
985void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) { 988void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
986 surface->LoadGLBuffer(); 989 surface->LoadGLBuffer();
987 surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); 990 surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
988 surface->MarkAsModified(false, *this); 991 surface->MarkAsModified(false, *this);
992 surface->MarkForReload(false);
989} 993}
990 994
991Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) { 995Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) {
@@ -997,18 +1001,23 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
997 Surface surface{TryGet(params.addr)}; 1001 Surface surface{TryGet(params.addr)};
998 if (surface) { 1002 if (surface) {
999 if (surface->GetSurfaceParams().IsCompatibleSurface(params)) { 1003 if (surface->GetSurfaceParams().IsCompatibleSurface(params)) {
1000 // Use the cached surface as-is 1004 // Use the cached surface as-is unless it's not synced with memory
1005 if (surface->MustReload())
1006 LoadSurface(surface);
1001 return surface; 1007 return surface;
1002 } else if (preserve_contents) { 1008 } else if (preserve_contents) {
1003 // If surface parameters changed and we care about keeping the previous data, recreate 1009 // If surface parameters changed and we care about keeping the previous data, recreate
1004 // the surface from the old one 1010 // the surface from the old one
1005 Surface new_surface{RecreateSurface(surface, params)}; 1011 Surface new_surface{RecreateSurface(surface, params)};
1006 Unregister(surface); 1012 UnregisterSurface(surface);
1007 Register(new_surface); 1013 Register(new_surface);
1014 if (new_surface->IsUploaded()) {
1015 RegisterReinterpretSurface(new_surface);
1016 }
1008 return new_surface; 1017 return new_surface;
1009 } else { 1018 } else {
1010 // Delete the old surface before creating a new one to prevent collisions. 1019 // Delete the old surface before creating a new one to prevent collisions.
1011 Unregister(surface); 1020 UnregisterSurface(surface);
1012 } 1021 }
1013 } 1022 }
1014 1023
@@ -1062,8 +1071,8 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
1062} 1071}
1063 1072
1064static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface, 1073static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
1065 const MathUtil::Rectangle<u32>& src_rect, 1074 const Common::Rectangle<u32>& src_rect,
1066 const MathUtil::Rectangle<u32>& dst_rect, GLuint read_fb_handle, 1075 const Common::Rectangle<u32>& dst_rect, GLuint read_fb_handle,
1067 GLuint draw_fb_handle, GLenum src_attachment = 0, GLenum dst_attachment = 0, 1076 GLuint draw_fb_handle, GLenum src_attachment = 0, GLenum dst_attachment = 0,
1068 std::size_t cubemap_face = 0) { 1077 std::size_t cubemap_face = 0) {
1069 1078
@@ -1193,7 +1202,7 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
1193void RasterizerCacheOpenGL::FermiCopySurface( 1202void RasterizerCacheOpenGL::FermiCopySurface(
1194 const Tegra::Engines::Fermi2D::Regs::Surface& src_config, 1203 const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
1195 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, 1204 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
1196 const MathUtil::Rectangle<u32>& src_rect, const MathUtil::Rectangle<u32>& dst_rect) { 1205 const Common::Rectangle<u32>& src_rect, const Common::Rectangle<u32>& dst_rect) {
1197 1206
1198 const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config); 1207 const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config);
1199 const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config); 1208 const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config);
@@ -1257,7 +1266,11 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
1257 case SurfaceTarget::TextureCubemap: 1266 case SurfaceTarget::TextureCubemap:
1258 case SurfaceTarget::Texture2DArray: 1267 case SurfaceTarget::Texture2DArray:
1259 case SurfaceTarget::TextureCubeArray: 1268 case SurfaceTarget::TextureCubeArray:
1260 FastLayeredCopySurface(old_surface, new_surface); 1269 if (old_params.pixel_format == new_params.pixel_format)
1270 FastLayeredCopySurface(old_surface, new_surface);
1271 else {
1272 AccurateCopySurface(old_surface, new_surface);
1273 }
1261 break; 1274 break;
1262 default: 1275 default:
1263 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", 1276 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
@@ -1286,4 +1299,107 @@ Surface RasterizerCacheOpenGL::TryGetReservedSurface(const SurfaceParams& params
1286 return {}; 1299 return {};
1287} 1300}
1288 1301
1302 static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfaceParams& params,
1303 u32 height) {
1304 for (u32 i = 0; i < params.max_mip_level; i++) {
1305 if (memory == params.GetMipmapSingleSize(i) && params.MipHeight(i) == height) {
1306 return {i};
1307 }
1308 }
1309 return {};
1310}
1311
1312 static std::optional<u32> TryFindBestLayer(VAddr addr, const SurfaceParams& params, u32 mipmap) {
1313 const std::size_t size = params.LayerMemorySize();
1314 VAddr start = params.addr + params.GetMipmapLevelOffset(mipmap);
1315 for (u32 i = 0; i < params.depth; i++) {
1316 if (start == addr) {
1317 return {i};
1318 }
1319 start += size;
1320 }
1321 return {};
1322}
1323
1324static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surface render_surface,
1325 const Surface blitted_surface) {
1326 const auto& dst_params = blitted_surface->GetSurfaceParams();
1327 const auto& src_params = render_surface->GetSurfaceParams();
1328 const std::size_t src_memory_size = src_params.size_in_bytes;
1329 const std::optional<u32> level =
1330 TryFindBestMipMap(src_memory_size, dst_params, src_params.height);
1331 if (level.has_value()) {
1332 if (src_params.width == dst_params.MipWidthGobAligned(*level) &&
1333 src_params.height == dst_params.MipHeight(*level) &&
1334 src_params.block_height >= dst_params.MipBlockHeight(*level)) {
1335 const std::optional<u32> slot =
1336 TryFindBestLayer(render_surface->GetAddr(), dst_params, *level);
1337 if (slot.has_value()) {
1338 glCopyImageSubData(render_surface->Texture().handle,
1339 SurfaceTargetToGL(src_params.target), 0, 0, 0, 0,
1340 blitted_surface->Texture().handle,
1341 SurfaceTargetToGL(dst_params.target), *level, 0, 0, *slot,
1342 dst_params.MipWidth(*level), dst_params.MipHeight(*level), 1);
1343 blitted_surface->MarkAsModified(true, cache);
1344 return true;
1345 }
1346 }
1347 }
1348 return false;
1349}
1350
1351static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) {
1352 const VAddr bound1 = blitted_surface->GetAddr() + blitted_surface->GetMemorySize();
1353 const VAddr bound2 = render_surface->GetAddr() + render_surface->GetMemorySize();
1354 if (bound2 > bound1)
1355 return true;
1356 const auto& dst_params = blitted_surface->GetSurfaceParams();
1357 const auto& src_params = render_surface->GetSurfaceParams();
1358 return (dst_params.component_type != src_params.component_type);
1359}
1360
1361static bool IsReinterpretInvalidSecond(const Surface render_surface,
1362 const Surface blitted_surface) {
1363 const auto& dst_params = blitted_surface->GetSurfaceParams();
1364 const auto& src_params = render_surface->GetSurfaceParams();
1365 return (dst_params.height > src_params.height && dst_params.width > src_params.width);
1366}
1367
1368bool RasterizerCacheOpenGL::PartialReinterpretSurface(Surface triggering_surface,
1369 Surface intersect) {
1370 if (IsReinterpretInvalid(triggering_surface, intersect)) {
1371 UnregisterSurface(intersect);
1372 return false;
1373 }
1374 if (!LayerFitReinterpretSurface(*this, triggering_surface, intersect)) {
1375 if (IsReinterpretInvalidSecond(triggering_surface, intersect)) {
1376 UnregisterSurface(intersect);
1377 return false;
1378 }
1379 FlushObject(intersect);
1380 FlushObject(triggering_surface);
1381 intersect->MarkForReload(true);
1382 }
1383 return true;
1384}
1385
1386void RasterizerCacheOpenGL::SignalPreDrawCall() {
1387 if (texception && GLAD_GL_ARB_texture_barrier) {
1388 glTextureBarrier();
1389 }
1390 texception = false;
1391}
1392
1393void RasterizerCacheOpenGL::SignalPostDrawCall() {
1394 for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) {
1395 if (current_color_buffers[i] != nullptr) {
1396 Surface intersect = CollideOnReinterpretedSurface(current_color_buffers[i]->GetAddr());
1397 if (intersect != nullptr) {
1398 PartialReinterpretSurface(current_color_buffers[i], intersect);
1399 texception = true;
1400 }
1401 }
1402 }
1403}
1404
1289} // namespace OpenGL 1405} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 89d733c50..797bbdc9c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -28,12 +28,13 @@ namespace OpenGL {
28 28
29class CachedSurface; 29class CachedSurface;
30using Surface = std::shared_ptr<CachedSurface>; 30using Surface = std::shared_ptr<CachedSurface>;
31using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>; 31using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, Common::Rectangle<u32>>;
32 32
33using SurfaceTarget = VideoCore::Surface::SurfaceTarget; 33using SurfaceTarget = VideoCore::Surface::SurfaceTarget;
34using SurfaceType = VideoCore::Surface::SurfaceType; 34using SurfaceType = VideoCore::Surface::SurfaceType;
35using PixelFormat = VideoCore::Surface::PixelFormat; 35using PixelFormat = VideoCore::Surface::PixelFormat;
36using ComponentType = VideoCore::Surface::ComponentType; 36using ComponentType = VideoCore::Surface::ComponentType;
37using Maxwell = Tegra::Engines::Maxwell3D::Regs;
37 38
38struct SurfaceParams { 39struct SurfaceParams {
39 enum class SurfaceClass { 40 enum class SurfaceClass {
@@ -71,7 +72,7 @@ struct SurfaceParams {
71 } 72 }
72 73
73 /// Returns the rectangle corresponding to this surface 74 /// Returns the rectangle corresponding to this surface
74 MathUtil::Rectangle<u32> GetRect(u32 mip_level = 0) const; 75 Common::Rectangle<u32> GetRect(u32 mip_level = 0) const;
75 76
76 /// Returns the total size of this surface in bytes, adjusted for compression 77 /// Returns the total size of this surface in bytes, adjusted for compression
77 std::size_t SizeInBytesRaw(bool ignore_tiled = false) const { 78 std::size_t SizeInBytesRaw(bool ignore_tiled = false) const {
@@ -140,10 +141,18 @@ struct SurfaceParams {
140 return offset; 141 return offset;
141 } 142 }
142 143
144 std::size_t GetMipmapSingleSize(u32 mip_level) const {
145 return InnerMipmapMemorySize(mip_level, false, is_layered);
146 }
147
143 u32 MipWidth(u32 mip_level) const { 148 u32 MipWidth(u32 mip_level) const {
144 return std::max(1U, width >> mip_level); 149 return std::max(1U, width >> mip_level);
145 } 150 }
146 151
152 u32 MipWidthGobAligned(u32 mip_level) const {
153 return Common::AlignUp(std::max(1U, width >> mip_level), 64U * 8U / GetFormatBpp());
154 }
155
147 u32 MipHeight(u32 mip_level) const { 156 u32 MipHeight(u32 mip_level) const {
148 return std::max(1U, height >> mip_level); 157 return std::max(1U, height >> mip_level);
149 } 158 }
@@ -346,6 +355,10 @@ public:
346 return cached_size_in_bytes; 355 return cached_size_in_bytes;
347 } 356 }
348 357
358 std::size_t GetMemorySize() const {
359 return memory_size;
360 }
361
349 void Flush() override { 362 void Flush() override {
350 FlushGLBuffer(); 363 FlushGLBuffer();
351 } 364 }
@@ -395,6 +408,26 @@ public:
395 Tegra::Texture::SwizzleSource swizzle_z, 408 Tegra::Texture::SwizzleSource swizzle_z,
396 Tegra::Texture::SwizzleSource swizzle_w); 409 Tegra::Texture::SwizzleSource swizzle_w);
397 410
411 void MarkReinterpreted() {
412 reinterpreted = true;
413 }
414
415 bool IsReinterpreted() const {
416 return reinterpreted;
417 }
418
419 void MarkForReload(bool reload) {
420 must_reload = reload;
421 }
422
423 bool MustReload() const {
424 return must_reload;
425 }
426
427 bool IsUploaded() const {
428 return params.identity == SurfaceParams::SurfaceClass::Uploaded;
429 }
430
398private: 431private:
399 void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle); 432 void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle);
400 433
@@ -408,6 +441,9 @@ private:
408 GLenum gl_internal_format{}; 441 GLenum gl_internal_format{};
409 std::size_t cached_size_in_bytes{}; 442 std::size_t cached_size_in_bytes{};
410 std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA}; 443 std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
444 std::size_t memory_size;
445 bool reinterpreted = false;
446 bool must_reload = false;
411}; 447};
412 448
413class RasterizerCacheOpenGL final : public RasterizerCache<Surface> { 449class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
@@ -430,8 +466,11 @@ public:
430 /// Copies the contents of one surface to another 466 /// Copies the contents of one surface to another
431 void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, 467 void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
432 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, 468 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
433 const MathUtil::Rectangle<u32>& src_rect, 469 const Common::Rectangle<u32>& src_rect,
434 const MathUtil::Rectangle<u32>& dst_rect); 470 const Common::Rectangle<u32>& dst_rect);
471
472 void SignalPreDrawCall();
473 void SignalPostDrawCall();
435 474
436private: 475private:
437 void LoadSurface(const Surface& surface); 476 void LoadSurface(const Surface& surface);
@@ -449,6 +488,10 @@ private:
449 /// Tries to get a reserved surface for the specified parameters 488 /// Tries to get a reserved surface for the specified parameters
450 Surface TryGetReservedSurface(const SurfaceParams& params); 489 Surface TryGetReservedSurface(const SurfaceParams& params);
451 490
491 /// Partially reinterprets a surface based on a triggering_surface that collides with it.
492 /// Returns true if the reinterpretation was successful, false otherwise.
493 bool PartialReinterpretSurface(Surface triggering_surface, Surface intersect);
494
452 /// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data 495 /// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data
453 void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface); 496 void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface);
454 void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface); 497 void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface);
@@ -465,12 +508,50 @@ private:
465 OGLFramebuffer read_framebuffer; 508 OGLFramebuffer read_framebuffer;
466 OGLFramebuffer draw_framebuffer; 509 OGLFramebuffer draw_framebuffer;
467 510
511 bool texception = false;
512
468 /// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one 513 /// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one
469 /// using the new format. 514 /// using the new format.
470 OGLBuffer copy_pbo; 515 OGLBuffer copy_pbo;
471 516
472 std::array<Surface, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> last_color_buffers; 517 std::array<Surface, Maxwell::NumRenderTargets> last_color_buffers;
518 std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers;
473 Surface last_depth_buffer; 519 Surface last_depth_buffer;
520
521 using SurfaceIntervalCache = boost::icl::interval_map<VAddr, Surface>;
522 using SurfaceInterval = typename SurfaceIntervalCache::interval_type;
523
524 static auto GetReinterpretInterval(const Surface& object) {
525 return SurfaceInterval::right_open(object->GetAddr() + 1,
526 object->GetAddr() + object->GetMemorySize() - 1);
527 }
528
529 // Reinterpreted surfaces are very fragile as the game may keep rendering into them.
530 SurfaceIntervalCache reinterpreted_surfaces;
531
532 void RegisterReinterpretSurface(Surface reinterpret_surface) {
533 auto interval = GetReinterpretInterval(reinterpret_surface);
534 reinterpreted_surfaces.insert({interval, reinterpret_surface});
535 reinterpret_surface->MarkReinterpreted();
536 }
537
538 Surface CollideOnReinterpretedSurface(VAddr addr) const {
539 const SurfaceInterval interval{addr};
540 for (auto& pair :
541 boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) {
542 return pair.second;
543 }
544 return nullptr;
545 }
546
547 /// Unregisters an object from the cache
548 void UnregisterSurface(const Surface& object) {
549 if (object->IsReinterpreted()) {
550 auto interval = GetReinterpretInterval(object);
551 reinterpreted_surfaces.erase(interval);
552 }
553 Unregister(object);
554 }
474}; 555};
475 556
476} // namespace OpenGL 557} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index db18f4dbe..72ff6ac6a 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -20,6 +20,7 @@
20namespace OpenGL::GLShader { 20namespace OpenGL::GLShader {
21 21
22using Tegra::Shader::Attribute; 22using Tegra::Shader::Attribute;
23using Tegra::Shader::AttributeUse;
23using Tegra::Shader::Header; 24using Tegra::Shader::Header;
24using Tegra::Shader::IpaInterpMode; 25using Tegra::Shader::IpaInterpMode;
25using Tegra::Shader::IpaMode; 26using Tegra::Shader::IpaMode;
@@ -288,34 +289,22 @@ private:
288 code.AddNewLine(); 289 code.AddNewLine();
289 } 290 }
290 291
291 std::string GetInputFlags(const IpaMode& input_mode) { 292 std::string GetInputFlags(AttributeUse attribute) {
292 const IpaSampleMode sample_mode = input_mode.sampling_mode;
293 const IpaInterpMode interp_mode = input_mode.interpolation_mode;
294 std::string out; 293 std::string out;
295 294
296 switch (interp_mode) { 295 switch (attribute) {
297 case IpaInterpMode::Flat: 296 case AttributeUse::Constant:
298 out += "flat "; 297 out += "flat ";
299 break; 298 break;
300 case IpaInterpMode::Linear: 299 case AttributeUse::ScreenLinear:
301 out += "noperspective "; 300 out += "noperspective ";
302 break; 301 break;
303 case IpaInterpMode::Perspective: 302 case AttributeUse::Perspective:
304 // Default, Smooth 303 // Default, Smooth
305 break; 304 break;
306 default: 305 default:
307 UNIMPLEMENTED_MSG("Unhandled IPA interp mode: {}", static_cast<u32>(interp_mode)); 306 LOG_CRITICAL(HW_GPU, "Unused attribute being fetched");
308 } 307 UNREACHABLE();
309 switch (sample_mode) {
310 case IpaSampleMode::Centroid:
311 // It can be implemented with the "centroid " keyword in GLSL
312 UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode centroid");
313 break;
314 case IpaSampleMode::Default:
315 // Default, n/a
316 break;
317 default:
318 UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode: {}", static_cast<u32>(sample_mode));
319 } 308 }
320 return out; 309 return out;
321 } 310 }
@@ -324,16 +313,11 @@ private:
324 const auto& attributes = ir.GetInputAttributes(); 313 const auto& attributes = ir.GetInputAttributes();
325 for (const auto element : attributes) { 314 for (const auto element : attributes) {
326 const Attribute::Index index = element.first; 315 const Attribute::Index index = element.first;
327 const IpaMode& input_mode = *element.second.begin();
328 if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) { 316 if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) {
329 // Skip when it's not a generic attribute 317 // Skip when it's not a generic attribute
330 continue; 318 continue;
331 } 319 }
332 320
333 ASSERT(element.second.size() > 0);
334 UNIMPLEMENTED_IF_MSG(element.second.size() > 1,
335 "Multiple input flag modes are not supported in GLSL");
336
337 // TODO(bunnei): Use proper number of elements for these 321 // TODO(bunnei): Use proper number of elements for these
338 u32 idx = static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); 322 u32 idx = static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
339 if (stage != ShaderStage::Vertex) { 323 if (stage != ShaderStage::Vertex) {
@@ -345,8 +329,14 @@ private:
345 if (stage == ShaderStage::Geometry) { 329 if (stage == ShaderStage::Geometry) {
346 attr = "gs_" + attr + "[]"; 330 attr = "gs_" + attr + "[]";
347 } 331 }
348 code.AddLine("layout (location = " + std::to_string(idx) + ") " + 332 std::string suffix;
349 GetInputFlags(input_mode) + "in vec4 " + attr + ';'); 333 if (stage == ShaderStage::Fragment) {
334 const auto input_mode =
335 header.ps.GetAttributeUse(idx - GENERIC_VARYING_START_LOCATION);
336 suffix = GetInputFlags(input_mode);
337 }
338 code.AddLine("layout (location = " + std::to_string(idx) + ") " + suffix + "in vec4 " +
339 attr + ';');
350 } 340 }
351 if (!attributes.empty()) 341 if (!attributes.empty())
352 code.AddNewLine(); 342 code.AddNewLine();
@@ -1584,4 +1574,4 @@ ProgramResult Decompile(const ShaderIR& ir, Maxwell::ShaderStage stage, const st
1584 return {decompiler.GetResult(), decompiler.GetShaderEntries()}; 1574 return {decompiler.GetResult(), decompiler.GetShaderEntries()};
1585} 1575}
1586 1576
1587} // namespace OpenGL::GLShader \ No newline at end of file 1577} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 81882822b..82fc4d44b 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -2,8 +2,6 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#pragma once
6
7#include <cstring> 5#include <cstring>
8#include <fmt/format.h> 6#include <fmt/format.h>
9#include <lz4.h> 7#include <lz4.h>
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 04e1db911..7d96649af 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -124,7 +124,7 @@ layout (location = 5) out vec4 FragColor5;
124layout (location = 6) out vec4 FragColor6; 124layout (location = 6) out vec4 FragColor6;
125layout (location = 7) out vec4 FragColor7; 125layout (location = 7) out vec4 FragColor7;
126 126
127layout (location = 0) in vec4 position; 127layout (location = 0) in noperspective vec4 position;
128 128
129layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { 129layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
130 vec4 viewport_flip; 130 vec4 viewport_flip;
@@ -172,4 +172,4 @@ void main() {
172 return {out, program.second}; 172 return {out, program.second};
173} 173}
174 174
175} // namespace OpenGL::GLShader \ No newline at end of file 175} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 272fc2e8e..e60b2eb44 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -257,6 +257,7 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
257 const Tegra::FramebufferConfig& framebuffer) { 257 const Tegra::FramebufferConfig& framebuffer) {
258 texture.width = framebuffer.width; 258 texture.width = framebuffer.width;
259 texture.height = framebuffer.height; 259 texture.height = framebuffer.height;
260 texture.pixel_format = framebuffer.pixel_format;
260 261
261 GLint internal_format; 262 GLint internal_format;
262 switch (framebuffer.pixel_format) { 263 switch (framebuffer.pixel_format) {
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 7e13e566b..c168fa89e 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -39,7 +39,7 @@ struct TextureInfo {
39/// Structure used for storing information about the display target for the Switch screen 39/// Structure used for storing information about the display target for the Switch screen
40struct ScreenInfo { 40struct ScreenInfo {
41 GLuint display_texture; 41 GLuint display_texture;
42 const MathUtil::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f}; 42 const Common::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f};
43 TextureInfo texture; 43 TextureInfo texture;
44}; 44};
45 45
@@ -102,7 +102,7 @@ private:
102 102
103 /// Used for transforming the framebuffer orientation 103 /// Used for transforming the framebuffer orientation
104 Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags; 104 Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags;
105 MathUtil::Rectangle<int> framebuffer_crop_rect; 105 Common::Rectangle<int> framebuffer_crop_rect;
106}; 106};
107 107
108} // namespace OpenGL 108} // namespace OpenGL
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
index 17ee93b91..0451babbf 100644
--- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
@@ -238,7 +238,7 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32
238 238
239VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, 239VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory,
240 u8* data, u64 begin, u64 end) 240 u8* data, u64 begin, u64 end)
241 : allocation{allocation}, memory{memory}, data{data}, interval(std::make_pair(begin, end)) {} 241 : interval(std::make_pair(begin, end)), memory{memory}, allocation{allocation}, data{data} {}
242 242
243VKMemoryCommitImpl::~VKMemoryCommitImpl() { 243VKMemoryCommitImpl::~VKMemoryCommitImpl() {
244 allocation->Free(this); 244 allocation->Free(this);
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 55ec601ff..38f01ca50 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -48,7 +48,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
48 UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, 48 UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
49 "Unaligned attribute loads are not supported"); 49 "Unaligned attribute loads are not supported");
50 50
51 Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective, 51 Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Pass,
52 Tegra::Shader::IpaSampleMode::Default}; 52 Tegra::Shader::IpaSampleMode::Default};
53 53
54 u64 next_element = instr.attribute.fmt20.element; 54 u64 next_element = instr.attribute.fmt20.element;
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index f9502e3d0..d750a2936 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -135,7 +135,18 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
135 instr.ipa.sample_mode.Value()}; 135 instr.ipa.sample_mode.Value()};
136 136
137 const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode); 137 const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode);
138 const Node value = GetSaturatedFloat(attr, instr.ipa.saturate); 138 Node value = attr;
139 const Tegra::Shader::Attribute::Index index = attribute.index.Value();
140 if (index >= Tegra::Shader::Attribute::Index::Attribute_0 &&
141 index <= Tegra::Shader::Attribute::Index::Attribute_31) {
142 // TODO(Blinkhawk): There are cases where a perspective attribute uses PASS.
143 // In theory, by setting them as perspective, OpenGL does the perspective correction.
144 // A way must be found to reverse the last step of it.
145 if (input_mode.interpolation_mode == Tegra::Shader::IpaInterpMode::Multiply) {
146 value = Operation(OperationCode::FMul, PRECISE, value, GetRegister(instr.gpr20));
147 }
148 }
149 value = GetSaturatedFloat(value, instr.ipa.saturate);
139 150
140 SetRegister(bb, instr.gpr0, value); 151 SetRegister(bb, instr.gpr0, value);
141 break; 152 break;
@@ -175,4 +186,4 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
175 return pc; 186 return pc;
176} 187}
177 188
178} // namespace VideoCommon::Shader \ No newline at end of file 189} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index be4635342..33b071747 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -20,9 +20,9 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
20 return {node, cursor}; 20 return {node, cursor};
21 } 21 }
22 if (const auto conditional = std::get_if<ConditionalNode>(node)) { 22 if (const auto conditional = std::get_if<ConditionalNode>(node)) {
23 const auto& code = conditional->GetCode(); 23 const auto& conditional_code = conditional->GetCode();
24 const auto [found, internal_cursor] = 24 const auto [found, internal_cursor] = FindOperation(
25 FindOperation(code, static_cast<s64>(code.size() - 1), operation_code); 25 conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code);
26 if (found) 26 if (found)
27 return {found, cursor}; 27 return {found, cursor};
28 } 28 }
@@ -58,8 +58,8 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) {
58 return nullptr; 58 return nullptr;
59 } 59 }
60 if (const auto conditional = std::get_if<ConditionalNode>(tracked)) { 60 if (const auto conditional = std::get_if<ConditionalNode>(tracked)) {
61 const auto& code = conditional->GetCode(); 61 const auto& conditional_code = conditional->GetCode();
62 return TrackCbuf(tracked, code, static_cast<s64>(code.size())); 62 return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size()));
63 } 63 }
64 return nullptr; 64 return nullptr;
65} 65}
diff --git a/src/yuzu/debugger/graphics/graphics_surface.cpp b/src/yuzu/debugger/graphics/graphics_surface.cpp
index 209798521..71683da8e 100644
--- a/src/yuzu/debugger/graphics/graphics_surface.cpp
+++ b/src/yuzu/debugger/graphics/graphics_surface.cpp
@@ -398,7 +398,7 @@ void GraphicsSurfaceWidget::OnUpdate() {
398 398
399 for (unsigned int y = 0; y < surface_height; ++y) { 399 for (unsigned int y = 0; y < surface_height; ++y) {
400 for (unsigned int x = 0; x < surface_width; ++x) { 400 for (unsigned int x = 0; x < surface_width; ++x) {
401 Math::Vec4<u8> color; 401 Common::Vec4<u8> color;
402 color[0] = texture_data[x + y * surface_width + 0]; 402 color[0] = texture_data[x + y * surface_width + 0];
403 color[1] = texture_data[x + y * surface_width + 1]; 403 color[1] = texture_data[x + y * surface_width + 1];
404 color[2] = texture_data[x + y * surface_width + 2]; 404 color[2] = texture_data[x + y * surface_width + 2];