summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/audio_core/algorithm/interpolate.cpp54
-rw-r--r--src/audio_core/algorithm/interpolate.h7
-rw-r--r--src/audio_core/cubeb_sink.cpp20
-rw-r--r--src/common/CMakeLists.txt8
-rw-r--r--src/common/math_util.h16
-rw-r--r--src/common/page_table.cpp12
-rw-r--r--src/common/page_table.h15
-rw-r--r--src/core/CMakeLists.txt12
-rw-r--r--src/core/arm/arm_interface.h32
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_32.cpp208
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_32.h77
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_64.cpp (renamed from src/core/arm/dynarmic/arm_dynarmic.cpp)83
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_64.h (renamed from src/core/arm/dynarmic/arm_dynarmic.h)34
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_cp15.cpp80
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_cp15.h152
-rw-r--r--src/core/arm/exclusive_monitor.cpp2
-rw-r--r--src/core/arm/unicorn/arm_unicorn.cpp8
-rw-r--r--src/core/arm/unicorn/arm_unicorn.h7
-rw-r--r--src/core/core.cpp6
-rw-r--r--src/core/core_manager.cpp3
-rw-r--r--src/core/frontend/emu_window.h3
-rw-r--r--src/core/frontend/framebuffer_layout.cpp4
-rw-r--r--src/core/frontend/framebuffer_layout.h17
-rw-r--r--src/core/frontend/scope_acquire_context.cpp18
-rw-r--r--src/core/frontend/scope_acquire_context.h (renamed from src/core/frontend/scope_acquire_window_context.h)10
-rw-r--r--src/core/frontend/scope_acquire_window_context.cpp18
-rw-r--r--src/core/gdbstub/gdbstub.cpp23
-rw-r--r--src/core/gdbstub/gdbstub.h7
-rw-r--r--src/core/hle/kernel/kernel.cpp4
-rw-r--r--src/core/hle/kernel/physical_core.cpp19
-rw-r--r--src/core/hle/kernel/physical_core.h6
-rw-r--r--src/core/hle/kernel/process.cpp3
-rw-r--r--src/core/hle/kernel/scheduler.cpp21
-rw-r--r--src/core/hle/kernel/scheduler.h3
-rw-r--r--src/core/hle/kernel/svc.cpp329
-rw-r--r--src/core/hle/kernel/svc_wrap.h158
-rw-r--r--src/core/hle/kernel/thread.cpp31
-rw-r--r--src/core/hle/kernel/thread.h22
-rw-r--r--src/core/hle/service/am/am.cpp15
-rw-r--r--src/core/hle/service/am/am.h1
-rw-r--r--src/core/hle/service/am/applets/web_browser.cpp6
-rw-r--r--src/core/hle/service/hid/controllers/npad.cpp10
-rw-r--r--src/core/hle/service/nvflinger/nvflinger.cpp4
-rw-r--r--src/core/hle/service/set/set.cpp10
-rw-r--r--src/core/hle/service/set/set.h1
-rw-r--r--src/core/hle/service/sm/controller.cpp2
-rw-r--r--src/core/hle/service/time/time_zone_content_manager.cpp2
-rw-r--r--src/core/loader/deconstructed_rom_directory.cpp6
-rw-r--r--src/core/reporter.cpp2
-rw-r--r--src/core/settings.cpp2
-rw-r--r--src/core/settings.h4
-rw-r--r--src/core/telemetry_session.cpp1
-rwxr-xr-xsrc/input_common/analog_from_button.cpp14
-rw-r--r--src/input_common/udp/client.cpp17
-rw-r--r--src/input_common/udp/protocol.cpp1
-rw-r--r--src/input_common/udp/udp.cpp3
-rw-r--r--src/video_core/CMakeLists.txt16
-rw-r--r--src/video_core/dirty_flags.cpp38
-rw-r--r--src/video_core/dirty_flags.h49
-rw-r--r--src/video_core/dma_pusher.cpp2
-rw-r--r--src/video_core/engines/const_buffer_engine_interface.h67
-rw-r--r--src/video_core/engines/kepler_compute.cpp12
-rw-r--r--src/video_core/engines/kepler_memory.cpp2
-rw-r--r--src/video_core/engines/maxwell_3d.cpp214
-rw-r--r--src/video_core/engines/maxwell_3d.h249
-rw-r--r--src/video_core/engines/maxwell_dma.cpp2
-rw-r--r--src/video_core/engines/shader_bytecode.h17
-rw-r--r--src/video_core/gpu.cpp65
-rw-r--r--src/video_core/gpu.h8
-rw-r--r--src/video_core/gpu_thread.cpp4
-rw-r--r--src/video_core/guest_driver.cpp7
-rw-r--r--src/video_core/guest_driver.h21
-rw-r--r--src/video_core/memory_manager.h2
-rw-r--r--src/video_core/morton.cpp4
-rw-r--r--src/video_core/rasterizer_interface.h4
-rw-r--r--src/video_core/renderer_base.h10
-rw-r--r--src/video_core/renderer_opengl/gl_framebuffer_cache.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_framebuffer_cache.h2
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp1042
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h71
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.cpp42
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.h50
-rw-r--r--src/video_core/renderer_opengl/gl_sampler_cache.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp513
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h99
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp494
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h24
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp404
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.h153
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp109
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.h34
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.cpp43
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h39
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp554
-rw-r--r--src/video_core/renderer_opengl/gl_state.h247
-rw-r--r--src/video_core/renderer_opengl/gl_state_tracker.cpp247
-rw-r--r--src/video_core/renderer_opengl/gl_state_tracker.h215
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp77
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h10
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h51
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp493
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h40
-rw-r--r--src/video_core/renderer_opengl/utils.cpp13
-rw-r--r--src/video_core/renderer_opengl/utils.h9
-rw-r--r--src/video_core/renderer_vulkan/fixed_pipeline_state.cpp15
-rw-r--r--src/video_core/renderer_vulkan/fixed_pipeline_state.h8
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp40
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.h4
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.cpp23
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.h12
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pipeline.cpp2
-rw-r--r--src/video_core/renderer_vulkan/vk_device.cpp49
-rw-r--r--src/video_core/renderer_vulkan/vk_device.h45
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp3
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp131
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.h16
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp248
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h32
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.cpp21
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.h42
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp170
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.h13
-rw-r--r--src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp5
-rw-r--r--src/video_core/renderer_vulkan/vk_state_tracker.cpp99
-rw-r--r--src/video_core/renderer_vulkan/vk_state_tracker.h79
-rw-r--r--src/video_core/renderer_vulkan/vk_swapchain.cpp21
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp7
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h4
-rw-r--r--src/video_core/shader/const_buffer_locker.cpp126
-rw-r--r--src/video_core/shader/const_buffer_locker.h103
-rw-r--r--src/video_core/shader/control_flow.cpp13
-rw-r--r--src/video_core/shader/control_flow.h3
-rw-r--r--src/video_core/shader/decode.cpp22
-rw-r--r--src/video_core/shader/decode/arithmetic.cpp33
-rw-r--r--src/video_core/shader/decode/arithmetic_integer.cpp94
-rw-r--r--src/video_core/shader/decode/bfe.cpp69
-rw-r--r--src/video_core/shader/decode/texture.cpp5
-rw-r--r--src/video_core/shader/decode/xmad.cpp68
-rw-r--r--src/video_core/shader/node.h2
-rw-r--r--src/video_core/shader/node_helper.cpp2
-rw-r--r--src/video_core/shader/registry.cpp161
-rw-r--r--src/video_core/shader/registry.h137
-rw-r--r--src/video_core/shader/shader_ir.cpp90
-rw-r--r--src/video_core/shader/shader_ir.h14
-rw-r--r--src/video_core/shader/track.cpp38
-rw-r--r--src/video_core/shader/transform_feedback.cpp115
-rw-r--r--src/video_core/shader/transform_feedback.h23
-rw-r--r--src/video_core/surface.cpp4
-rw-r--r--src/video_core/surface.h146
-rw-r--r--src/video_core/texture_cache/format_lookup_table.cpp4
-rw-r--r--src/video_core/texture_cache/surface_params.cpp53
-rw-r--r--src/video_core/texture_cache/surface_params.h5
-rw-r--r--src/video_core/texture_cache/texture_cache.h64
-rw-r--r--src/video_core/textures/astc.cpp1074
-rw-r--r--src/video_core/textures/texture.h26
-rw-r--r--src/yuzu/CMakeLists.txt3
-rw-r--r--src/yuzu/bootmanager.cpp420
-rw-r--r--src/yuzu/bootmanager.h67
-rw-r--r--src/yuzu/configuration/config.cpp13
-rw-r--r--src/yuzu/configuration/configure.ui11
-rw-r--r--src/yuzu/configuration/configure_dialog.cpp4
-rw-r--r--src/yuzu/configuration/configure_graphics.cpp5
-rw-r--r--src/yuzu/configuration/configure_graphics.ui14
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.cpp48
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.h30
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.ui111
-rw-r--r--src/yuzu/configuration/configure_system.cpp4
-rw-r--r--src/yuzu/configuration/configure_system.h1
-rw-r--r--src/yuzu/configuration/configure_system.ui62
-rw-r--r--src/yuzu/debugger/wait_tree.cpp4
-rw-r--r--src/yuzu/loading_screen.cpp17
-rw-r--r--src/yuzu/main.cpp49
-rw-r--r--src/yuzu/main.h7
-rw-r--r--src/yuzu_cmd/config.cpp6
-rw-r--r--src/yuzu_cmd/default_ini.h11
-rw-r--r--src/yuzu_cmd/emu_window/emu_window_sdl2.cpp2
-rw-r--r--src/yuzu_cmd/emu_window/emu_window_sdl2.h14
-rw-r--r--src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp54
-rw-r--r--src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h18
-rw-r--r--src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp9
-rw-r--r--src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h11
-rw-r--r--src/yuzu_cmd/yuzu.cpp25
-rw-r--r--src/yuzu_tester/config.cpp2
-rw-r--r--src/yuzu_tester/default_ini.h4
-rw-r--r--src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp4
-rw-r--r--src/yuzu_tester/emu_window/emu_window_sdl2_hide.h3
187 files changed, 6953 insertions, 5171 deletions
diff --git a/src/audio_core/algorithm/interpolate.cpp b/src/audio_core/algorithm/interpolate.cpp
index a58f24169..49ab9d3e1 100644
--- a/src/audio_core/algorithm/interpolate.cpp
+++ b/src/audio_core/algorithm/interpolate.cpp
@@ -8,13 +8,14 @@
8#include <climits> 8#include <climits>
9#include <cmath> 9#include <cmath>
10#include <vector> 10#include <vector>
11
11#include "audio_core/algorithm/interpolate.h" 12#include "audio_core/algorithm/interpolate.h"
12#include "common/common_types.h" 13#include "common/common_types.h"
13#include "common/logging/log.h" 14#include "common/logging/log.h"
14 15
15namespace AudioCore { 16namespace AudioCore {
16 17
17constexpr std::array<s16, 512> curve_lut0 = { 18constexpr std::array<s16, 512> curve_lut0{
18 6600, 19426, 6722, 3, 6479, 19424, 6845, 9, 6359, 19419, 6968, 15, 6239, 19 6600, 19426, 6722, 3, 6479, 19424, 6845, 9, 6359, 19419, 6968, 15, 6239,
19 19412, 7093, 22, 6121, 19403, 7219, 28, 6004, 19391, 7345, 34, 5888, 19377, 20 19412, 7093, 22, 6121, 19403, 7219, 28, 6004, 19391, 7345, 34, 5888, 19377,
20 7472, 41, 5773, 19361, 7600, 48, 5659, 19342, 7728, 55, 5546, 19321, 7857, 21 7472, 41, 5773, 19361, 7600, 48, 5659, 19342, 7728, 55, 5546, 19321, 7857,
@@ -56,7 +57,7 @@ constexpr std::array<s16, 512> curve_lut0 = {
56 19403, 6121, 22, 7093, 19412, 6239, 15, 6968, 19419, 6359, 9, 6845, 19424, 57 19403, 6121, 22, 7093, 19412, 6239, 15, 6968, 19419, 6359, 9, 6845, 19424,
57 6479, 3, 6722, 19426, 6600}; 58 6479, 3, 6722, 19426, 6600};
58 59
59constexpr std::array<s16, 512> curve_lut1 = { 60constexpr std::array<s16, 512> curve_lut1{
60 -68, 32639, 69, -5, -200, 32630, 212, -15, -328, 32613, 359, -26, -450, 61 -68, 32639, 69, -5, -200, 32630, 212, -15, -328, 32613, 359, -26, -450,
61 32586, 512, -36, -568, 32551, 669, -47, -680, 32507, 832, -58, -788, 32454, 62 32586, 512, -36, -568, 32551, 669, -47, -680, 32507, 832, -58, -788, 32454,
62 1000, -69, -891, 32393, 1174, -80, -990, 32323, 1352, -92, -1084, 32244, 1536, 63 1000, -69, -891, 32393, 1174, -80, -990, 32323, 1352, -92, -1084, 32244, 1536,
@@ -98,7 +99,7 @@ constexpr std::array<s16, 512> curve_lut1 = {
98 32551, -568, -36, 512, 32586, -450, -26, 359, 32613, -328, -15, 212, 32630, 99 32551, -568, -36, 512, 32586, -450, -26, 359, 32613, -328, -15, 212, 32630,
99 -200, -5, 69, 32639, -68}; 100 -200, -5, 69, 32639, -68};
100 101
101constexpr std::array<s16, 512> curve_lut2 = { 102constexpr std::array<s16, 512> curve_lut2{
102 3195, 26287, 3329, -32, 3064, 26281, 3467, -34, 2936, 26270, 3608, -38, 2811, 103 3195, 26287, 3329, -32, 3064, 26281, 3467, -34, 2936, 26270, 3608, -38, 2811,
103 26253, 3751, -42, 2688, 26230, 3897, -46, 2568, 26202, 4046, -50, 2451, 26169, 104 26253, 3751, -42, 2688, 26230, 3897, -46, 2568, 26202, 4046, -50, 2451, 26169,
104 4199, -54, 2338, 26130, 4354, -58, 2227, 26085, 4512, -63, 2120, 26035, 4673, 105 4199, -54, 2338, 26130, 4354, -58, 2227, 26085, 4512, -63, 2120, 26035, 4673,
@@ -146,10 +147,10 @@ std::vector<s16> Interpolate(InterpolationState& state, std::vector<s16> input,
146 147
147 if (ratio <= 0) { 148 if (ratio <= 0) {
148 LOG_CRITICAL(Audio, "Nonsensical interpolation ratio {}", ratio); 149 LOG_CRITICAL(Audio, "Nonsensical interpolation ratio {}", ratio);
149 ratio = 1.0; 150 return input;
150 } 151 }
151 152
152 const int step = static_cast<int>(ratio * 0x8000); 153 const s32 step{static_cast<s32>(ratio * 0x8000)};
153 const std::array<s16, 512>& lut = [step] { 154 const std::array<s16, 512>& lut = [step] {
154 if (step > 0xaaaa) { 155 if (step > 0xaaaa) {
155 return curve_lut0; 156 return curve_lut0;
@@ -160,28 +161,37 @@ std::vector<s16> Interpolate(InterpolationState& state, std::vector<s16> input,
160 return curve_lut2; 161 return curve_lut2;
161 }(); 162 }();
162 163
163 std::vector<s16> output(static_cast<std::size_t>(input.size() / ratio)); 164 const std::size_t num_frames{input.size() / 2};
164 int in_offset = 0; 165
165 for (std::size_t out_offset = 0; out_offset < output.size(); out_offset += 2) { 166 std::vector<s16> output;
166 const int lut_index = (state.fraction >> 8) * 4; 167 output.reserve(static_cast<std::size_t>(input.size() / ratio + InterpolationState::taps));
167 168
168 const int l = input[(in_offset + 0) * 2 + 0] * lut[lut_index + 0] + 169 for (std::size_t frame{}; frame < num_frames; ++frame) {
169 input[(in_offset + 1) * 2 + 0] * lut[lut_index + 1] + 170 const std::size_t lut_index{(state.fraction >> 8) * InterpolationState::taps};
170 input[(in_offset + 2) * 2 + 0] * lut[lut_index + 2] +
171 input[(in_offset + 3) * 2 + 0] * lut[lut_index + 3];
172 171
173 const int r = input[(in_offset + 0) * 2 + 1] * lut[lut_index + 0] + 172 std::rotate(state.history.begin(), state.history.end() - 1, state.history.end());
174 input[(in_offset + 1) * 2 + 1] * lut[lut_index + 1] + 173 state.history[0][0] = input[frame * 2 + 0];
175 input[(in_offset + 2) * 2 + 1] * lut[lut_index + 2] + 174 state.history[0][1] = input[frame * 2 + 1];
176 input[(in_offset + 3) * 2 + 1] * lut[lut_index + 3];
177 175
178 const int new_offset = state.fraction + step; 176 while (state.position <= 1.0) {
177 const s32 left{state.history[0][0] * lut[lut_index + 0] +
178 state.history[1][0] * lut[lut_index + 1] +
179 state.history[2][0] * lut[lut_index + 2] +
180 state.history[3][0] * lut[lut_index + 3]};
181 const s32 right{state.history[0][1] * lut[lut_index + 0] +
182 state.history[1][1] * lut[lut_index + 1] +
183 state.history[2][1] * lut[lut_index + 2] +
184 state.history[3][1] * lut[lut_index + 3]};
185 const s32 new_offset{state.fraction + step};
179 186
180 in_offset += new_offset >> 15; 187 state.fraction = new_offset & 0x7fff;
181 state.fraction = new_offset & 0x7fff;
182 188
183 output[out_offset + 0] = static_cast<s16>(std::clamp(l >> 15, SHRT_MIN, SHRT_MAX)); 189 output.emplace_back(static_cast<s16>(std::clamp(left >> 15, SHRT_MIN, SHRT_MAX)));
184 output[out_offset + 1] = static_cast<s16>(std::clamp(r >> 15, SHRT_MIN, SHRT_MAX)); 190 output.emplace_back(static_cast<s16>(std::clamp(right >> 15, SHRT_MIN, SHRT_MAX)));
191
192 state.position += ratio;
193 }
194 state.position -= 1.0;
185 } 195 }
186 196
187 return output; 197 return output;
diff --git a/src/audio_core/algorithm/interpolate.h b/src/audio_core/algorithm/interpolate.h
index 1b9831a75..ab1a31754 100644
--- a/src/audio_core/algorithm/interpolate.h
+++ b/src/audio_core/algorithm/interpolate.h
@@ -6,12 +6,17 @@
6 6
7#include <array> 7#include <array>
8#include <vector> 8#include <vector>
9
9#include "common/common_types.h" 10#include "common/common_types.h"
10 11
11namespace AudioCore { 12namespace AudioCore {
12 13
13struct InterpolationState { 14struct InterpolationState {
14 int fraction = 0; 15 static constexpr std::size_t taps{4};
16 static constexpr std::size_t history_size{taps * 2 - 1};
17 std::array<std::array<s16, 2>, history_size> history{};
18 double position{};
19 s32 fraction{};
15}; 20};
16 21
17/// Interpolates input signal to produce output signal. 22/// Interpolates input signal to produce output signal.
diff --git a/src/audio_core/cubeb_sink.cpp b/src/audio_core/cubeb_sink.cpp
index 7047ed9cf..c4e0e30fe 100644
--- a/src/audio_core/cubeb_sink.cpp
+++ b/src/audio_core/cubeb_sink.cpp
@@ -8,6 +8,7 @@
8#include "audio_core/cubeb_sink.h" 8#include "audio_core/cubeb_sink.h"
9#include "audio_core/stream.h" 9#include "audio_core/stream.h"
10#include "audio_core/time_stretch.h" 10#include "audio_core/time_stretch.h"
11#include "common/assert.h"
11#include "common/logging/log.h" 12#include "common/logging/log.h"
12#include "common/ring_buffer.h" 13#include "common/ring_buffer.h"
13#include "core/settings.h" 14#include "core/settings.h"
@@ -65,12 +66,25 @@ public:
65 void EnqueueSamples(u32 source_num_channels, const std::vector<s16>& samples) override { 66 void EnqueueSamples(u32 source_num_channels, const std::vector<s16>& samples) override {
66 if (source_num_channels > num_channels) { 67 if (source_num_channels > num_channels) {
67 // Downsample 6 channels to 2 68 // Downsample 6 channels to 2
69 ASSERT_MSG(source_num_channels == 6, "Channel count must be 6");
70
68 std::vector<s16> buf; 71 std::vector<s16> buf;
69 buf.reserve(samples.size() * num_channels / source_num_channels); 72 buf.reserve(samples.size() * num_channels / source_num_channels);
70 for (std::size_t i = 0; i < samples.size(); i += source_num_channels) { 73 for (std::size_t i = 0; i < samples.size(); i += source_num_channels) {
71 for (std::size_t ch = 0; ch < num_channels; ch++) { 74 // Downmixing implementation taken from the ATSC standard
72 buf.push_back(samples[i + ch]); 75 const s16 left{samples[i + 0]};
73 } 76 const s16 right{samples[i + 1]};
77 const s16 center{samples[i + 2]};
78 const s16 surround_left{samples[i + 4]};
79 const s16 surround_right{samples[i + 5]};
80 // Not used in the ATSC reference implementation
81 [[maybe_unused]] const s16 low_frequency_effects { samples[i + 3] };
82
83 constexpr s32 clev{707}; // center mixing level coefficient
84 constexpr s32 slev{707}; // surround mixing level coefficient
85
86 buf.push_back(left + (clev * center / 1000) + (slev * surround_left / 1000));
87 buf.push_back(right + (clev * center / 1000) + (slev * surround_right / 1000));
74 } 88 }
75 queue.Push(buf); 89 queue.Push(buf);
76 return; 90 return;
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 9afc6105d..fbebed715 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -38,8 +38,6 @@ add_custom_command(OUTPUT scm_rev.cpp
38 "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h" 38 "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h"
39 "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp" 39 "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp"
40 "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h" 40 "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h"
41 "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.cpp"
42 "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.h"
43 "${VIDEO_CORE}/shader/decode/arithmetic.cpp" 41 "${VIDEO_CORE}/shader/decode/arithmetic.cpp"
44 "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp" 42 "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp"
45 "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp" 43 "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp"
@@ -72,8 +70,6 @@ add_custom_command(OUTPUT scm_rev.cpp
72 "${VIDEO_CORE}/shader/ast.h" 70 "${VIDEO_CORE}/shader/ast.h"
73 "${VIDEO_CORE}/shader/compiler_settings.cpp" 71 "${VIDEO_CORE}/shader/compiler_settings.cpp"
74 "${VIDEO_CORE}/shader/compiler_settings.h" 72 "${VIDEO_CORE}/shader/compiler_settings.h"
75 "${VIDEO_CORE}/shader/const_buffer_locker.cpp"
76 "${VIDEO_CORE}/shader/const_buffer_locker.h"
77 "${VIDEO_CORE}/shader/control_flow.cpp" 73 "${VIDEO_CORE}/shader/control_flow.cpp"
78 "${VIDEO_CORE}/shader/control_flow.h" 74 "${VIDEO_CORE}/shader/control_flow.h"
79 "${VIDEO_CORE}/shader/decode.cpp" 75 "${VIDEO_CORE}/shader/decode.cpp"
@@ -82,9 +78,13 @@ add_custom_command(OUTPUT scm_rev.cpp
82 "${VIDEO_CORE}/shader/node.h" 78 "${VIDEO_CORE}/shader/node.h"
83 "${VIDEO_CORE}/shader/node_helper.cpp" 79 "${VIDEO_CORE}/shader/node_helper.cpp"
84 "${VIDEO_CORE}/shader/node_helper.h" 80 "${VIDEO_CORE}/shader/node_helper.h"
81 "${VIDEO_CORE}/shader/registry.cpp"
82 "${VIDEO_CORE}/shader/registry.h"
85 "${VIDEO_CORE}/shader/shader_ir.cpp" 83 "${VIDEO_CORE}/shader/shader_ir.cpp"
86 "${VIDEO_CORE}/shader/shader_ir.h" 84 "${VIDEO_CORE}/shader/shader_ir.h"
87 "${VIDEO_CORE}/shader/track.cpp" 85 "${VIDEO_CORE}/shader/track.cpp"
86 "${VIDEO_CORE}/shader/transform_feedback.cpp"
87 "${VIDEO_CORE}/shader/transform_feedback.h"
88 # and also check that the scm_rev files haven't changed 88 # and also check that the scm_rev files haven't changed
89 "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" 89 "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in"
90 "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h" 90 "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h"
diff --git a/src/common/math_util.h b/src/common/math_util.h
index d6c35ee89..83ef0201f 100644
--- a/src/common/math_util.h
+++ b/src/common/math_util.h
@@ -24,17 +24,29 @@ struct Rectangle {
24 : left(left), top(top), right(right), bottom(bottom) {} 24 : left(left), top(top), right(right), bottom(bottom) {}
25 25
26 T GetWidth() const { 26 T GetWidth() const {
27 return std::abs(static_cast<std::make_signed_t<T>>(right - left)); 27 if constexpr (std::is_floating_point_v<T>) {
28 return std::abs(right - left);
29 } else {
30 return std::abs(static_cast<std::make_signed_t<T>>(right - left));
31 }
28 } 32 }
33
29 T GetHeight() const { 34 T GetHeight() const {
30 return std::abs(static_cast<std::make_signed_t<T>>(bottom - top)); 35 if constexpr (std::is_floating_point_v<T>) {
36 return std::abs(bottom - top);
37 } else {
38 return std::abs(static_cast<std::make_signed_t<T>>(bottom - top));
39 }
31 } 40 }
41
32 Rectangle<T> TranslateX(const T x) const { 42 Rectangle<T> TranslateX(const T x) const {
33 return Rectangle{left + x, top, right + x, bottom}; 43 return Rectangle{left + x, top, right + x, bottom};
34 } 44 }
45
35 Rectangle<T> TranslateY(const T y) const { 46 Rectangle<T> TranslateY(const T y) const {
36 return Rectangle{left, top + y, right, bottom + y}; 47 return Rectangle{left, top + y, right, bottom + y};
37 } 48 }
49
38 Rectangle<T> Scale(const float s) const { 50 Rectangle<T> Scale(const float s) const {
39 return Rectangle{left, top, static_cast<T>(left + GetWidth() * s), 51 return Rectangle{left, top, static_cast<T>(left + GetWidth() * s),
40 static_cast<T>(top + GetHeight() * s)}; 52 static_cast<T>(top + GetHeight() * s)};
diff --git a/src/common/page_table.cpp b/src/common/page_table.cpp
index 69b7abc54..566b57b62 100644
--- a/src/common/page_table.cpp
+++ b/src/common/page_table.cpp
@@ -16,7 +16,6 @@ void PageTable::Resize(std::size_t address_space_width_in_bits) {
16 16
17 pointers.resize(num_page_table_entries); 17 pointers.resize(num_page_table_entries);
18 attributes.resize(num_page_table_entries); 18 attributes.resize(num_page_table_entries);
19 backing_addr.resize(num_page_table_entries);
20 19
21 // The default is a 39-bit address space, which causes an initial 1GB allocation size. If the 20 // The default is a 39-bit address space, which causes an initial 1GB allocation size. If the
22 // vector size is subsequently decreased (via resize), the vector might not automatically 21 // vector size is subsequently decreased (via resize), the vector might not automatically
@@ -25,6 +24,17 @@ void PageTable::Resize(std::size_t address_space_width_in_bits) {
25 24
26 pointers.shrink_to_fit(); 25 pointers.shrink_to_fit();
27 attributes.shrink_to_fit(); 26 attributes.shrink_to_fit();
27}
28
29BackingPageTable::BackingPageTable(std::size_t page_size_in_bits) : PageTable{page_size_in_bits} {}
30
31BackingPageTable::~BackingPageTable() = default;
32
33void BackingPageTable::Resize(std::size_t address_space_width_in_bits) {
34 PageTable::Resize(address_space_width_in_bits);
35 const std::size_t num_page_table_entries = 1ULL
36 << (address_space_width_in_bits - page_size_in_bits);
37 backing_addr.resize(num_page_table_entries);
28 backing_addr.shrink_to_fit(); 38 backing_addr.shrink_to_fit();
29} 39}
30 40
diff --git a/src/common/page_table.h b/src/common/page_table.h
index 8b8ff0bb8..dbc272ab7 100644
--- a/src/common/page_table.h
+++ b/src/common/page_table.h
@@ -76,9 +76,20 @@ struct PageTable {
76 */ 76 */
77 std::vector<PageType> attributes; 77 std::vector<PageType> attributes;
78 78
79 std::vector<u64> backing_addr;
80
81 const std::size_t page_size_in_bits{}; 79 const std::size_t page_size_in_bits{};
82}; 80};
83 81
82/**
83 * A more advanced Page Table with the ability to save a backing address when using it
84 * depends on another MMU.
85 */
86struct BackingPageTable : PageTable {
87 explicit BackingPageTable(std::size_t page_size_in_bits);
88 ~BackingPageTable();
89
90 void Resize(std::size_t address_space_width_in_bits);
91
92 std::vector<u64> backing_addr;
93};
94
84} // namespace Common 95} // namespace Common
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 88c06b2ce..b31a0328c 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -131,8 +131,8 @@ add_library(core STATIC
131 frontend/framebuffer_layout.cpp 131 frontend/framebuffer_layout.cpp
132 frontend/framebuffer_layout.h 132 frontend/framebuffer_layout.h
133 frontend/input.h 133 frontend/input.h
134 frontend/scope_acquire_window_context.cpp 134 frontend/scope_acquire_context.cpp
135 frontend/scope_acquire_window_context.h 135 frontend/scope_acquire_context.h
136 gdbstub/gdbstub.cpp 136 gdbstub/gdbstub.cpp
137 gdbstub/gdbstub.h 137 gdbstub/gdbstub.h
138 hardware_interrupt_manager.cpp 138 hardware_interrupt_manager.cpp
@@ -595,8 +595,12 @@ endif()
595 595
596if (ARCHITECTURE_x86_64) 596if (ARCHITECTURE_x86_64)
597 target_sources(core PRIVATE 597 target_sources(core PRIVATE
598 arm/dynarmic/arm_dynarmic.cpp 598 arm/dynarmic/arm_dynarmic_32.cpp
599 arm/dynarmic/arm_dynarmic.h 599 arm/dynarmic/arm_dynarmic_32.h
600 arm/dynarmic/arm_dynarmic_64.cpp
601 arm/dynarmic/arm_dynarmic_64.h
602 arm/dynarmic/arm_dynarmic_cp15.cpp
603 arm/dynarmic/arm_dynarmic_cp15.h
600 ) 604 )
601 target_link_libraries(core PRIVATE dynarmic) 605 target_link_libraries(core PRIVATE dynarmic)
602endif() 606endif()
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index 47b964eb7..57eae839e 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -25,7 +25,20 @@ public:
25 explicit ARM_Interface(System& system_) : system{system_} {} 25 explicit ARM_Interface(System& system_) : system{system_} {}
26 virtual ~ARM_Interface() = default; 26 virtual ~ARM_Interface() = default;
27 27
28 struct ThreadContext { 28 struct ThreadContext32 {
29 std::array<u32, 16> cpu_registers;
30 u32 cpsr;
31 std::array<u8, 4> padding;
32 std::array<u64, 32> fprs;
33 u32 fpscr;
34 u32 fpexc;
35 u32 tpidr;
36 };
37 // Internally within the kernel, it expects the AArch32 version of the
38 // thread context to be 344 bytes in size.
39 static_assert(sizeof(ThreadContext32) == 0x158);
40
41 struct ThreadContext64 {
29 std::array<u64, 31> cpu_registers; 42 std::array<u64, 31> cpu_registers;
30 u64 sp; 43 u64 sp;
31 u64 pc; 44 u64 pc;
@@ -38,7 +51,7 @@ public:
38 }; 51 };
39 // Internally within the kernel, it expects the AArch64 version of the 52 // Internally within the kernel, it expects the AArch64 version of the
40 // thread context to be 800 bytes in size. 53 // thread context to be 800 bytes in size.
41 static_assert(sizeof(ThreadContext) == 0x320); 54 static_assert(sizeof(ThreadContext64) == 0x320);
42 55
43 /// Runs the CPU until an event happens 56 /// Runs the CPU until an event happens
44 virtual void Run() = 0; 57 virtual void Run() = 0;
@@ -130,17 +143,10 @@ public:
130 */ 143 */
131 virtual void SetTPIDR_EL0(u64 value) = 0; 144 virtual void SetTPIDR_EL0(u64 value) = 0;
132 145
133 /** 146 virtual void SaveContext(ThreadContext32& ctx) = 0;
134 * Saves the current CPU context 147 virtual void SaveContext(ThreadContext64& ctx) = 0;
135 * @param ctx Thread context to save 148 virtual void LoadContext(const ThreadContext32& ctx) = 0;
136 */ 149 virtual void LoadContext(const ThreadContext64& ctx) = 0;
137 virtual void SaveContext(ThreadContext& ctx) = 0;
138
139 /**
140 * Loads a CPU context
141 * @param ctx Thread context to load
142 */
143 virtual void LoadContext(const ThreadContext& ctx) = 0;
144 150
145 /// Clears the exclusive monitor's state. 151 /// Clears the exclusive monitor's state.
146 virtual void ClearExclusiveState() = 0; 152 virtual void ClearExclusiveState() = 0;
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
new file mode 100644
index 000000000..187a972ac
--- /dev/null
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -0,0 +1,208 @@
1// Copyright 2020 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cinttypes>
6#include <memory>
7#include <dynarmic/A32/a32.h>
8#include <dynarmic/A32/config.h>
9#include <dynarmic/A32/context.h>
10#include "common/microprofile.h"
11#include "core/arm/dynarmic/arm_dynarmic_32.h"
12#include "core/arm/dynarmic/arm_dynarmic_64.h"
13#include "core/arm/dynarmic/arm_dynarmic_cp15.h"
14#include "core/core.h"
15#include "core/core_manager.h"
16#include "core/core_timing.h"
17#include "core/hle/kernel/svc.h"
18#include "core/memory.h"
19
20namespace Core {
21
22class DynarmicCallbacks32 : public Dynarmic::A32::UserCallbacks {
23public:
24 explicit DynarmicCallbacks32(ARM_Dynarmic_32& parent) : parent(parent) {}
25
26 u8 MemoryRead8(u32 vaddr) override {
27 return parent.system.Memory().Read8(vaddr);
28 }
29 u16 MemoryRead16(u32 vaddr) override {
30 return parent.system.Memory().Read16(vaddr);
31 }
32 u32 MemoryRead32(u32 vaddr) override {
33 return parent.system.Memory().Read32(vaddr);
34 }
35 u64 MemoryRead64(u32 vaddr) override {
36 return parent.system.Memory().Read64(vaddr);
37 }
38
39 void MemoryWrite8(u32 vaddr, u8 value) override {
40 parent.system.Memory().Write8(vaddr, value);
41 }
42 void MemoryWrite16(u32 vaddr, u16 value) override {
43 parent.system.Memory().Write16(vaddr, value);
44 }
45 void MemoryWrite32(u32 vaddr, u32 value) override {
46 parent.system.Memory().Write32(vaddr, value);
47 }
48 void MemoryWrite64(u32 vaddr, u64 value) override {
49 parent.system.Memory().Write64(vaddr, value);
50 }
51
52 void InterpreterFallback(u32 pc, std::size_t num_instructions) override {
53 UNIMPLEMENTED();
54 }
55
56 void ExceptionRaised(u32 pc, Dynarmic::A32::Exception exception) override {
57 switch (exception) {
58 case Dynarmic::A32::Exception::UndefinedInstruction:
59 case Dynarmic::A32::Exception::UnpredictableInstruction:
60 break;
61 case Dynarmic::A32::Exception::Breakpoint:
62 break;
63 }
64 LOG_CRITICAL(HW_GPU, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})",
65 static_cast<std::size_t>(exception), pc, MemoryReadCode(pc));
66 UNIMPLEMENTED();
67 }
68
69 void CallSVC(u32 swi) override {
70 Kernel::CallSVC(parent.system, swi);
71 }
72
73 void AddTicks(u64 ticks) override {
74 // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a
75 // rough approximation of the amount of executed ticks in the system, it may be thrown off
76 // if not all cores are doing a similar amount of work. Instead of doing this, we should
77 // device a way so that timing is consistent across all cores without increasing the ticks 4
78 // times.
79 u64 amortized_ticks = (ticks - num_interpreted_instructions) / Core::NUM_CPU_CORES;
80 // Always execute at least one tick.
81 amortized_ticks = std::max<u64>(amortized_ticks, 1);
82
83 parent.system.CoreTiming().AddTicks(amortized_ticks);
84 num_interpreted_instructions = 0;
85 }
86 u64 GetTicksRemaining() override {
87 return std::max(parent.system.CoreTiming().GetDowncount(), {});
88 }
89
90 ARM_Dynarmic_32& parent;
91 std::size_t num_interpreted_instructions{};
92 u64 tpidrro_el0{};
93 u64 tpidr_el0{};
94};
95
96std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable& page_table,
97 std::size_t address_space_bits) const {
98 Dynarmic::A32::UserConfig config;
99 config.callbacks = cb.get();
100 // TODO(bunnei): Implement page table for 32-bit
101 // config.page_table = &page_table.pointers;
102 config.coprocessors[15] = std::make_shared<DynarmicCP15>((u32*)&CP15_regs[0]);
103 config.define_unpredictable_behaviour = true;
104 return std::make_unique<Dynarmic::A32::Jit>(config);
105}
106
107MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_32, "ARM JIT", "Dynarmic", MP_RGB(255, 64, 64));
108
109void ARM_Dynarmic_32::Run() {
110 MICROPROFILE_SCOPE(ARM_Jit_Dynarmic_32);
111 jit->Run();
112}
113
114void ARM_Dynarmic_32::Step() {
115 cb->InterpreterFallback(jit->Regs()[15], 1);
116}
117
118ARM_Dynarmic_32::ARM_Dynarmic_32(System& system, ExclusiveMonitor& exclusive_monitor,
119 std::size_t core_index)
120 : ARM_Interface{system},
121 cb(std::make_unique<DynarmicCallbacks32>(*this)), core_index{core_index},
122 exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {}
123
124ARM_Dynarmic_32::~ARM_Dynarmic_32() = default;
125
126void ARM_Dynarmic_32::SetPC(u64 pc) {
127 jit->Regs()[15] = static_cast<u32>(pc);
128}
129
130u64 ARM_Dynarmic_32::GetPC() const {
131 return jit->Regs()[15];
132}
133
134u64 ARM_Dynarmic_32::GetReg(int index) const {
135 return jit->Regs()[index];
136}
137
138void ARM_Dynarmic_32::SetReg(int index, u64 value) {
139 jit->Regs()[index] = static_cast<u32>(value);
140}
141
142u128 ARM_Dynarmic_32::GetVectorReg(int index) const {
143 return {};
144}
145
146void ARM_Dynarmic_32::SetVectorReg(int index, u128 value) {}
147
148u32 ARM_Dynarmic_32::GetPSTATE() const {
149 return jit->Cpsr();
150}
151
152void ARM_Dynarmic_32::SetPSTATE(u32 cpsr) {
153 jit->SetCpsr(cpsr);
154}
155
156u64 ARM_Dynarmic_32::GetTlsAddress() const {
157 return CP15_regs[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)];
158}
159
160void ARM_Dynarmic_32::SetTlsAddress(VAddr address) {
161 CP15_regs[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)] = static_cast<u32>(address);
162}
163
164u64 ARM_Dynarmic_32::GetTPIDR_EL0() const {
165 return cb->tpidr_el0;
166}
167
168void ARM_Dynarmic_32::SetTPIDR_EL0(u64 value) {
169 cb->tpidr_el0 = value;
170}
171
172void ARM_Dynarmic_32::SaveContext(ThreadContext32& ctx) {
173 Dynarmic::A32::Context context;
174 jit->SaveContext(context);
175 ctx.cpu_registers = context.Regs();
176 ctx.cpsr = context.Cpsr();
177}
178
179void ARM_Dynarmic_32::LoadContext(const ThreadContext32& ctx) {
180 Dynarmic::A32::Context context;
181 context.Regs() = ctx.cpu_registers;
182 context.SetCpsr(ctx.cpsr);
183 jit->LoadContext(context);
184}
185
186void ARM_Dynarmic_32::PrepareReschedule() {
187 jit->HaltExecution();
188}
189
190void ARM_Dynarmic_32::ClearInstructionCache() {
191 jit->ClearCache();
192}
193
194void ARM_Dynarmic_32::ClearExclusiveState() {}
195
196void ARM_Dynarmic_32::PageTableChanged(Common::PageTable& page_table,
197 std::size_t new_address_space_size_in_bits) {
198 auto key = std::make_pair(&page_table, new_address_space_size_in_bits);
199 auto iter = jit_cache.find(key);
200 if (iter != jit_cache.end()) {
201 jit = iter->second;
202 return;
203 }
204 jit = MakeJit(page_table, new_address_space_size_in_bits);
205 jit_cache.emplace(key, jit);
206}
207
208} // namespace Core
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.h b/src/core/arm/dynarmic/arm_dynarmic_32.h
new file mode 100644
index 000000000..143e46e4d
--- /dev/null
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.h
@@ -0,0 +1,77 @@
1// Copyright 2020 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <unordered_map>
9
10#include <dynarmic/A32/a32.h>
11#include <dynarmic/A64/a64.h>
12#include <dynarmic/A64/exclusive_monitor.h>
13#include "common/common_types.h"
14#include "common/hash.h"
15#include "core/arm/arm_interface.h"
16#include "core/arm/exclusive_monitor.h"
17
18namespace Memory {
19class Memory;
20}
21
22namespace Core {
23
24class DynarmicCallbacks32;
25class DynarmicExclusiveMonitor;
26class System;
27
28class ARM_Dynarmic_32 final : public ARM_Interface {
29public:
30 ARM_Dynarmic_32(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index);
31 ~ARM_Dynarmic_32() override;
32
33 void SetPC(u64 pc) override;
34 u64 GetPC() const override;
35 u64 GetReg(int index) const override;
36 void SetReg(int index, u64 value) override;
37 u128 GetVectorReg(int index) const override;
38 void SetVectorReg(int index, u128 value) override;
39 u32 GetPSTATE() const override;
40 void SetPSTATE(u32 pstate) override;
41 void Run() override;
42 void Step() override;
43 VAddr GetTlsAddress() const override;
44 void SetTlsAddress(VAddr address) override;
45 void SetTPIDR_EL0(u64 value) override;
46 u64 GetTPIDR_EL0() const override;
47
48 void SaveContext(ThreadContext32& ctx) override;
49 void SaveContext(ThreadContext64& ctx) override {}
50 void LoadContext(const ThreadContext32& ctx) override;
51 void LoadContext(const ThreadContext64& ctx) override {}
52
53 void PrepareReschedule() override;
54 void ClearExclusiveState() override;
55
56 void ClearInstructionCache() override;
57 void PageTableChanged(Common::PageTable& new_page_table,
58 std::size_t new_address_space_size_in_bits) override;
59
60private:
61 std::shared_ptr<Dynarmic::A32::Jit> MakeJit(Common::PageTable& page_table,
62 std::size_t address_space_bits) const;
63
64 using JitCacheKey = std::pair<Common::PageTable*, std::size_t>;
65 using JitCacheType =
66 std::unordered_map<JitCacheKey, std::shared_ptr<Dynarmic::A32::Jit>, Common::PairHash>;
67
68 friend class DynarmicCallbacks32;
69 std::unique_ptr<DynarmicCallbacks32> cb;
70 JitCacheType jit_cache;
71 std::shared_ptr<Dynarmic::A32::Jit> jit;
72 std::size_t core_index;
73 DynarmicExclusiveMonitor& exclusive_monitor;
74 std::array<u32, 84> CP15_regs{};
75};
76
77} // namespace Core
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index 29eaf74e5..a53a58ba0 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -8,7 +8,7 @@
8#include <dynarmic/A64/config.h> 8#include <dynarmic/A64/config.h>
9#include "common/logging/log.h" 9#include "common/logging/log.h"
10#include "common/microprofile.h" 10#include "common/microprofile.h"
11#include "core/arm/dynarmic/arm_dynarmic.h" 11#include "core/arm/dynarmic/arm_dynarmic_64.h"
12#include "core/core.h" 12#include "core/core.h"
13#include "core/core_manager.h" 13#include "core/core_manager.h"
14#include "core/core_timing.h" 14#include "core/core_timing.h"
@@ -25,9 +25,9 @@ namespace Core {
25 25
26using Vector = Dynarmic::A64::Vector; 26using Vector = Dynarmic::A64::Vector;
27 27
28class ARM_Dynarmic_Callbacks : public Dynarmic::A64::UserCallbacks { 28class DynarmicCallbacks64 : public Dynarmic::A64::UserCallbacks {
29public: 29public:
30 explicit ARM_Dynarmic_Callbacks(ARM_Dynarmic& parent) : parent(parent) {} 30 explicit DynarmicCallbacks64(ARM_Dynarmic_64& parent) : parent(parent) {}
31 31
32 u8 MemoryRead8(u64 vaddr) override { 32 u8 MemoryRead8(u64 vaddr) override {
33 return parent.system.Memory().Read8(vaddr); 33 return parent.system.Memory().Read8(vaddr);
@@ -68,7 +68,7 @@ public:
68 LOG_INFO(Core_ARM, "Unicorn fallback @ 0x{:X} for {} instructions (instr = {:08X})", pc, 68 LOG_INFO(Core_ARM, "Unicorn fallback @ 0x{:X} for {} instructions (instr = {:08X})", pc,
69 num_instructions, MemoryReadCode(pc)); 69 num_instructions, MemoryReadCode(pc));
70 70
71 ARM_Interface::ThreadContext ctx; 71 ARM_Interface::ThreadContext64 ctx;
72 parent.SaveContext(ctx); 72 parent.SaveContext(ctx);
73 parent.inner_unicorn.LoadContext(ctx); 73 parent.inner_unicorn.LoadContext(ctx);
74 parent.inner_unicorn.ExecuteInstructions(num_instructions); 74 parent.inner_unicorn.ExecuteInstructions(num_instructions);
@@ -90,7 +90,7 @@ public:
90 parent.jit->HaltExecution(); 90 parent.jit->HaltExecution();
91 parent.SetPC(pc); 91 parent.SetPC(pc);
92 Kernel::Thread* const thread = parent.system.CurrentScheduler().GetCurrentThread(); 92 Kernel::Thread* const thread = parent.system.CurrentScheduler().GetCurrentThread();
93 parent.SaveContext(thread->GetContext()); 93 parent.SaveContext(thread->GetContext64());
94 GDBStub::Break(); 94 GDBStub::Break();
95 GDBStub::SendTrap(thread, 5); 95 GDBStub::SendTrap(thread, 5);
96 return; 96 return;
@@ -126,14 +126,14 @@ public:
126 return Timing::CpuCyclesToClockCycles(parent.system.CoreTiming().GetTicks()); 126 return Timing::CpuCyclesToClockCycles(parent.system.CoreTiming().GetTicks());
127 } 127 }
128 128
129 ARM_Dynarmic& parent; 129 ARM_Dynarmic_64& parent;
130 std::size_t num_interpreted_instructions = 0; 130 std::size_t num_interpreted_instructions = 0;
131 u64 tpidrro_el0 = 0; 131 u64 tpidrro_el0 = 0;
132 u64 tpidr_el0 = 0; 132 u64 tpidr_el0 = 0;
133}; 133};
134 134
135std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit(Common::PageTable& page_table, 135std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable& page_table,
136 std::size_t address_space_bits) const { 136 std::size_t address_space_bits) const {
137 Dynarmic::A64::UserConfig config; 137 Dynarmic::A64::UserConfig config;
138 138
139 // Callbacks 139 // Callbacks
@@ -159,79 +159,79 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit(Common::PageTable& pag
159 // Unpredictable instructions 159 // Unpredictable instructions
160 config.define_unpredictable_behaviour = true; 160 config.define_unpredictable_behaviour = true;
161 161
162 return std::make_unique<Dynarmic::A64::Jit>(config); 162 return std::make_shared<Dynarmic::A64::Jit>(config);
163} 163}
164 164
165MICROPROFILE_DEFINE(ARM_Jit_Dynarmic, "ARM JIT", "Dynarmic", MP_RGB(255, 64, 64)); 165MICROPROFILE_DEFINE(ARM_Jit_Dynarmic_64, "ARM JIT", "Dynarmic", MP_RGB(255, 64, 64));
166 166
167void ARM_Dynarmic::Run() { 167void ARM_Dynarmic_64::Run() {
168 MICROPROFILE_SCOPE(ARM_Jit_Dynarmic); 168 MICROPROFILE_SCOPE(ARM_Jit_Dynarmic_64);
169 169
170 jit->Run(); 170 jit->Run();
171} 171}
172 172
173void ARM_Dynarmic::Step() { 173void ARM_Dynarmic_64::Step() {
174 cb->InterpreterFallback(jit->GetPC(), 1); 174 cb->InterpreterFallback(jit->GetPC(), 1);
175} 175}
176 176
177ARM_Dynarmic::ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor, 177ARM_Dynarmic_64::ARM_Dynarmic_64(System& system, ExclusiveMonitor& exclusive_monitor,
178 std::size_t core_index) 178 std::size_t core_index)
179 : ARM_Interface{system}, 179 : ARM_Interface{system},
180 cb(std::make_unique<ARM_Dynarmic_Callbacks>(*this)), inner_unicorn{system}, 180 cb(std::make_unique<DynarmicCallbacks64>(*this)), inner_unicorn{system},
181 core_index{core_index}, exclusive_monitor{ 181 core_index{core_index}, exclusive_monitor{
182 dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {} 182 dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {}
183 183
184ARM_Dynarmic::~ARM_Dynarmic() = default; 184ARM_Dynarmic_64::~ARM_Dynarmic_64() = default;
185 185
186void ARM_Dynarmic::SetPC(u64 pc) { 186void ARM_Dynarmic_64::SetPC(u64 pc) {
187 jit->SetPC(pc); 187 jit->SetPC(pc);
188} 188}
189 189
190u64 ARM_Dynarmic::GetPC() const { 190u64 ARM_Dynarmic_64::GetPC() const {
191 return jit->GetPC(); 191 return jit->GetPC();
192} 192}
193 193
194u64 ARM_Dynarmic::GetReg(int index) const { 194u64 ARM_Dynarmic_64::GetReg(int index) const {
195 return jit->GetRegister(index); 195 return jit->GetRegister(index);
196} 196}
197 197
198void ARM_Dynarmic::SetReg(int index, u64 value) { 198void ARM_Dynarmic_64::SetReg(int index, u64 value) {
199 jit->SetRegister(index, value); 199 jit->SetRegister(index, value);
200} 200}
201 201
202u128 ARM_Dynarmic::GetVectorReg(int index) const { 202u128 ARM_Dynarmic_64::GetVectorReg(int index) const {
203 return jit->GetVector(index); 203 return jit->GetVector(index);
204} 204}
205 205
206void ARM_Dynarmic::SetVectorReg(int index, u128 value) { 206void ARM_Dynarmic_64::SetVectorReg(int index, u128 value) {
207 jit->SetVector(index, value); 207 jit->SetVector(index, value);
208} 208}
209 209
210u32 ARM_Dynarmic::GetPSTATE() const { 210u32 ARM_Dynarmic_64::GetPSTATE() const {
211 return jit->GetPstate(); 211 return jit->GetPstate();
212} 212}
213 213
214void ARM_Dynarmic::SetPSTATE(u32 pstate) { 214void ARM_Dynarmic_64::SetPSTATE(u32 pstate) {
215 jit->SetPstate(pstate); 215 jit->SetPstate(pstate);
216} 216}
217 217
218u64 ARM_Dynarmic::GetTlsAddress() const { 218u64 ARM_Dynarmic_64::GetTlsAddress() const {
219 return cb->tpidrro_el0; 219 return cb->tpidrro_el0;
220} 220}
221 221
222void ARM_Dynarmic::SetTlsAddress(VAddr address) { 222void ARM_Dynarmic_64::SetTlsAddress(VAddr address) {
223 cb->tpidrro_el0 = address; 223 cb->tpidrro_el0 = address;
224} 224}
225 225
226u64 ARM_Dynarmic::GetTPIDR_EL0() const { 226u64 ARM_Dynarmic_64::GetTPIDR_EL0() const {
227 return cb->tpidr_el0; 227 return cb->tpidr_el0;
228} 228}
229 229
230void ARM_Dynarmic::SetTPIDR_EL0(u64 value) { 230void ARM_Dynarmic_64::SetTPIDR_EL0(u64 value) {
231 cb->tpidr_el0 = value; 231 cb->tpidr_el0 = value;
232} 232}
233 233
234void ARM_Dynarmic::SaveContext(ThreadContext& ctx) { 234void ARM_Dynarmic_64::SaveContext(ThreadContext64& ctx) {
235 ctx.cpu_registers = jit->GetRegisters(); 235 ctx.cpu_registers = jit->GetRegisters();
236 ctx.sp = jit->GetSP(); 236 ctx.sp = jit->GetSP();
237 ctx.pc = jit->GetPC(); 237 ctx.pc = jit->GetPC();
@@ -242,7 +242,7 @@ void ARM_Dynarmic::SaveContext(ThreadContext& ctx) {
242 ctx.tpidr = cb->tpidr_el0; 242 ctx.tpidr = cb->tpidr_el0;
243} 243}
244 244
245void ARM_Dynarmic::LoadContext(const ThreadContext& ctx) { 245void ARM_Dynarmic_64::LoadContext(const ThreadContext64& ctx) {
246 jit->SetRegisters(ctx.cpu_registers); 246 jit->SetRegisters(ctx.cpu_registers);
247 jit->SetSP(ctx.sp); 247 jit->SetSP(ctx.sp);
248 jit->SetPC(ctx.pc); 248 jit->SetPC(ctx.pc);
@@ -253,25 +253,32 @@ void ARM_Dynarmic::LoadContext(const ThreadContext& ctx) {
253 SetTPIDR_EL0(ctx.tpidr); 253 SetTPIDR_EL0(ctx.tpidr);
254} 254}
255 255
256void ARM_Dynarmic::PrepareReschedule() { 256void ARM_Dynarmic_64::PrepareReschedule() {
257 jit->HaltExecution(); 257 jit->HaltExecution();
258} 258}
259 259
260void ARM_Dynarmic::ClearInstructionCache() { 260void ARM_Dynarmic_64::ClearInstructionCache() {
261 jit->ClearCache(); 261 jit->ClearCache();
262} 262}
263 263
264void ARM_Dynarmic::ClearExclusiveState() { 264void ARM_Dynarmic_64::ClearExclusiveState() {
265 jit->ClearExclusiveState(); 265 jit->ClearExclusiveState();
266} 266}
267 267
268void ARM_Dynarmic::PageTableChanged(Common::PageTable& page_table, 268void ARM_Dynarmic_64::PageTableChanged(Common::PageTable& page_table,
269 std::size_t new_address_space_size_in_bits) { 269 std::size_t new_address_space_size_in_bits) {
270 auto key = std::make_pair(&page_table, new_address_space_size_in_bits);
271 auto iter = jit_cache.find(key);
272 if (iter != jit_cache.end()) {
273 jit = iter->second;
274 return;
275 }
270 jit = MakeJit(page_table, new_address_space_size_in_bits); 276 jit = MakeJit(page_table, new_address_space_size_in_bits);
277 jit_cache.emplace(key, jit);
271} 278}
272 279
273DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(Memory::Memory& memory_, std::size_t core_count) 280DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(Memory::Memory& memory, std::size_t core_count)
274 : monitor(core_count), memory{memory_} {} 281 : monitor(core_count), memory{memory} {}
275 282
276DynarmicExclusiveMonitor::~DynarmicExclusiveMonitor() = default; 283DynarmicExclusiveMonitor::~DynarmicExclusiveMonitor() = default;
277 284
diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic_64.h
index 9cd475cfb..e71240a96 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.h
@@ -5,9 +5,12 @@
5#pragma once 5#pragma once
6 6
7#include <memory> 7#include <memory>
8#include <unordered_map>
9
8#include <dynarmic/A64/a64.h> 10#include <dynarmic/A64/a64.h>
9#include <dynarmic/A64/exclusive_monitor.h> 11#include <dynarmic/A64/exclusive_monitor.h>
10#include "common/common_types.h" 12#include "common/common_types.h"
13#include "common/hash.h"
11#include "core/arm/arm_interface.h" 14#include "core/arm/arm_interface.h"
12#include "core/arm/exclusive_monitor.h" 15#include "core/arm/exclusive_monitor.h"
13#include "core/arm/unicorn/arm_unicorn.h" 16#include "core/arm/unicorn/arm_unicorn.h"
@@ -18,14 +21,14 @@ class Memory;
18 21
19namespace Core { 22namespace Core {
20 23
21class ARM_Dynarmic_Callbacks; 24class DynarmicCallbacks64;
22class DynarmicExclusiveMonitor; 25class DynarmicExclusiveMonitor;
23class System; 26class System;
24 27
25class ARM_Dynarmic final : public ARM_Interface { 28class ARM_Dynarmic_64 final : public ARM_Interface {
26public: 29public:
27 ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index); 30 ARM_Dynarmic_64(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index);
28 ~ARM_Dynarmic() override; 31 ~ARM_Dynarmic_64() override;
29 32
30 void SetPC(u64 pc) override; 33 void SetPC(u64 pc) override;
31 u64 GetPC() const override; 34 u64 GetPC() const override;
@@ -42,8 +45,10 @@ public:
42 void SetTPIDR_EL0(u64 value) override; 45 void SetTPIDR_EL0(u64 value) override;
43 u64 GetTPIDR_EL0() const override; 46 u64 GetTPIDR_EL0() const override;
44 47
45 void SaveContext(ThreadContext& ctx) override; 48 void SaveContext(ThreadContext32& ctx) override {}
46 void LoadContext(const ThreadContext& ctx) override; 49 void SaveContext(ThreadContext64& ctx) override;
50 void LoadContext(const ThreadContext32& ctx) override {}
51 void LoadContext(const ThreadContext64& ctx) override;
47 52
48 void PrepareReschedule() override; 53 void PrepareReschedule() override;
49 void ClearExclusiveState() override; 54 void ClearExclusiveState() override;
@@ -53,12 +58,17 @@ public:
53 std::size_t new_address_space_size_in_bits) override; 58 std::size_t new_address_space_size_in_bits) override;
54 59
55private: 60private:
56 std::unique_ptr<Dynarmic::A64::Jit> MakeJit(Common::PageTable& page_table, 61 std::shared_ptr<Dynarmic::A64::Jit> MakeJit(Common::PageTable& page_table,
57 std::size_t address_space_bits) const; 62 std::size_t address_space_bits) const;
58 63
59 friend class ARM_Dynarmic_Callbacks; 64 using JitCacheKey = std::pair<Common::PageTable*, std::size_t>;
60 std::unique_ptr<ARM_Dynarmic_Callbacks> cb; 65 using JitCacheType =
61 std::unique_ptr<Dynarmic::A64::Jit> jit; 66 std::unordered_map<JitCacheKey, std::shared_ptr<Dynarmic::A64::Jit>, Common::PairHash>;
67
68 friend class DynarmicCallbacks64;
69 std::unique_ptr<DynarmicCallbacks64> cb;
70 JitCacheType jit_cache;
71 std::shared_ptr<Dynarmic::A64::Jit> jit;
62 ARM_Unicorn inner_unicorn; 72 ARM_Unicorn inner_unicorn;
63 73
64 std::size_t core_index; 74 std::size_t core_index;
@@ -67,7 +77,7 @@ private:
67 77
68class DynarmicExclusiveMonitor final : public ExclusiveMonitor { 78class DynarmicExclusiveMonitor final : public ExclusiveMonitor {
69public: 79public:
70 explicit DynarmicExclusiveMonitor(Memory::Memory& memory_, std::size_t core_count); 80 explicit DynarmicExclusiveMonitor(Memory::Memory& memory, std::size_t core_count);
71 ~DynarmicExclusiveMonitor() override; 81 ~DynarmicExclusiveMonitor() override;
72 82
73 void SetExclusive(std::size_t core_index, VAddr addr) override; 83 void SetExclusive(std::size_t core_index, VAddr addr) override;
@@ -80,7 +90,7 @@ public:
80 bool ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) override; 90 bool ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) override;
81 91
82private: 92private:
83 friend class ARM_Dynarmic; 93 friend class ARM_Dynarmic_64;
84 Dynarmic::A64::ExclusiveMonitor monitor; 94 Dynarmic::A64::ExclusiveMonitor monitor;
85 Memory::Memory& memory; 95 Memory::Memory& memory;
86}; 96};
diff --git a/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp b/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp
new file mode 100644
index 000000000..3fdcdebde
--- /dev/null
+++ b/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp
@@ -0,0 +1,80 @@
1// Copyright 2017 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/arm/dynarmic/arm_dynarmic_cp15.h"
6
7using Callback = Dynarmic::A32::Coprocessor::Callback;
8using CallbackOrAccessOneWord = Dynarmic::A32::Coprocessor::CallbackOrAccessOneWord;
9using CallbackOrAccessTwoWords = Dynarmic::A32::Coprocessor::CallbackOrAccessTwoWords;
10
11std::optional<Callback> DynarmicCP15::CompileInternalOperation(bool two, unsigned opc1,
12 CoprocReg CRd, CoprocReg CRn,
13 CoprocReg CRm, unsigned opc2) {
14 return {};
15}
16
17CallbackOrAccessOneWord DynarmicCP15::CompileSendOneWord(bool two, unsigned opc1, CoprocReg CRn,
18 CoprocReg CRm, unsigned opc2) {
19 // TODO(merry): Privileged CP15 registers
20
21 if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C5 && opc2 == 4) {
22 // This is a dummy write, we ignore the value written here.
23 return &CP15[static_cast<std::size_t>(CP15Register::CP15_FLUSH_PREFETCH_BUFFER)];
24 }
25
26 if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C10) {
27 switch (opc2) {
28 case 4:
29 // This is a dummy write, we ignore the value written here.
30 return &CP15[static_cast<std::size_t>(CP15Register::CP15_DATA_SYNC_BARRIER)];
31 case 5:
32 // This is a dummy write, we ignore the value written here.
33 return &CP15[static_cast<std::size_t>(CP15Register::CP15_DATA_MEMORY_BARRIER)];
34 default:
35 return {};
36 }
37 }
38
39 if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0 && opc2 == 2) {
40 return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_UPRW)];
41 }
42
43 return {};
44}
45
46CallbackOrAccessTwoWords DynarmicCP15::CompileSendTwoWords(bool two, unsigned opc, CoprocReg CRm) {
47 return {};
48}
49
50CallbackOrAccessOneWord DynarmicCP15::CompileGetOneWord(bool two, unsigned opc1, CoprocReg CRn,
51 CoprocReg CRm, unsigned opc2) {
52 // TODO(merry): Privileged CP15 registers
53
54 if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0) {
55 switch (opc2) {
56 case 2:
57 return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_UPRW)];
58 case 3:
59 return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)];
60 default:
61 return {};
62 }
63 }
64
65 return {};
66}
67
68CallbackOrAccessTwoWords DynarmicCP15::CompileGetTwoWords(bool two, unsigned opc, CoprocReg CRm) {
69 return {};
70}
71
72std::optional<Callback> DynarmicCP15::CompileLoadWords(bool two, bool long_transfer, CoprocReg CRd,
73 std::optional<u8> option) {
74 return {};
75}
76
77std::optional<Callback> DynarmicCP15::CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd,
78 std::optional<u8> option) {
79 return {};
80}
diff --git a/src/core/arm/dynarmic/arm_dynarmic_cp15.h b/src/core/arm/dynarmic/arm_dynarmic_cp15.h
new file mode 100644
index 000000000..07bcde5f9
--- /dev/null
+++ b/src/core/arm/dynarmic/arm_dynarmic_cp15.h
@@ -0,0 +1,152 @@
1// Copyright 2017 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <optional>
9
10#include <dynarmic/A32/coprocessor.h>
11#include "common/common_types.h"
12
/// Indices into the flat CP15 register array. Enumerators are numbered consecutively from
/// zero; a few names are aliases that share the value of an earlier enumerator.
enum class CP15Register {
    // c0 - Information registers
    CP15_MAIN_ID,
    CP15_CACHE_TYPE,
    CP15_TCM_STATUS,
    CP15_TLB_TYPE,
    CP15_CPU_ID,
    CP15_PROCESSOR_FEATURE_0,
    CP15_PROCESSOR_FEATURE_1,
    CP15_DEBUG_FEATURE_0,
    CP15_AUXILIARY_FEATURE_0,
    CP15_MEMORY_MODEL_FEATURE_0,
    CP15_MEMORY_MODEL_FEATURE_1,
    CP15_MEMORY_MODEL_FEATURE_2,
    CP15_MEMORY_MODEL_FEATURE_3,
    CP15_ISA_FEATURE_0,
    CP15_ISA_FEATURE_1,
    CP15_ISA_FEATURE_2,
    CP15_ISA_FEATURE_3,
    CP15_ISA_FEATURE_4,

    // c1 - Control registers
    CP15_CONTROL,
    CP15_AUXILIARY_CONTROL,
    CP15_COPROCESSOR_ACCESS_CONTROL,

    // c2 - Translation table registers
    CP15_TRANSLATION_BASE_TABLE_0,
    CP15_TRANSLATION_BASE_TABLE_1,
    CP15_TRANSLATION_BASE_CONTROL,
    CP15_DOMAIN_ACCESS_CONTROL,
    CP15_RESERVED,

    // c5 - Fault status registers
    CP15_FAULT_STATUS,
    CP15_INSTR_FAULT_STATUS,
    CP15_COMBINED_DATA_FSR = CP15_FAULT_STATUS, // Alias of CP15_FAULT_STATUS
    CP15_INST_FSR = CP15_INSTR_FAULT_STATUS,    // Alias of CP15_INSTR_FAULT_STATUS

    // c6 - Fault Address registers
    CP15_FAULT_ADDRESS,
    CP15_COMBINED_DATA_FAR = CP15_FAULT_ADDRESS, // Alias of CP15_FAULT_ADDRESS
    CP15_WFAR,
    CP15_IFAR,

    // c7 - Cache operation registers
    CP15_WAIT_FOR_INTERRUPT,
    CP15_PHYS_ADDRESS,
    CP15_INVALIDATE_INSTR_CACHE,
    CP15_INVALIDATE_INSTR_CACHE_USING_MVA,
    CP15_INVALIDATE_INSTR_CACHE_USING_INDEX,
    CP15_FLUSH_PREFETCH_BUFFER,
    CP15_FLUSH_BRANCH_TARGET_CACHE,
    CP15_FLUSH_BRANCH_TARGET_CACHE_ENTRY,
    CP15_INVALIDATE_DATA_CACHE,
    CP15_INVALIDATE_DATA_CACHE_LINE_USING_MVA,
    CP15_INVALIDATE_DATA_CACHE_LINE_USING_INDEX,
    CP15_INVALIDATE_DATA_AND_INSTR_CACHE,
    CP15_CLEAN_DATA_CACHE,
    CP15_CLEAN_DATA_CACHE_LINE_USING_MVA,
    CP15_CLEAN_DATA_CACHE_LINE_USING_INDEX,
    CP15_DATA_SYNC_BARRIER,
    CP15_DATA_MEMORY_BARRIER,
    CP15_CLEAN_AND_INVALIDATE_DATA_CACHE,
    CP15_CLEAN_AND_INVALIDATE_DATA_CACHE_LINE_USING_MVA,
    CP15_CLEAN_AND_INVALIDATE_DATA_CACHE_LINE_USING_INDEX,

    // c8 - TLB operations
    CP15_INVALIDATE_ITLB,
    CP15_INVALIDATE_ITLB_SINGLE_ENTRY,
    CP15_INVALIDATE_ITLB_ENTRY_ON_ASID_MATCH,
    CP15_INVALIDATE_ITLB_ENTRY_ON_MVA,
    CP15_INVALIDATE_DTLB,
    CP15_INVALIDATE_DTLB_SINGLE_ENTRY,
    CP15_INVALIDATE_DTLB_ENTRY_ON_ASID_MATCH,
    CP15_INVALIDATE_DTLB_ENTRY_ON_MVA,
    CP15_INVALIDATE_UTLB,
    CP15_INVALIDATE_UTLB_SINGLE_ENTRY,
    CP15_INVALIDATE_UTLB_ENTRY_ON_ASID_MATCH,
    CP15_INVALIDATE_UTLB_ENTRY_ON_MVA,

    // c9 - Data cache lockdown register
    CP15_DATA_CACHE_LOCKDOWN,

    // c10 - TLB/Memory map registers
    CP15_TLB_LOCKDOWN,
    CP15_PRIMARY_REGION_REMAP,
    CP15_NORMAL_REGION_REMAP,

    // c13 - Thread related registers
    CP15_PID,
    CP15_CONTEXT_ID,
    CP15_THREAD_UPRW, // Thread ID register - User/Privileged Read/Write
    CP15_THREAD_URO,  // Thread ID register - User Read Only (Privileged R/W)
    CP15_THREAD_PRW,  // Thread ID register - Privileged R/W only.

    // c15 - Performance and TLB lockdown registers
    CP15_PERFORMANCE_MONITOR_CONTROL,
    CP15_CYCLE_COUNTER,
    CP15_COUNT_0,
    CP15_COUNT_1,
    CP15_READ_MAIN_TLB_LOCKDOWN_ENTRY,
    CP15_WRITE_MAIN_TLB_LOCKDOWN_ENTRY,
    CP15_MAIN_TLB_LOCKDOWN_VIRT_ADDRESS,
    CP15_MAIN_TLB_LOCKDOWN_PHYS_ADDRESS,
    CP15_MAIN_TLB_LOCKDOWN_ATTRIBUTE,
    CP15_TLB_DEBUG_CONTROL,

    // Skyeye defined
    CP15_TLB_FAULT_ADDR,
    CP15_TLB_FAULT_STATUS,

    // Not an actual register: the number of distinct register slots.
    // All registers should be defined above this.
    CP15_REGISTER_COUNT,
};
129
130class DynarmicCP15 final : public Dynarmic::A32::Coprocessor {
131public:
132 using CoprocReg = Dynarmic::A32::CoprocReg;
133
134 explicit DynarmicCP15(u32* cp15) : CP15(cp15){};
135
136 std::optional<Callback> CompileInternalOperation(bool two, unsigned opc1, CoprocReg CRd,
137 CoprocReg CRn, CoprocReg CRm,
138 unsigned opc2) override;
139 CallbackOrAccessOneWord CompileSendOneWord(bool two, unsigned opc1, CoprocReg CRn,
140 CoprocReg CRm, unsigned opc2) override;
141 CallbackOrAccessTwoWords CompileSendTwoWords(bool two, unsigned opc, CoprocReg CRm) override;
142 CallbackOrAccessOneWord CompileGetOneWord(bool two, unsigned opc1, CoprocReg CRn, CoprocReg CRm,
143 unsigned opc2) override;
144 CallbackOrAccessTwoWords CompileGetTwoWords(bool two, unsigned opc, CoprocReg CRm) override;
145 std::optional<Callback> CompileLoadWords(bool two, bool long_transfer, CoprocReg CRd,
146 std::optional<u8> option) override;
147 std::optional<Callback> CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd,
148 std::optional<u8> option) override;
149
150private:
151 u32* CP15{};
152};
diff --git a/src/core/arm/exclusive_monitor.cpp b/src/core/arm/exclusive_monitor.cpp
index 94570e520..b32401e0b 100644
--- a/src/core/arm/exclusive_monitor.cpp
+++ b/src/core/arm/exclusive_monitor.cpp
@@ -3,7 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#ifdef ARCHITECTURE_x86_64 5#ifdef ARCHITECTURE_x86_64
6#include "core/arm/dynarmic/arm_dynarmic.h" 6#include "core/arm/dynarmic/arm_dynarmic_64.h"
7#endif 7#endif
8#include "core/arm/exclusive_monitor.h" 8#include "core/arm/exclusive_monitor.h"
9#include "core/memory.h" 9#include "core/memory.h"
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp
index f99ad5802..8a9800a96 100644
--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@@ -53,7 +53,7 @@ static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int si
53 void* user_data) { 53 void* user_data) {
54 auto* const system = static_cast<System*>(user_data); 54 auto* const system = static_cast<System*>(user_data);
55 55
56 ARM_Interface::ThreadContext ctx{}; 56 ARM_Interface::ThreadContext64 ctx{};
57 system->CurrentArmInterface().SaveContext(ctx); 57 system->CurrentArmInterface().SaveContext(ctx);
58 ASSERT_MSG(false, "Attempted to read from unmapped memory: 0x{:X}, pc=0x{:X}, lr=0x{:X}", addr, 58 ASSERT_MSG(false, "Attempted to read from unmapped memory: 0x{:X}, pc=0x{:X}, lr=0x{:X}", addr,
59 ctx.pc, ctx.cpu_registers[30]); 59 ctx.pc, ctx.cpu_registers[30]);
@@ -179,7 +179,7 @@ void ARM_Unicorn::ExecuteInstructions(std::size_t num_instructions) {
179 } 179 }
180 180
181 Kernel::Thread* const thread = system.CurrentScheduler().GetCurrentThread(); 181 Kernel::Thread* const thread = system.CurrentScheduler().GetCurrentThread();
182 SaveContext(thread->GetContext()); 182 SaveContext(thread->GetContext64());
183 if (last_bkpt_hit || GDBStub::IsMemoryBreak() || GDBStub::GetCpuStepFlag()) { 183 if (last_bkpt_hit || GDBStub::IsMemoryBreak() || GDBStub::GetCpuStepFlag()) {
184 last_bkpt_hit = false; 184 last_bkpt_hit = false;
185 GDBStub::Break(); 185 GDBStub::Break();
@@ -188,7 +188,7 @@ void ARM_Unicorn::ExecuteInstructions(std::size_t num_instructions) {
188 } 188 }
189} 189}
190 190
191void ARM_Unicorn::SaveContext(ThreadContext& ctx) { 191void ARM_Unicorn::SaveContext(ThreadContext64& ctx) {
192 int uregs[32]; 192 int uregs[32];
193 void* tregs[32]; 193 void* tregs[32];
194 194
@@ -215,7 +215,7 @@ void ARM_Unicorn::SaveContext(ThreadContext& ctx) {
215 CHECKED(uc_reg_read_batch(uc, uregs, tregs, 32)); 215 CHECKED(uc_reg_read_batch(uc, uregs, tregs, 32));
216} 216}
217 217
218void ARM_Unicorn::LoadContext(const ThreadContext& ctx) { 218void ARM_Unicorn::LoadContext(const ThreadContext64& ctx) {
219 int uregs[32]; 219 int uregs[32];
220 void* tregs[32]; 220 void* tregs[32];
221 221
diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h
index 3c5b155f9..f30d13cb6 100644
--- a/src/core/arm/unicorn/arm_unicorn.h
+++ b/src/core/arm/unicorn/arm_unicorn.h
@@ -30,8 +30,6 @@ public:
30 void SetTlsAddress(VAddr address) override; 30 void SetTlsAddress(VAddr address) override;
31 void SetTPIDR_EL0(u64 value) override; 31 void SetTPIDR_EL0(u64 value) override;
32 u64 GetTPIDR_EL0() const override; 32 u64 GetTPIDR_EL0() const override;
33 void SaveContext(ThreadContext& ctx) override;
34 void LoadContext(const ThreadContext& ctx) override;
35 void PrepareReschedule() override; 33 void PrepareReschedule() override;
36 void ClearExclusiveState() override; 34 void ClearExclusiveState() override;
37 void ExecuteInstructions(std::size_t num_instructions); 35 void ExecuteInstructions(std::size_t num_instructions);
@@ -41,6 +39,11 @@ public:
41 void PageTableChanged(Common::PageTable&, std::size_t) override {} 39 void PageTableChanged(Common::PageTable&, std::size_t) override {}
42 void RecordBreak(GDBStub::BreakpointAddress bkpt); 40 void RecordBreak(GDBStub::BreakpointAddress bkpt);
43 41
42 void SaveContext(ThreadContext32& ctx) override {}
43 void SaveContext(ThreadContext64& ctx) override;
44 void LoadContext(const ThreadContext32& ctx) override {}
45 void LoadContext(const ThreadContext64& ctx) override;
46
44private: 47private:
45 static void InterruptHook(uc_engine* uc, u32 int_no, void* user_data); 48 static void InterruptHook(uc_engine* uc, u32 int_no, void* user_data);
46 49
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 86e314c94..d1bc9340d 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -24,6 +24,7 @@
24#include "core/file_sys/sdmc_factory.h" 24#include "core/file_sys/sdmc_factory.h"
25#include "core/file_sys/vfs_concat.h" 25#include "core/file_sys/vfs_concat.h"
26#include "core/file_sys/vfs_real.h" 26#include "core/file_sys/vfs_real.h"
27#include "core/frontend/scope_acquire_context.h"
27#include "core/gdbstub/gdbstub.h" 28#include "core/gdbstub/gdbstub.h"
28#include "core/hardware_interrupt_manager.h" 29#include "core/hardware_interrupt_manager.h"
29#include "core/hle/kernel/client_port.h" 30#include "core/hle/kernel/client_port.h"
@@ -165,7 +166,7 @@ struct System::Impl {
165 service_manager = std::make_shared<Service::SM::ServiceManager>(); 166 service_manager = std::make_shared<Service::SM::ServiceManager>();
166 167
167 Service::Init(service_manager, system); 168 Service::Init(service_manager, system);
168 GDBStub::Init(); 169 GDBStub::DeferStart();
169 170
170 renderer = VideoCore::CreateRenderer(emu_window, system); 171 renderer = VideoCore::CreateRenderer(emu_window, system);
171 if (!renderer->Init()) { 172 if (!renderer->Init()) {
@@ -173,6 +174,7 @@ struct System::Impl {
173 } 174 }
174 interrupt_manager = std::make_unique<Core::Hardware::InterruptManager>(system); 175 interrupt_manager = std::make_unique<Core::Hardware::InterruptManager>(system);
175 gpu_core = VideoCore::CreateGPU(system); 176 gpu_core = VideoCore::CreateGPU(system);
177 renderer->Rasterizer().SetupDirtyFlags();
176 178
177 is_powered_on = true; 179 is_powered_on = true;
178 exit_lock = false; 180 exit_lock = false;
@@ -184,6 +186,8 @@ struct System::Impl {
184 186
185 ResultStatus Load(System& system, Frontend::EmuWindow& emu_window, 187 ResultStatus Load(System& system, Frontend::EmuWindow& emu_window,
186 const std::string& filepath) { 188 const std::string& filepath) {
189 Core::Frontend::ScopeAcquireContext acquire_context{emu_window};
190
187 app_loader = Loader::GetLoader(GetGameFileFromPath(virtual_filesystem, filepath)); 191 app_loader = Loader::GetLoader(GetGameFileFromPath(virtual_filesystem, filepath));
188 if (!app_loader) { 192 if (!app_loader) {
189 LOG_CRITICAL(Core, "Failed to obtain loader for {}!", filepath); 193 LOG_CRITICAL(Core, "Failed to obtain loader for {}!", filepath);
diff --git a/src/core/core_manager.cpp b/src/core/core_manager.cpp
index 8eacf92dd..b6b797c80 100644
--- a/src/core/core_manager.cpp
+++ b/src/core/core_manager.cpp
@@ -6,9 +6,6 @@
6#include <mutex> 6#include <mutex>
7 7
8#include "common/logging/log.h" 8#include "common/logging/log.h"
9#ifdef ARCHITECTURE_x86_64
10#include "core/arm/dynarmic/arm_dynarmic.h"
11#endif
12#include "core/arm/exclusive_monitor.h" 9#include "core/arm/exclusive_monitor.h"
13#include "core/arm/unicorn/arm_unicorn.h" 10#include "core/arm/unicorn/arm_unicorn.h"
14#include "core/core.h" 11#include "core/core.h"
diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h
index 3376eedc5..5eb87fb63 100644
--- a/src/core/frontend/emu_window.h
+++ b/src/core/frontend/emu_window.h
@@ -26,9 +26,6 @@ public:
26 26
27 /// Releases (dunno if this is the "right" word) the context from the caller thread 27 /// Releases (dunno if this is the "right" word) the context from the caller thread
28 virtual void DoneCurrent() = 0; 28 virtual void DoneCurrent() = 0;
29
30 /// Swap buffers to display the next frame
31 virtual void SwapBuffers() = 0;
32}; 29};
33 30
34/** 31/**
diff --git a/src/core/frontend/framebuffer_layout.cpp b/src/core/frontend/framebuffer_layout.cpp
index 2dc795d56..68a0e0906 100644
--- a/src/core/frontend/framebuffer_layout.cpp
+++ b/src/core/frontend/framebuffer_layout.cpp
@@ -48,8 +48,8 @@ FramebufferLayout FrameLayoutFromResolutionScale(u32 res_scale) {
48 u32 width, height; 48 u32 width, height;
49 49
50 if (Settings::values.use_docked_mode) { 50 if (Settings::values.use_docked_mode) {
51 width = ScreenDocked::WidthDocked * res_scale; 51 width = ScreenDocked::Width * res_scale;
52 height = ScreenDocked::HeightDocked * res_scale; 52 height = ScreenDocked::Height * res_scale;
53 } else { 53 } else {
54 width = ScreenUndocked::Width * res_scale; 54 width = ScreenUndocked::Width * res_scale;
55 height = ScreenUndocked::Height * res_scale; 55 height = ScreenUndocked::Height * res_scale;
diff --git a/src/core/frontend/framebuffer_layout.h b/src/core/frontend/framebuffer_layout.h
index 1d39c1faf..15ecfb13d 100644
--- a/src/core/frontend/framebuffer_layout.h
+++ b/src/core/frontend/framebuffer_layout.h
@@ -8,15 +8,15 @@
8 8
9namespace Layout { 9namespace Layout {
10 10
11enum ScreenUndocked : u32 { 11namespace ScreenUndocked {
12 Width = 1280, 12constexpr u32 Width = 1280;
13 Height = 720, 13constexpr u32 Height = 720;
14}; 14} // namespace ScreenUndocked
15 15
16enum ScreenDocked : u32 { 16namespace ScreenDocked {
17 WidthDocked = 1920, 17constexpr u32 Width = 1920;
18 HeightDocked = 1080, 18constexpr u32 Height = 1080;
19}; 19} // namespace ScreenDocked
20 20
21enum class AspectRatio { 21enum class AspectRatio {
22 Default, 22 Default,
@@ -29,6 +29,7 @@ enum class AspectRatio {
29struct FramebufferLayout { 29struct FramebufferLayout {
30 u32 width{ScreenUndocked::Width}; 30 u32 width{ScreenUndocked::Width};
31 u32 height{ScreenUndocked::Height}; 31 u32 height{ScreenUndocked::Height};
32 bool is_srgb{};
32 33
33 Common::Rectangle<u32> screen; 34 Common::Rectangle<u32> screen;
34 35
diff --git a/src/core/frontend/scope_acquire_context.cpp b/src/core/frontend/scope_acquire_context.cpp
new file mode 100644
index 000000000..878c3157c
--- /dev/null
+++ b/src/core/frontend/scope_acquire_context.cpp
@@ -0,0 +1,18 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/frontend/emu_window.h"
6#include "core/frontend/scope_acquire_context.h"
7
8namespace Core::Frontend {
9
10ScopeAcquireContext::ScopeAcquireContext(Core::Frontend::GraphicsContext& context)
11 : context{context} {
12 context.MakeCurrent();
13}
14ScopeAcquireContext::~ScopeAcquireContext() {
15 context.DoneCurrent();
16}
17
18} // namespace Core::Frontend
diff --git a/src/core/frontend/scope_acquire_window_context.h b/src/core/frontend/scope_acquire_context.h
index 2d9f6e825..7a65c0623 100644
--- a/src/core/frontend/scope_acquire_window_context.h
+++ b/src/core/frontend/scope_acquire_context.h
@@ -8,16 +8,16 @@
8 8
9namespace Core::Frontend { 9namespace Core::Frontend {
10 10
11class EmuWindow; 11class GraphicsContext;
12 12
13/// Helper class to acquire/release window context within a given scope 13/// Helper class to acquire/release window context within a given scope
14class ScopeAcquireWindowContext : NonCopyable { 14class ScopeAcquireContext : NonCopyable {
15public: 15public:
16 explicit ScopeAcquireWindowContext(Core::Frontend::EmuWindow& window); 16 explicit ScopeAcquireContext(Core::Frontend::GraphicsContext& context);
17 ~ScopeAcquireWindowContext(); 17 ~ScopeAcquireContext();
18 18
19private: 19private:
20 Core::Frontend::EmuWindow& emu_window; 20 Core::Frontend::GraphicsContext& context;
21}; 21};
22 22
23} // namespace Core::Frontend 23} // namespace Core::Frontend
diff --git a/src/core/frontend/scope_acquire_window_context.cpp b/src/core/frontend/scope_acquire_window_context.cpp
deleted file mode 100644
index 3663dad17..000000000
--- a/src/core/frontend/scope_acquire_window_context.cpp
+++ /dev/null
@@ -1,18 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/frontend/emu_window.h"
6#include "core/frontend/scope_acquire_window_context.h"
7
8namespace Core::Frontend {
9
10ScopeAcquireWindowContext::ScopeAcquireWindowContext(Core::Frontend::EmuWindow& emu_window_)
11 : emu_window{emu_window_} {
12 emu_window.MakeCurrent();
13}
14ScopeAcquireWindowContext::~ScopeAcquireWindowContext() {
15 emu_window.DoneCurrent();
16}
17
18} // namespace Core::Frontend
diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp
index 67e95999d..6d15aeed9 100644
--- a/src/core/gdbstub/gdbstub.cpp
+++ b/src/core/gdbstub/gdbstub.cpp
@@ -141,6 +141,7 @@ constexpr char target_xml[] =
141)"; 141)";
142 142
143int gdbserver_socket = -1; 143int gdbserver_socket = -1;
144bool defer_start = false;
144 145
145u8 command_buffer[GDB_BUFFER_SIZE]; 146u8 command_buffer[GDB_BUFFER_SIZE];
146u32 command_length; 147u32 command_length;
@@ -217,7 +218,7 @@ static u64 RegRead(std::size_t id, Kernel::Thread* thread = nullptr) {
217 return 0; 218 return 0;
218 } 219 }
219 220
220 const auto& thread_context = thread->GetContext(); 221 const auto& thread_context = thread->GetContext64();
221 222
222 if (id < SP_REGISTER) { 223 if (id < SP_REGISTER) {
223 return thread_context.cpu_registers[id]; 224 return thread_context.cpu_registers[id];
@@ -239,7 +240,7 @@ static void RegWrite(std::size_t id, u64 val, Kernel::Thread* thread = nullptr)
239 return; 240 return;
240 } 241 }
241 242
242 auto& thread_context = thread->GetContext(); 243 auto& thread_context = thread->GetContext64();
243 244
244 if (id < SP_REGISTER) { 245 if (id < SP_REGISTER) {
245 thread_context.cpu_registers[id] = val; 246 thread_context.cpu_registers[id] = val;
@@ -259,7 +260,7 @@ static u128 FpuRead(std::size_t id, Kernel::Thread* thread = nullptr) {
259 return u128{0}; 260 return u128{0};
260 } 261 }
261 262
262 auto& thread_context = thread->GetContext(); 263 auto& thread_context = thread->GetContext64();
263 264
264 if (id >= UC_ARM64_REG_Q0 && id < FPCR_REGISTER) { 265 if (id >= UC_ARM64_REG_Q0 && id < FPCR_REGISTER) {
265 return thread_context.vector_registers[id - UC_ARM64_REG_Q0]; 266 return thread_context.vector_registers[id - UC_ARM64_REG_Q0];
@@ -275,7 +276,7 @@ static void FpuWrite(std::size_t id, u128 val, Kernel::Thread* thread = nullptr)
275 return; 276 return;
276 } 277 }
277 278
278 auto& thread_context = thread->GetContext(); 279 auto& thread_context = thread->GetContext64();
279 280
280 if (id >= UC_ARM64_REG_Q0 && id < FPCR_REGISTER) { 281 if (id >= UC_ARM64_REG_Q0 && id < FPCR_REGISTER) {
281 thread_context.vector_registers[id - UC_ARM64_REG_Q0] = val; 282 thread_context.vector_registers[id - UC_ARM64_REG_Q0] = val;
@@ -916,7 +917,7 @@ static void WriteRegister() {
916 // Update ARM context, skipping scheduler - no running threads at this point 917 // Update ARM context, skipping scheduler - no running threads at this point
917 Core::System::GetInstance() 918 Core::System::GetInstance()
918 .ArmInterface(current_core) 919 .ArmInterface(current_core)
919 .LoadContext(current_thread->GetContext()); 920 .LoadContext(current_thread->GetContext64());
920 921
921 SendReply("OK"); 922 SendReply("OK");
922} 923}
@@ -947,7 +948,7 @@ static void WriteRegisters() {
947 // Update ARM context, skipping scheduler - no running threads at this point 948 // Update ARM context, skipping scheduler - no running threads at this point
948 Core::System::GetInstance() 949 Core::System::GetInstance()
949 .ArmInterface(current_core) 950 .ArmInterface(current_core)
950 .LoadContext(current_thread->GetContext()); 951 .LoadContext(current_thread->GetContext64());
951 952
952 SendReply("OK"); 953 SendReply("OK");
953} 954}
@@ -1019,7 +1020,7 @@ static void Step() {
1019 // Update ARM context, skipping scheduler - no running threads at this point 1020 // Update ARM context, skipping scheduler - no running threads at this point
1020 Core::System::GetInstance() 1021 Core::System::GetInstance()
1021 .ArmInterface(current_core) 1022 .ArmInterface(current_core)
1022 .LoadContext(current_thread->GetContext()); 1023 .LoadContext(current_thread->GetContext64());
1023 } 1024 }
1024 step_loop = true; 1025 step_loop = true;
1025 halt_loop = true; 1026 halt_loop = true;
@@ -1166,6 +1167,9 @@ static void RemoveBreakpoint() {
1166 1167
1167void HandlePacket() { 1168void HandlePacket() {
1168 if (!IsConnected()) { 1169 if (!IsConnected()) {
1170 if (defer_start) {
1171 ToggleServer(true);
1172 }
1169 return; 1173 return;
1170 } 1174 }
1171 1175
@@ -1256,6 +1260,10 @@ void ToggleServer(bool status) {
1256 } 1260 }
1257} 1261}
1258 1262
1263void DeferStart() {
1264 defer_start = true;
1265}
1266
1259static void Init(u16 port) { 1267static void Init(u16 port) {
1260 if (!server_enabled) { 1268 if (!server_enabled) {
1261 // Set the halt loop to false in case the user enabled the gdbstub mid-execution. 1269 // Set the halt loop to false in case the user enabled the gdbstub mid-execution.
@@ -1341,6 +1349,7 @@ void Shutdown() {
1341 if (!server_enabled) { 1349 if (!server_enabled) {
1342 return; 1350 return;
1343 } 1351 }
1352 defer_start = false;
1344 1353
1345 LOG_INFO(Debug_GDBStub, "Stopping GDB ..."); 1354 LOG_INFO(Debug_GDBStub, "Stopping GDB ...");
1346 if (gdbserver_socket != -1) { 1355 if (gdbserver_socket != -1) {
diff --git a/src/core/gdbstub/gdbstub.h b/src/core/gdbstub/gdbstub.h
index 5a36524b2..8fe3c320b 100644
--- a/src/core/gdbstub/gdbstub.h
+++ b/src/core/gdbstub/gdbstub.h
@@ -43,6 +43,13 @@ void ToggleServer(bool status);
43/// Start the gdbstub server. 43/// Start the gdbstub server.
44void Init(); 44void Init();
45 45
46/**
47 * Defer initialization of the gdbstub to the first packet processing functions.
48 * This avoids a case where the gdbstub thread is frozen after initialization
49 * and fails to respond in time to packets.
50 */
51void DeferStart();
52
46/// Stop gdbstub server. 53/// Stop gdbstub server.
47void Shutdown(); 54void Shutdown();
48 55
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 9232f4d7e..e47f1deed 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -186,6 +186,10 @@ struct KernelCore::Impl {
186 return; 186 return;
187 } 187 }
188 188
189 for (auto& core : cores) {
190 core.SetIs64Bit(process->Is64BitProcess());
191 }
192
189 system.Memory().SetCurrentPageTable(*process); 193 system.Memory().SetCurrentPageTable(*process);
190 } 194 }
191 195
diff --git a/src/core/hle/kernel/physical_core.cpp b/src/core/hle/kernel/physical_core.cpp
index 9303dd273..aa2787467 100644
--- a/src/core/hle/kernel/physical_core.cpp
+++ b/src/core/hle/kernel/physical_core.cpp
@@ -5,7 +5,8 @@
5#include "common/logging/log.h" 5#include "common/logging/log.h"
6#include "core/arm/arm_interface.h" 6#include "core/arm/arm_interface.h"
7#ifdef ARCHITECTURE_x86_64 7#ifdef ARCHITECTURE_x86_64
8#include "core/arm/dynarmic/arm_dynarmic.h" 8#include "core/arm/dynarmic/arm_dynarmic_32.h"
9#include "core/arm/dynarmic/arm_dynarmic_64.h"
9#endif 10#endif
10#include "core/arm/exclusive_monitor.h" 11#include "core/arm/exclusive_monitor.h"
11#include "core/arm/unicorn/arm_unicorn.h" 12#include "core/arm/unicorn/arm_unicorn.h"
@@ -20,13 +21,17 @@ PhysicalCore::PhysicalCore(Core::System& system, std::size_t id,
20 Core::ExclusiveMonitor& exclusive_monitor) 21 Core::ExclusiveMonitor& exclusive_monitor)
21 : core_index{id} { 22 : core_index{id} {
22#ifdef ARCHITECTURE_x86_64 23#ifdef ARCHITECTURE_x86_64
23 arm_interface = std::make_unique<Core::ARM_Dynarmic>(system, exclusive_monitor, core_index); 24 arm_interface_32 =
25 std::make_unique<Core::ARM_Dynarmic_32>(system, exclusive_monitor, core_index);
26 arm_interface_64 =
27 std::make_unique<Core::ARM_Dynarmic_64>(system, exclusive_monitor, core_index);
28
24#else 29#else
25 arm_interface = std::make_shared<Core::ARM_Unicorn>(system); 30 arm_interface = std::make_shared<Core::ARM_Unicorn>(system);
26 LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available"); 31 LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
27#endif 32#endif
28 33
29 scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface, core_index); 34 scheduler = std::make_unique<Kernel::Scheduler>(system, core_index);
30} 35}
31 36
32PhysicalCore::~PhysicalCore() = default; 37PhysicalCore::~PhysicalCore() = default;
@@ -48,4 +53,12 @@ void PhysicalCore::Shutdown() {
48 scheduler->Shutdown(); 53 scheduler->Shutdown();
49} 54}
50 55
56void PhysicalCore::SetIs64Bit(bool is_64_bit) {
57 if (is_64_bit) {
58 arm_interface = arm_interface_64.get();
59 } else {
60 arm_interface = arm_interface_32.get();
61 }
62}
63
51} // namespace Kernel 64} // namespace Kernel
diff --git a/src/core/hle/kernel/physical_core.h b/src/core/hle/kernel/physical_core.h
index 4c32c0f1b..3269166be 100644
--- a/src/core/hle/kernel/physical_core.h
+++ b/src/core/hle/kernel/physical_core.h
@@ -68,10 +68,14 @@ public:
68 return *scheduler; 68 return *scheduler;
69 } 69 }
70 70
71 void SetIs64Bit(bool is_64_bit);
72
71private: 73private:
72 std::size_t core_index; 74 std::size_t core_index;
73 std::unique_ptr<Core::ARM_Interface> arm_interface; 75 std::unique_ptr<Core::ARM_Interface> arm_interface_32;
76 std::unique_ptr<Core::ARM_Interface> arm_interface_64;
74 std::unique_ptr<Kernel::Scheduler> scheduler; 77 std::unique_ptr<Kernel::Scheduler> scheduler;
78 Core::ARM_Interface* arm_interface{};
75}; 79};
76 80
77} // namespace Kernel 81} // namespace Kernel
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index 2fcb7326c..edc414d69 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -42,7 +42,8 @@ void SetupMainThread(Process& owner_process, KernelCore& kernel, u32 priority) {
42 42
43 // Register 1 must be a handle to the main thread 43 // Register 1 must be a handle to the main thread
44 const Handle thread_handle = owner_process.GetHandleTable().Create(thread).Unwrap(); 44 const Handle thread_handle = owner_process.GetHandleTable().Create(thread).Unwrap();
45 thread->GetContext().cpu_registers[1] = thread_handle; 45 thread->GetContext32().cpu_registers[1] = thread_handle;
46 thread->GetContext64().cpu_registers[1] = thread_handle;
46 47
47 // Threads by default are dormant, wake up the main thread so it runs when the scheduler fires 48 // Threads by default are dormant, wake up the main thread so it runs when the scheduler fires
48 thread->ResumeFromWait(); 49 thread->ResumeFromWait();
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index c65f82fb7..1140c72a3 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -383,8 +383,8 @@ void GlobalScheduler::Unlock() {
383 // TODO(Blinkhawk): Setup the interrupts and change context on current core. 383 // TODO(Blinkhawk): Setup the interrupts and change context on current core.
384} 384}
385 385
386Scheduler::Scheduler(Core::System& system, Core::ARM_Interface& cpu_core, std::size_t core_id) 386Scheduler::Scheduler(Core::System& system, std::size_t core_id)
387 : system(system), cpu_core(cpu_core), core_id(core_id) {} 387 : system{system}, core_id{core_id} {}
388 388
389Scheduler::~Scheduler() = default; 389Scheduler::~Scheduler() = default;
390 390
@@ -422,9 +422,10 @@ void Scheduler::UnloadThread() {
422 422
423 // Save context for previous thread 423 // Save context for previous thread
424 if (previous_thread) { 424 if (previous_thread) {
425 cpu_core.SaveContext(previous_thread->GetContext()); 425 system.ArmInterface(core_id).SaveContext(previous_thread->GetContext32());
426 system.ArmInterface(core_id).SaveContext(previous_thread->GetContext64());
426 // Save the TPIDR_EL0 system register in case it was modified. 427 // Save the TPIDR_EL0 system register in case it was modified.
427 previous_thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0()); 428 previous_thread->SetTPIDR_EL0(system.ArmInterface(core_id).GetTPIDR_EL0());
428 429
429 if (previous_thread->GetStatus() == ThreadStatus::Running) { 430 if (previous_thread->GetStatus() == ThreadStatus::Running) {
430 // This is only the case when a reschedule is triggered without the current thread 431 // This is only the case when a reschedule is triggered without the current thread
@@ -451,9 +452,10 @@ void Scheduler::SwitchContext() {
451 452
452 // Save context for previous thread 453 // Save context for previous thread
453 if (previous_thread) { 454 if (previous_thread) {
454 cpu_core.SaveContext(previous_thread->GetContext()); 455 system.ArmInterface(core_id).SaveContext(previous_thread->GetContext32());
456 system.ArmInterface(core_id).SaveContext(previous_thread->GetContext64());
455 // Save the TPIDR_EL0 system register in case it was modified. 457 // Save the TPIDR_EL0 system register in case it was modified.
456 previous_thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0()); 458 previous_thread->SetTPIDR_EL0(system.ArmInterface(core_id).GetTPIDR_EL0());
457 459
458 if (previous_thread->GetStatus() == ThreadStatus::Running) { 460 if (previous_thread->GetStatus() == ThreadStatus::Running) {
459 // This is only the case when a reschedule is triggered without the current thread 461 // This is only the case when a reschedule is triggered without the current thread
@@ -481,9 +483,10 @@ void Scheduler::SwitchContext() {
481 system.Kernel().MakeCurrentProcess(thread_owner_process); 483 system.Kernel().MakeCurrentProcess(thread_owner_process);
482 } 484 }
483 485
484 cpu_core.LoadContext(new_thread->GetContext()); 486 system.ArmInterface(core_id).LoadContext(new_thread->GetContext32());
485 cpu_core.SetTlsAddress(new_thread->GetTLSAddress()); 487 system.ArmInterface(core_id).LoadContext(new_thread->GetContext64());
486 cpu_core.SetTPIDR_EL0(new_thread->GetTPIDR_EL0()); 488 system.ArmInterface(core_id).SetTlsAddress(new_thread->GetTLSAddress());
489 system.ArmInterface(core_id).SetTPIDR_EL0(new_thread->GetTPIDR_EL0());
487 } else { 490 } else {
488 current_thread = nullptr; 491 current_thread = nullptr;
489 // Note: We do not reset the current process and current page table when idling because 492 // Note: We do not reset the current process and current page table when idling because
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h
index 1c93a838c..07df33f9c 100644
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -181,7 +181,7 @@ private:
181 181
182class Scheduler final { 182class Scheduler final {
183public: 183public:
184 explicit Scheduler(Core::System& system, Core::ARM_Interface& cpu_core, std::size_t core_id); 184 explicit Scheduler(Core::System& system, std::size_t core_id);
185 ~Scheduler(); 185 ~Scheduler();
186 186
187 /// Returns whether there are any threads that are ready to run. 187 /// Returns whether there are any threads that are ready to run.
@@ -235,7 +235,6 @@ private:
235 std::shared_ptr<Thread> selected_thread = nullptr; 235 std::shared_ptr<Thread> selected_thread = nullptr;
236 236
237 Core::System& system; 237 Core::System& system;
238 Core::ARM_Interface& cpu_core;
239 u64 last_context_switch_time = 0; 238 u64 last_context_switch_time = 0;
240 u64 idle_selection_count = 0; 239 u64 idle_selection_count = 0;
241 const std::size_t core_id; 240 const std::size_t core_id;
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index fd91779a3..4ffc113c2 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -187,6 +187,13 @@ static ResultCode SetHeapSize(Core::System& system, VAddr* heap_addr, u64 heap_s
187 return RESULT_SUCCESS; 187 return RESULT_SUCCESS;
188} 188}
189 189
190static ResultCode SetHeapSize32(Core::System& system, u32* heap_addr, u32 heap_size) {
191 VAddr temp_heap_addr{};
192 const ResultCode result{SetHeapSize(system, &temp_heap_addr, heap_size)};
193 *heap_addr = static_cast<u32>(temp_heap_addr);
194 return result;
195}
196
190static ResultCode SetMemoryPermission(Core::System& system, VAddr addr, u64 size, u32 prot) { 197static ResultCode SetMemoryPermission(Core::System& system, VAddr addr, u64 size, u32 prot) {
191 LOG_TRACE(Kernel_SVC, "called, addr=0x{:X}, size=0x{:X}, prot=0x{:X}", addr, size, prot); 198 LOG_TRACE(Kernel_SVC, "called, addr=0x{:X}, size=0x{:X}, prot=0x{:X}", addr, size, prot);
192 199
@@ -371,6 +378,12 @@ static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle,
371 return RESULT_SUCCESS; 378 return RESULT_SUCCESS;
372} 379}
373 380
381static ResultCode ConnectToNamedPort32(Core::System& system, Handle* out_handle,
382 u32 port_name_address) {
383
384 return ConnectToNamedPort(system, out_handle, port_name_address);
385}
386
374/// Makes a blocking IPC call to an OS service. 387/// Makes a blocking IPC call to an OS service.
375static ResultCode SendSyncRequest(Core::System& system, Handle handle) { 388static ResultCode SendSyncRequest(Core::System& system, Handle handle) {
376 const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); 389 const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable();
@@ -390,6 +403,10 @@ static ResultCode SendSyncRequest(Core::System& system, Handle handle) {
390 return session->SendSyncRequest(SharedFrom(thread), system.Memory()); 403 return session->SendSyncRequest(SharedFrom(thread), system.Memory());
391} 404}
392 405
406static ResultCode SendSyncRequest32(Core::System& system, Handle handle) {
407 return SendSyncRequest(system, handle);
408}
409
393/// Get the ID for the specified thread. 410/// Get the ID for the specified thread.
394static ResultCode GetThreadId(Core::System& system, u64* thread_id, Handle thread_handle) { 411static ResultCode GetThreadId(Core::System& system, u64* thread_id, Handle thread_handle) {
395 LOG_TRACE(Kernel_SVC, "called thread=0x{:08X}", thread_handle); 412 LOG_TRACE(Kernel_SVC, "called thread=0x{:08X}", thread_handle);
@@ -405,6 +422,17 @@ static ResultCode GetThreadId(Core::System& system, u64* thread_id, Handle threa
405 return RESULT_SUCCESS; 422 return RESULT_SUCCESS;
406} 423}
407 424
425static ResultCode GetThreadId32(Core::System& system, u32* thread_id_low, u32* thread_id_high,
426 Handle thread_handle) {
427 u64 thread_id{};
428 const ResultCode result{GetThreadId(system, &thread_id, thread_handle)};
429
430 *thread_id_low = static_cast<u32>(thread_id >> 32);
431 *thread_id_high = static_cast<u32>(thread_id & std::numeric_limits<u32>::max());
432
433 return result;
434}
435
408/// Gets the ID of the specified process or a specified thread's owning process. 436/// Gets the ID of the specified process or a specified thread's owning process.
409static ResultCode GetProcessId(Core::System& system, u64* process_id, Handle handle) { 437static ResultCode GetProcessId(Core::System& system, u64* process_id, Handle handle) {
410 LOG_DEBUG(Kernel_SVC, "called handle=0x{:08X}", handle); 438 LOG_DEBUG(Kernel_SVC, "called handle=0x{:08X}", handle);
@@ -479,6 +507,12 @@ static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr
479 return result; 507 return result;
480} 508}
481 509
510static ResultCode WaitSynchronization32(Core::System& system, u32 timeout_low, u32 handles_address,
511 s32 handle_count, u32 timeout_high, Handle* index) {
512 const s64 nano_seconds{(static_cast<s64>(timeout_high) << 32) | static_cast<s64>(timeout_low)};
513 return WaitSynchronization(system, index, handles_address, handle_count, nano_seconds);
514}
515
482/// Resumes a thread waiting on WaitSynchronization 516/// Resumes a thread waiting on WaitSynchronization
483static ResultCode CancelSynchronization(Core::System& system, Handle thread_handle) { 517static ResultCode CancelSynchronization(Core::System& system, Handle thread_handle) {
484 LOG_TRACE(Kernel_SVC, "called thread=0x{:X}", thread_handle); 518 LOG_TRACE(Kernel_SVC, "called thread=0x{:X}", thread_handle);
@@ -917,6 +951,18 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
917 } 951 }
918} 952}
919 953
954static ResultCode GetInfo32(Core::System& system, u32* result_low, u32* result_high, u32 sub_id_low,
955 u32 info_id, u32 handle, u32 sub_id_high) {
956 const u64 sub_id{static_cast<u64>(sub_id_low | (static_cast<u64>(sub_id_high) << 32))};
957 u64 res_value{};
958
959 const ResultCode result{GetInfo(system, &res_value, info_id, handle, sub_id)};
960 *result_high = static_cast<u32>(res_value >> 32);
961 *result_low = static_cast<u32>(res_value & std::numeric_limits<u32>::max());
962
963 return result;
964}
965
920/// Maps memory at a desired address 966/// Maps memory at a desired address
921static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size) { 967static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size) {
922 LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size); 968 LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size);
@@ -1058,7 +1104,7 @@ static ResultCode GetThreadContext(Core::System& system, VAddr thread_context, H
1058 return ERR_BUSY; 1104 return ERR_BUSY;
1059 } 1105 }
1060 1106
1061 Core::ARM_Interface::ThreadContext ctx = thread->GetContext(); 1107 Core::ARM_Interface::ThreadContext64 ctx = thread->GetContext64();
1062 // Mask away mode bits, interrupt bits, IL bit, and other reserved bits. 1108 // Mask away mode bits, interrupt bits, IL bit, and other reserved bits.
1063 ctx.pstate &= 0xFF0FFE20; 1109 ctx.pstate &= 0xFF0FFE20;
1064 1110
@@ -1088,6 +1134,10 @@ static ResultCode GetThreadPriority(Core::System& system, u32* priority, Handle
1088 return RESULT_SUCCESS; 1134 return RESULT_SUCCESS;
1089} 1135}
1090 1136
1137static ResultCode GetThreadPriority32(Core::System& system, u32* priority, Handle handle) {
1138 return GetThreadPriority(system, priority, handle);
1139}
1140
1091/// Sets the priority for the specified thread 1141/// Sets the priority for the specified thread
1092static ResultCode SetThreadPriority(Core::System& system, Handle handle, u32 priority) { 1142static ResultCode SetThreadPriority(Core::System& system, Handle handle, u32 priority) {
1093 LOG_TRACE(Kernel_SVC, "called"); 1143 LOG_TRACE(Kernel_SVC, "called");
@@ -1259,6 +1309,11 @@ static ResultCode QueryMemory(Core::System& system, VAddr memory_info_address,
1259 query_address); 1309 query_address);
1260} 1310}
1261 1311
1312static ResultCode QueryMemory32(Core::System& system, u32 memory_info_address,
1313 u32 page_info_address, u32 query_address) {
1314 return QueryMemory(system, memory_info_address, page_info_address, query_address);
1315}
1316
1262static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_handle, u64 dst_address, 1317static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_handle, u64 dst_address,
1263 u64 src_address, u64 size) { 1318 u64 src_address, u64 size) {
1264 LOG_DEBUG(Kernel_SVC, 1319 LOG_DEBUG(Kernel_SVC,
@@ -1675,6 +1730,10 @@ static void SignalProcessWideKey(Core::System& system, VAddr condition_variable_
1675 } 1730 }
1676} 1731}
1677 1732
1733static void SignalProcessWideKey32(Core::System& system, u32 condition_variable_addr, s32 target) {
1734 SignalProcessWideKey(system, condition_variable_addr, target);
1735}
1736
1678// Wait for an address (via Address Arbiter) 1737// Wait for an address (via Address Arbiter)
1679static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, s32 value, 1738static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, s32 value,
1680 s64 timeout) { 1739 s64 timeout) {
@@ -1760,6 +1819,10 @@ static ResultCode CloseHandle(Core::System& system, Handle handle) {
1760 return handle_table.Close(handle); 1819 return handle_table.Close(handle);
1761} 1820}
1762 1821
1822static ResultCode CloseHandle32(Core::System& system, Handle handle) {
1823 return CloseHandle(system, handle);
1824}
1825
1763/// Clears the signaled state of an event or process. 1826/// Clears the signaled state of an event or process.
1764static ResultCode ResetSignal(Core::System& system, Handle handle) { 1827static ResultCode ResetSignal(Core::System& system, Handle handle) {
1765 LOG_DEBUG(Kernel_SVC, "called handle 0x{:08X}", handle); 1828 LOG_DEBUG(Kernel_SVC, "called handle 0x{:08X}", handle);
@@ -2317,69 +2380,196 @@ struct FunctionDef {
2317}; 2380};
2318} // namespace 2381} // namespace
2319 2382
2320static const FunctionDef SVC_Table[] = { 2383static const FunctionDef SVC_Table_32[] = {
2321 {0x00, nullptr, "Unknown"}, 2384 {0x00, nullptr, "Unknown"},
2322 {0x01, SvcWrap<SetHeapSize>, "SetHeapSize"}, 2385 {0x01, SvcWrap32<SetHeapSize32>, "SetHeapSize32"},
2323 {0x02, SvcWrap<SetMemoryPermission>, "SetMemoryPermission"}, 2386 {0x02, nullptr, "Unknown"},
2324 {0x03, SvcWrap<SetMemoryAttribute>, "SetMemoryAttribute"}, 2387 {0x03, nullptr, "SetMemoryAttribute32"},
2325 {0x04, SvcWrap<MapMemory>, "MapMemory"}, 2388 {0x04, nullptr, "MapMemory32"},
2326 {0x05, SvcWrap<UnmapMemory>, "UnmapMemory"}, 2389 {0x05, nullptr, "UnmapMemory32"},
2327 {0x06, SvcWrap<QueryMemory>, "QueryMemory"}, 2390 {0x06, SvcWrap32<QueryMemory32>, "QueryMemory32"},
2328 {0x07, SvcWrap<ExitProcess>, "ExitProcess"}, 2391 {0x07, nullptr, "ExitProcess32"},
2329 {0x08, SvcWrap<CreateThread>, "CreateThread"}, 2392 {0x08, nullptr, "CreateThread32"},
2330 {0x09, SvcWrap<StartThread>, "StartThread"}, 2393 {0x09, nullptr, "StartThread32"},
2331 {0x0A, SvcWrap<ExitThread>, "ExitThread"}, 2394 {0x0a, nullptr, "ExitThread32"},
2332 {0x0B, SvcWrap<SleepThread>, "SleepThread"}, 2395 {0x0b, nullptr, "SleepThread32"},
2333 {0x0C, SvcWrap<GetThreadPriority>, "GetThreadPriority"}, 2396 {0x0c, SvcWrap32<GetThreadPriority32>, "GetThreadPriority32"},
2334 {0x0D, SvcWrap<SetThreadPriority>, "SetThreadPriority"}, 2397 {0x0d, nullptr, "SetThreadPriority32"},
2335 {0x0E, SvcWrap<GetThreadCoreMask>, "GetThreadCoreMask"}, 2398 {0x0e, nullptr, "GetThreadCoreMask32"},
2336 {0x0F, SvcWrap<SetThreadCoreMask>, "SetThreadCoreMask"}, 2399 {0x0f, nullptr, "SetThreadCoreMask32"},
2337 {0x10, SvcWrap<GetCurrentProcessorNumber>, "GetCurrentProcessorNumber"}, 2400 {0x10, nullptr, "GetCurrentProcessorNumber32"},
2338 {0x11, SvcWrap<SignalEvent>, "SignalEvent"}, 2401 {0x11, nullptr, "SignalEvent32"},
2339 {0x12, SvcWrap<ClearEvent>, "ClearEvent"}, 2402 {0x12, nullptr, "ClearEvent32"},
2340 {0x13, SvcWrap<MapSharedMemory>, "MapSharedMemory"}, 2403 {0x13, nullptr, "MapSharedMemory32"},
2341 {0x14, SvcWrap<UnmapSharedMemory>, "UnmapSharedMemory"}, 2404 {0x14, nullptr, "UnmapSharedMemory32"},
2342 {0x15, SvcWrap<CreateTransferMemory>, "CreateTransferMemory"}, 2405 {0x15, nullptr, "CreateTransferMemory32"},
2343 {0x16, SvcWrap<CloseHandle>, "CloseHandle"}, 2406 {0x16, SvcWrap32<CloseHandle32>, "CloseHandle32"},
2344 {0x17, SvcWrap<ResetSignal>, "ResetSignal"}, 2407 {0x17, nullptr, "ResetSignal32"},
2345 {0x18, SvcWrap<WaitSynchronization>, "WaitSynchronization"}, 2408 {0x18, SvcWrap32<WaitSynchronization32>, "WaitSynchronization32"},
2346 {0x19, SvcWrap<CancelSynchronization>, "CancelSynchronization"}, 2409 {0x19, nullptr, "CancelSynchronization32"},
2347 {0x1A, SvcWrap<ArbitrateLock>, "ArbitrateLock"}, 2410 {0x1a, nullptr, "ArbitrateLock32"},
2348 {0x1B, SvcWrap<ArbitrateUnlock>, "ArbitrateUnlock"}, 2411 {0x1b, nullptr, "ArbitrateUnlock32"},
2349 {0x1C, SvcWrap<WaitProcessWideKeyAtomic>, "WaitProcessWideKeyAtomic"}, 2412 {0x1c, nullptr, "WaitProcessWideKeyAtomic32"},
2350 {0x1D, SvcWrap<SignalProcessWideKey>, "SignalProcessWideKey"}, 2413 {0x1d, SvcWrap32<SignalProcessWideKey32>, "SignalProcessWideKey32"},
2351 {0x1E, SvcWrap<GetSystemTick>, "GetSystemTick"}, 2414 {0x1e, nullptr, "GetSystemTick32"},
2352 {0x1F, SvcWrap<ConnectToNamedPort>, "ConnectToNamedPort"}, 2415 {0x1f, SvcWrap32<ConnectToNamedPort32>, "ConnectToNamedPort32"},
2416 {0x20, nullptr, "Unknown"},
2417 {0x21, SvcWrap32<SendSyncRequest32>, "SendSyncRequest32"},
2418 {0x22, nullptr, "SendSyncRequestWithUserBuffer32"},
2419 {0x23, nullptr, "Unknown"},
2420 {0x24, nullptr, "GetProcessId32"},
2421 {0x25, SvcWrap32<GetThreadId32>, "GetThreadId32"},
2422 {0x26, nullptr, "Break32"},
2423 {0x27, nullptr, "OutputDebugString32"},
2424 {0x28, nullptr, "Unknown"},
2425 {0x29, SvcWrap32<GetInfo32>, "GetInfo32"},
2426 {0x2a, nullptr, "Unknown"},
2427 {0x2b, nullptr, "Unknown"},
2428 {0x2c, nullptr, "MapPhysicalMemory32"},
2429 {0x2d, nullptr, "UnmapPhysicalMemory32"},
2430 {0x2e, nullptr, "Unknown"},
2431 {0x2f, nullptr, "Unknown"},
2432 {0x30, nullptr, "Unknown"},
2433 {0x31, nullptr, "Unknown"},
2434 {0x32, nullptr, "SetThreadActivity32"},
2435 {0x33, nullptr, "GetThreadContext32"},
2436 {0x34, nullptr, "WaitForAddress32"},
2437 {0x35, nullptr, "SignalToAddress32"},
2438 {0x36, nullptr, "Unknown"},
2439 {0x37, nullptr, "Unknown"},
2440 {0x38, nullptr, "Unknown"},
2441 {0x39, nullptr, "Unknown"},
2442 {0x3a, nullptr, "Unknown"},
2443 {0x3b, nullptr, "Unknown"},
2444 {0x3c, nullptr, "Unknown"},
2445 {0x3d, nullptr, "Unknown"},
2446 {0x3e, nullptr, "Unknown"},
2447 {0x3f, nullptr, "Unknown"},
2448 {0x40, nullptr, "CreateSession32"},
2449 {0x41, nullptr, "AcceptSession32"},
2450 {0x42, nullptr, "Unknown"},
2451 {0x43, nullptr, "ReplyAndReceive32"},
2452 {0x44, nullptr, "Unknown"},
2453 {0x45, nullptr, "CreateEvent32"},
2454 {0x46, nullptr, "Unknown"},
2455 {0x47, nullptr, "Unknown"},
2456 {0x48, nullptr, "Unknown"},
2457 {0x49, nullptr, "Unknown"},
2458 {0x4a, nullptr, "Unknown"},
2459 {0x4b, nullptr, "Unknown"},
2460 {0x4c, nullptr, "Unknown"},
2461 {0x4d, nullptr, "Unknown"},
2462 {0x4e, nullptr, "Unknown"},
2463 {0x4f, nullptr, "Unknown"},
2464 {0x50, nullptr, "Unknown"},
2465 {0x51, nullptr, "Unknown"},
2466 {0x52, nullptr, "Unknown"},
2467 {0x53, nullptr, "Unknown"},
2468 {0x54, nullptr, "Unknown"},
2469 {0x55, nullptr, "Unknown"},
2470 {0x56, nullptr, "Unknown"},
2471 {0x57, nullptr, "Unknown"},
2472 {0x58, nullptr, "Unknown"},
2473 {0x59, nullptr, "Unknown"},
2474 {0x5a, nullptr, "Unknown"},
2475 {0x5b, nullptr, "Unknown"},
2476 {0x5c, nullptr, "Unknown"},
2477 {0x5d, nullptr, "Unknown"},
2478 {0x5e, nullptr, "Unknown"},
2479 {0x5F, nullptr, "FlushProcessDataCache32"},
2480 {0x60, nullptr, "Unknown"},
2481 {0x61, nullptr, "Unknown"},
2482 {0x62, nullptr, "Unknown"},
2483 {0x63, nullptr, "Unknown"},
2484 {0x64, nullptr, "Unknown"},
2485 {0x65, nullptr, "GetProcessList32"},
2486 {0x66, nullptr, "Unknown"},
2487 {0x67, nullptr, "Unknown"},
2488 {0x68, nullptr, "Unknown"},
2489 {0x69, nullptr, "Unknown"},
2490 {0x6A, nullptr, "Unknown"},
2491 {0x6B, nullptr, "Unknown"},
2492 {0x6C, nullptr, "Unknown"},
2493 {0x6D, nullptr, "Unknown"},
2494 {0x6E, nullptr, "Unknown"},
2495 {0x6f, nullptr, "GetSystemInfo32"},
2496 {0x70, nullptr, "CreatePort32"},
2497 {0x71, nullptr, "ManageNamedPort32"},
2498 {0x72, nullptr, "ConnectToPort32"},
2499 {0x73, nullptr, "SetProcessMemoryPermission32"},
2500 {0x74, nullptr, "Unknown"},
2501 {0x75, nullptr, "Unknown"},
2502 {0x76, nullptr, "Unknown"},
2503 {0x77, nullptr, "MapProcessCodeMemory32"},
2504 {0x78, nullptr, "UnmapProcessCodeMemory32"},
2505 {0x79, nullptr, "Unknown"},
2506 {0x7A, nullptr, "Unknown"},
2507 {0x7B, nullptr, "TerminateProcess32"},
2508};
2509
2510static const FunctionDef SVC_Table_64[] = {
2511 {0x00, nullptr, "Unknown"},
2512 {0x01, SvcWrap64<SetHeapSize>, "SetHeapSize"},
2513 {0x02, SvcWrap64<SetMemoryPermission>, "SetMemoryPermission"},
2514 {0x03, SvcWrap64<SetMemoryAttribute>, "SetMemoryAttribute"},
2515 {0x04, SvcWrap64<MapMemory>, "MapMemory"},
2516 {0x05, SvcWrap64<UnmapMemory>, "UnmapMemory"},
2517 {0x06, SvcWrap64<QueryMemory>, "QueryMemory"},
2518 {0x07, SvcWrap64<ExitProcess>, "ExitProcess"},
2519 {0x08, SvcWrap64<CreateThread>, "CreateThread"},
2520 {0x09, SvcWrap64<StartThread>, "StartThread"},
2521 {0x0A, SvcWrap64<ExitThread>, "ExitThread"},
2522 {0x0B, SvcWrap64<SleepThread>, "SleepThread"},
2523 {0x0C, SvcWrap64<GetThreadPriority>, "GetThreadPriority"},
2524 {0x0D, SvcWrap64<SetThreadPriority>, "SetThreadPriority"},
2525 {0x0E, SvcWrap64<GetThreadCoreMask>, "GetThreadCoreMask"},
2526 {0x0F, SvcWrap64<SetThreadCoreMask>, "SetThreadCoreMask"},
2527 {0x10, SvcWrap64<GetCurrentProcessorNumber>, "GetCurrentProcessorNumber"},
2528 {0x11, SvcWrap64<SignalEvent>, "SignalEvent"},
2529 {0x12, SvcWrap64<ClearEvent>, "ClearEvent"},
2530 {0x13, SvcWrap64<MapSharedMemory>, "MapSharedMemory"},
2531 {0x14, SvcWrap64<UnmapSharedMemory>, "UnmapSharedMemory"},
2532 {0x15, SvcWrap64<CreateTransferMemory>, "CreateTransferMemory"},
2533 {0x16, SvcWrap64<CloseHandle>, "CloseHandle"},
2534 {0x17, SvcWrap64<ResetSignal>, "ResetSignal"},
2535 {0x18, SvcWrap64<WaitSynchronization>, "WaitSynchronization"},
2536 {0x19, SvcWrap64<CancelSynchronization>, "CancelSynchronization"},
2537 {0x1A, SvcWrap64<ArbitrateLock>, "ArbitrateLock"},
2538 {0x1B, SvcWrap64<ArbitrateUnlock>, "ArbitrateUnlock"},
2539 {0x1C, SvcWrap64<WaitProcessWideKeyAtomic>, "WaitProcessWideKeyAtomic"},
2540 {0x1D, SvcWrap64<SignalProcessWideKey>, "SignalProcessWideKey"},
2541 {0x1E, SvcWrap64<GetSystemTick>, "GetSystemTick"},
2542 {0x1F, SvcWrap64<ConnectToNamedPort>, "ConnectToNamedPort"},
2353 {0x20, nullptr, "SendSyncRequestLight"}, 2543 {0x20, nullptr, "SendSyncRequestLight"},
2354 {0x21, SvcWrap<SendSyncRequest>, "SendSyncRequest"}, 2544 {0x21, SvcWrap64<SendSyncRequest>, "SendSyncRequest"},
2355 {0x22, nullptr, "SendSyncRequestWithUserBuffer"}, 2545 {0x22, nullptr, "SendSyncRequestWithUserBuffer"},
2356 {0x23, nullptr, "SendAsyncRequestWithUserBuffer"}, 2546 {0x23, nullptr, "SendAsyncRequestWithUserBuffer"},
2357 {0x24, SvcWrap<GetProcessId>, "GetProcessId"}, 2547 {0x24, SvcWrap64<GetProcessId>, "GetProcessId"},
2358 {0x25, SvcWrap<GetThreadId>, "GetThreadId"}, 2548 {0x25, SvcWrap64<GetThreadId>, "GetThreadId"},
2359 {0x26, SvcWrap<Break>, "Break"}, 2549 {0x26, SvcWrap64<Break>, "Break"},
2360 {0x27, SvcWrap<OutputDebugString>, "OutputDebugString"}, 2550 {0x27, SvcWrap64<OutputDebugString>, "OutputDebugString"},
2361 {0x28, nullptr, "ReturnFromException"}, 2551 {0x28, nullptr, "ReturnFromException"},
2362 {0x29, SvcWrap<GetInfo>, "GetInfo"}, 2552 {0x29, SvcWrap64<GetInfo>, "GetInfo"},
2363 {0x2A, nullptr, "FlushEntireDataCache"}, 2553 {0x2A, nullptr, "FlushEntireDataCache"},
2364 {0x2B, nullptr, "FlushDataCache"}, 2554 {0x2B, nullptr, "FlushDataCache"},
2365 {0x2C, SvcWrap<MapPhysicalMemory>, "MapPhysicalMemory"}, 2555 {0x2C, SvcWrap64<MapPhysicalMemory>, "MapPhysicalMemory"},
2366 {0x2D, SvcWrap<UnmapPhysicalMemory>, "UnmapPhysicalMemory"}, 2556 {0x2D, SvcWrap64<UnmapPhysicalMemory>, "UnmapPhysicalMemory"},
2367 {0x2E, nullptr, "GetFutureThreadInfo"}, 2557 {0x2E, nullptr, "GetFutureThreadInfo"},
2368 {0x2F, nullptr, "GetLastThreadInfo"}, 2558 {0x2F, nullptr, "GetLastThreadInfo"},
2369 {0x30, SvcWrap<GetResourceLimitLimitValue>, "GetResourceLimitLimitValue"}, 2559 {0x30, SvcWrap64<GetResourceLimitLimitValue>, "GetResourceLimitLimitValue"},
2370 {0x31, SvcWrap<GetResourceLimitCurrentValue>, "GetResourceLimitCurrentValue"}, 2560 {0x31, SvcWrap64<GetResourceLimitCurrentValue>, "GetResourceLimitCurrentValue"},
2371 {0x32, SvcWrap<SetThreadActivity>, "SetThreadActivity"}, 2561 {0x32, SvcWrap64<SetThreadActivity>, "SetThreadActivity"},
2372 {0x33, SvcWrap<GetThreadContext>, "GetThreadContext"}, 2562 {0x33, SvcWrap64<GetThreadContext>, "GetThreadContext"},
2373 {0x34, SvcWrap<WaitForAddress>, "WaitForAddress"}, 2563 {0x34, SvcWrap64<WaitForAddress>, "WaitForAddress"},
2374 {0x35, SvcWrap<SignalToAddress>, "SignalToAddress"}, 2564 {0x35, SvcWrap64<SignalToAddress>, "SignalToAddress"},
2375 {0x36, nullptr, "SynchronizePreemptionState"}, 2565 {0x36, nullptr, "SynchronizePreemptionState"},
2376 {0x37, nullptr, "Unknown"}, 2566 {0x37, nullptr, "Unknown"},
2377 {0x38, nullptr, "Unknown"}, 2567 {0x38, nullptr, "Unknown"},
2378 {0x39, nullptr, "Unknown"}, 2568 {0x39, nullptr, "Unknown"},
2379 {0x3A, nullptr, "Unknown"}, 2569 {0x3A, nullptr, "Unknown"},
2380 {0x3B, nullptr, "Unknown"}, 2570 {0x3B, nullptr, "Unknown"},
2381 {0x3C, SvcWrap<KernelDebug>, "KernelDebug"}, 2571 {0x3C, SvcWrap64<KernelDebug>, "KernelDebug"},
2382 {0x3D, SvcWrap<ChangeKernelTraceState>, "ChangeKernelTraceState"}, 2572 {0x3D, SvcWrap64<ChangeKernelTraceState>, "ChangeKernelTraceState"},
2383 {0x3E, nullptr, "Unknown"}, 2573 {0x3E, nullptr, "Unknown"},
2384 {0x3F, nullptr, "Unknown"}, 2574 {0x3F, nullptr, "Unknown"},
2385 {0x40, nullptr, "CreateSession"}, 2575 {0x40, nullptr, "CreateSession"},
@@ -2387,7 +2577,7 @@ static const FunctionDef SVC_Table[] = {
2387 {0x42, nullptr, "ReplyAndReceiveLight"}, 2577 {0x42, nullptr, "ReplyAndReceiveLight"},
2388 {0x43, nullptr, "ReplyAndReceive"}, 2578 {0x43, nullptr, "ReplyAndReceive"},
2389 {0x44, nullptr, "ReplyAndReceiveWithUserBuffer"}, 2579 {0x44, nullptr, "ReplyAndReceiveWithUserBuffer"},
2390 {0x45, SvcWrap<CreateEvent>, "CreateEvent"}, 2580 {0x45, SvcWrap64<CreateEvent>, "CreateEvent"},
2391 {0x46, nullptr, "Unknown"}, 2581 {0x46, nullptr, "Unknown"},
2392 {0x47, nullptr, "Unknown"}, 2582 {0x47, nullptr, "Unknown"},
2393 {0x48, nullptr, "MapPhysicalMemoryUnsafe"}, 2583 {0x48, nullptr, "MapPhysicalMemoryUnsafe"},
@@ -2398,9 +2588,9 @@ static const FunctionDef SVC_Table[] = {
2398 {0x4D, nullptr, "SleepSystem"}, 2588 {0x4D, nullptr, "SleepSystem"},
2399 {0x4E, nullptr, "ReadWriteRegister"}, 2589 {0x4E, nullptr, "ReadWriteRegister"},
2400 {0x4F, nullptr, "SetProcessActivity"}, 2590 {0x4F, nullptr, "SetProcessActivity"},
2401 {0x50, SvcWrap<CreateSharedMemory>, "CreateSharedMemory"}, 2591 {0x50, SvcWrap64<CreateSharedMemory>, "CreateSharedMemory"},
2402 {0x51, SvcWrap<MapTransferMemory>, "MapTransferMemory"}, 2592 {0x51, SvcWrap64<MapTransferMemory>, "MapTransferMemory"},
2403 {0x52, SvcWrap<UnmapTransferMemory>, "UnmapTransferMemory"}, 2593 {0x52, SvcWrap64<UnmapTransferMemory>, "UnmapTransferMemory"},
2404 {0x53, nullptr, "CreateInterruptEvent"}, 2594 {0x53, nullptr, "CreateInterruptEvent"},
2405 {0x54, nullptr, "QueryPhysicalAddress"}, 2595 {0x54, nullptr, "QueryPhysicalAddress"},
2406 {0x55, nullptr, "QueryIoMapping"}, 2596 {0x55, nullptr, "QueryIoMapping"},
@@ -2419,8 +2609,8 @@ static const FunctionDef SVC_Table[] = {
2419 {0x62, nullptr, "TerminateDebugProcess"}, 2609 {0x62, nullptr, "TerminateDebugProcess"},
2420 {0x63, nullptr, "GetDebugEvent"}, 2610 {0x63, nullptr, "GetDebugEvent"},
2421 {0x64, nullptr, "ContinueDebugEvent"}, 2611 {0x64, nullptr, "ContinueDebugEvent"},
2422 {0x65, SvcWrap<GetProcessList>, "GetProcessList"}, 2612 {0x65, SvcWrap64<GetProcessList>, "GetProcessList"},
2423 {0x66, SvcWrap<GetThreadList>, "GetThreadList"}, 2613 {0x66, SvcWrap64<GetThreadList>, "GetThreadList"},
2424 {0x67, nullptr, "GetDebugThreadContext"}, 2614 {0x67, nullptr, "GetDebugThreadContext"},
2425 {0x68, nullptr, "SetDebugThreadContext"}, 2615 {0x68, nullptr, "SetDebugThreadContext"},
2426 {0x69, nullptr, "QueryDebugProcessMemory"}, 2616 {0x69, nullptr, "QueryDebugProcessMemory"},
@@ -2436,24 +2626,32 @@ static const FunctionDef SVC_Table[] = {
2436 {0x73, nullptr, "SetProcessMemoryPermission"}, 2626 {0x73, nullptr, "SetProcessMemoryPermission"},
2437 {0x74, nullptr, "MapProcessMemory"}, 2627 {0x74, nullptr, "MapProcessMemory"},
2438 {0x75, nullptr, "UnmapProcessMemory"}, 2628 {0x75, nullptr, "UnmapProcessMemory"},
2439 {0x76, SvcWrap<QueryProcessMemory>, "QueryProcessMemory"}, 2629 {0x76, SvcWrap64<QueryProcessMemory>, "QueryProcessMemory"},
2440 {0x77, SvcWrap<MapProcessCodeMemory>, "MapProcessCodeMemory"}, 2630 {0x77, SvcWrap64<MapProcessCodeMemory>, "MapProcessCodeMemory"},
2441 {0x78, SvcWrap<UnmapProcessCodeMemory>, "UnmapProcessCodeMemory"}, 2631 {0x78, SvcWrap64<UnmapProcessCodeMemory>, "UnmapProcessCodeMemory"},
2442 {0x79, nullptr, "CreateProcess"}, 2632 {0x79, nullptr, "CreateProcess"},
2443 {0x7A, nullptr, "StartProcess"}, 2633 {0x7A, nullptr, "StartProcess"},
2444 {0x7B, nullptr, "TerminateProcess"}, 2634 {0x7B, nullptr, "TerminateProcess"},
2445 {0x7C, SvcWrap<GetProcessInfo>, "GetProcessInfo"}, 2635 {0x7C, SvcWrap64<GetProcessInfo>, "GetProcessInfo"},
2446 {0x7D, SvcWrap<CreateResourceLimit>, "CreateResourceLimit"}, 2636 {0x7D, SvcWrap64<CreateResourceLimit>, "CreateResourceLimit"},
2447 {0x7E, SvcWrap<SetResourceLimitLimitValue>, "SetResourceLimitLimitValue"}, 2637 {0x7E, SvcWrap64<SetResourceLimitLimitValue>, "SetResourceLimitLimitValue"},
2448 {0x7F, nullptr, "CallSecureMonitor"}, 2638 {0x7F, nullptr, "CallSecureMonitor"},
2449}; 2639};
2450 2640
2451static const FunctionDef* GetSVCInfo(u32 func_num) { 2641static const FunctionDef* GetSVCInfo32(u32 func_num) {
2452 if (func_num >= std::size(SVC_Table)) { 2642 if (func_num >= std::size(SVC_Table_32)) {
2643 LOG_ERROR(Kernel_SVC, "Unknown svc=0x{:02X}", func_num);
2644 return nullptr;
2645 }
2646 return &SVC_Table_32[func_num];
2647}
2648
2649static const FunctionDef* GetSVCInfo64(u32 func_num) {
2650 if (func_num >= std::size(SVC_Table_64)) {
2453 LOG_ERROR(Kernel_SVC, "Unknown svc=0x{:02X}", func_num); 2651 LOG_ERROR(Kernel_SVC, "Unknown svc=0x{:02X}", func_num);
2454 return nullptr; 2652 return nullptr;
2455 } 2653 }
2456 return &SVC_Table[func_num]; 2654 return &SVC_Table_64[func_num];
2457} 2655}
2458 2656
2459MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70)); 2657MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70));
@@ -2464,7 +2662,8 @@ void CallSVC(Core::System& system, u32 immediate) {
2464 // Lock the global kernel mutex when we enter the kernel HLE. 2662 // Lock the global kernel mutex when we enter the kernel HLE.
2465 std::lock_guard lock{HLE::g_hle_lock}; 2663 std::lock_guard lock{HLE::g_hle_lock};
2466 2664
2467 const FunctionDef* info = GetSVCInfo(immediate); 2665 const FunctionDef* info = system.CurrentProcess()->Is64BitProcess() ? GetSVCInfo64(immediate)
2666 : GetSVCInfo32(immediate);
2468 if (info) { 2667 if (info) {
2469 if (info->func) { 2668 if (info->func) {
2470 info->func(system); 2669 info->func(system);
diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h
index 29a2cfa9d..7d735e3fa 100644
--- a/src/core/hle/kernel/svc_wrap.h
+++ b/src/core/hle/kernel/svc_wrap.h
@@ -15,6 +15,10 @@ static inline u64 Param(const Core::System& system, int n) {
15 return system.CurrentArmInterface().GetReg(n); 15 return system.CurrentArmInterface().GetReg(n);
16} 16}
17 17
18static inline u32 Param32(const Core::System& system, int n) {
19 return static_cast<u32>(system.CurrentArmInterface().GetReg(n));
20}
21
18/** 22/**
19 * HLE a function return from the current ARM userland process 23 * HLE a function return from the current ARM userland process
20 * @param system System context 24 * @param system System context
@@ -24,40 +28,44 @@ static inline void FuncReturn(Core::System& system, u64 result) {
24 system.CurrentArmInterface().SetReg(0, result); 28 system.CurrentArmInterface().SetReg(0, result);
25} 29}
26 30
31static inline void FuncReturn32(Core::System& system, u32 result) {
32 system.CurrentArmInterface().SetReg(0, (u64)result);
33}
34
27//////////////////////////////////////////////////////////////////////////////////////////////////// 35////////////////////////////////////////////////////////////////////////////////////////////////////
28// Function wrappers that return type ResultCode 36// Function wrappers that return type ResultCode
29 37
30template <ResultCode func(Core::System&, u64)> 38template <ResultCode func(Core::System&, u64)>
31void SvcWrap(Core::System& system) { 39void SvcWrap64(Core::System& system) {
32 FuncReturn(system, func(system, Param(system, 0)).raw); 40 FuncReturn(system, func(system, Param(system, 0)).raw);
33} 41}
34 42
35template <ResultCode func(Core::System&, u64, u64)> 43template <ResultCode func(Core::System&, u64, u64)>
36void SvcWrap(Core::System& system) { 44void SvcWrap64(Core::System& system) {
37 FuncReturn(system, func(system, Param(system, 0), Param(system, 1)).raw); 45 FuncReturn(system, func(system, Param(system, 0), Param(system, 1)).raw);
38} 46}
39 47
40template <ResultCode func(Core::System&, u32)> 48template <ResultCode func(Core::System&, u32)>
41void SvcWrap(Core::System& system) { 49void SvcWrap64(Core::System& system) {
42 FuncReturn(system, func(system, static_cast<u32>(Param(system, 0))).raw); 50 FuncReturn(system, func(system, static_cast<u32>(Param(system, 0))).raw);
43} 51}
44 52
45template <ResultCode func(Core::System&, u32, u32)> 53template <ResultCode func(Core::System&, u32, u32)>
46void SvcWrap(Core::System& system) { 54void SvcWrap64(Core::System& system) {
47 FuncReturn( 55 FuncReturn(
48 system, 56 system,
49 func(system, static_cast<u32>(Param(system, 0)), static_cast<u32>(Param(system, 1))).raw); 57 func(system, static_cast<u32>(Param(system, 0)), static_cast<u32>(Param(system, 1))).raw);
50} 58}
51 59
52template <ResultCode func(Core::System&, u32, u64, u64, u64)> 60template <ResultCode func(Core::System&, u32, u64, u64, u64)>
53void SvcWrap(Core::System& system) { 61void SvcWrap64(Core::System& system) {
54 FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), 62 FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1),
55 Param(system, 2), Param(system, 3)) 63 Param(system, 2), Param(system, 3))
56 .raw); 64 .raw);
57} 65}
58 66
59template <ResultCode func(Core::System&, u32*)> 67template <ResultCode func(Core::System&, u32*)>
60void SvcWrap(Core::System& system) { 68void SvcWrap64(Core::System& system) {
61 u32 param = 0; 69 u32 param = 0;
62 const u32 retval = func(system, &param).raw; 70 const u32 retval = func(system, &param).raw;
63 system.CurrentArmInterface().SetReg(1, param); 71 system.CurrentArmInterface().SetReg(1, param);
@@ -65,7 +73,7 @@ void SvcWrap(Core::System& system) {
65} 73}
66 74
67template <ResultCode func(Core::System&, u32*, u32)> 75template <ResultCode func(Core::System&, u32*, u32)>
68void SvcWrap(Core::System& system) { 76void SvcWrap64(Core::System& system) {
69 u32 param_1 = 0; 77 u32 param_1 = 0;
70 const u32 retval = func(system, &param_1, static_cast<u32>(Param(system, 1))).raw; 78 const u32 retval = func(system, &param_1, static_cast<u32>(Param(system, 1))).raw;
71 system.CurrentArmInterface().SetReg(1, param_1); 79 system.CurrentArmInterface().SetReg(1, param_1);
@@ -73,7 +81,7 @@ void SvcWrap(Core::System& system) {
73} 81}
74 82
75template <ResultCode func(Core::System&, u32*, u32*)> 83template <ResultCode func(Core::System&, u32*, u32*)>
76void SvcWrap(Core::System& system) { 84void SvcWrap64(Core::System& system) {
77 u32 param_1 = 0; 85 u32 param_1 = 0;
78 u32 param_2 = 0; 86 u32 param_2 = 0;
79 const u32 retval = func(system, &param_1, &param_2).raw; 87 const u32 retval = func(system, &param_1, &param_2).raw;
@@ -86,7 +94,7 @@ void SvcWrap(Core::System& system) {
86} 94}
87 95
88template <ResultCode func(Core::System&, u32*, u64)> 96template <ResultCode func(Core::System&, u32*, u64)>
89void SvcWrap(Core::System& system) { 97void SvcWrap64(Core::System& system) {
90 u32 param_1 = 0; 98 u32 param_1 = 0;
91 const u32 retval = func(system, &param_1, Param(system, 1)).raw; 99 const u32 retval = func(system, &param_1, Param(system, 1)).raw;
92 system.CurrentArmInterface().SetReg(1, param_1); 100 system.CurrentArmInterface().SetReg(1, param_1);
@@ -94,7 +102,7 @@ void SvcWrap(Core::System& system) {
94} 102}
95 103
96template <ResultCode func(Core::System&, u32*, u64, u32)> 104template <ResultCode func(Core::System&, u32*, u64, u32)>
97void SvcWrap(Core::System& system) { 105void SvcWrap64(Core::System& system) {
98 u32 param_1 = 0; 106 u32 param_1 = 0;
99 const u32 retval = 107 const u32 retval =
100 func(system, &param_1, Param(system, 1), static_cast<u32>(Param(system, 2))).raw; 108 func(system, &param_1, Param(system, 1), static_cast<u32>(Param(system, 2))).raw;
@@ -104,7 +112,7 @@ void SvcWrap(Core::System& system) {
104} 112}
105 113
106template <ResultCode func(Core::System&, u64*, u32)> 114template <ResultCode func(Core::System&, u64*, u32)>
107void SvcWrap(Core::System& system) { 115void SvcWrap64(Core::System& system) {
108 u64 param_1 = 0; 116 u64 param_1 = 0;
109 const u32 retval = func(system, &param_1, static_cast<u32>(Param(system, 1))).raw; 117 const u32 retval = func(system, &param_1, static_cast<u32>(Param(system, 1))).raw;
110 118
@@ -113,12 +121,12 @@ void SvcWrap(Core::System& system) {
113} 121}
114 122
115template <ResultCode func(Core::System&, u64, u32)> 123template <ResultCode func(Core::System&, u64, u32)>
116void SvcWrap(Core::System& system) { 124void SvcWrap64(Core::System& system) {
117 FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1))).raw); 125 FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1))).raw);
118} 126}
119 127
120template <ResultCode func(Core::System&, u64*, u64)> 128template <ResultCode func(Core::System&, u64*, u64)>
121void SvcWrap(Core::System& system) { 129void SvcWrap64(Core::System& system) {
122 u64 param_1 = 0; 130 u64 param_1 = 0;
123 const u32 retval = func(system, &param_1, Param(system, 1)).raw; 131 const u32 retval = func(system, &param_1, Param(system, 1)).raw;
124 132
@@ -127,7 +135,7 @@ void SvcWrap(Core::System& system) {
127} 135}
128 136
129template <ResultCode func(Core::System&, u64*, u32, u32)> 137template <ResultCode func(Core::System&, u64*, u32, u32)>
130void SvcWrap(Core::System& system) { 138void SvcWrap64(Core::System& system) {
131 u64 param_1 = 0; 139 u64 param_1 = 0;
132 const u32 retval = func(system, &param_1, static_cast<u32>(Param(system, 1)), 140 const u32 retval = func(system, &param_1, static_cast<u32>(Param(system, 1)),
133 static_cast<u32>(Param(system, 2))) 141 static_cast<u32>(Param(system, 2)))
@@ -138,19 +146,19 @@ void SvcWrap(Core::System& system) {
138} 146}
139 147
140template <ResultCode func(Core::System&, u32, u64)> 148template <ResultCode func(Core::System&, u32, u64)>
141void SvcWrap(Core::System& system) { 149void SvcWrap64(Core::System& system) {
142 FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1)).raw); 150 FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1)).raw);
143} 151}
144 152
145template <ResultCode func(Core::System&, u32, u32, u64)> 153template <ResultCode func(Core::System&, u32, u32, u64)>
146void SvcWrap(Core::System& system) { 154void SvcWrap64(Core::System& system) {
147 FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), 155 FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)),
148 static_cast<u32>(Param(system, 1)), Param(system, 2)) 156 static_cast<u32>(Param(system, 1)), Param(system, 2))
149 .raw); 157 .raw);
150} 158}
151 159
152template <ResultCode func(Core::System&, u32, u32*, u64*)> 160template <ResultCode func(Core::System&, u32, u32*, u64*)>
153void SvcWrap(Core::System& system) { 161void SvcWrap64(Core::System& system) {
154 u32 param_1 = 0; 162 u32 param_1 = 0;
155 u64 param_2 = 0; 163 u64 param_2 = 0;
156 const ResultCode retval = func(system, static_cast<u32>(Param(system, 2)), &param_1, &param_2); 164 const ResultCode retval = func(system, static_cast<u32>(Param(system, 2)), &param_1, &param_2);
@@ -161,54 +169,54 @@ void SvcWrap(Core::System& system) {
161} 169}
162 170
163template <ResultCode func(Core::System&, u64, u64, u32, u32)> 171template <ResultCode func(Core::System&, u64, u64, u32, u32)>
164void SvcWrap(Core::System& system) { 172void SvcWrap64(Core::System& system) {
165 FuncReturn(system, func(system, Param(system, 0), Param(system, 1), 173 FuncReturn(system, func(system, Param(system, 0), Param(system, 1),
166 static_cast<u32>(Param(system, 2)), static_cast<u32>(Param(system, 3))) 174 static_cast<u32>(Param(system, 2)), static_cast<u32>(Param(system, 3)))
167 .raw); 175 .raw);
168} 176}
169 177
170template <ResultCode func(Core::System&, u64, u64, u32, u64)> 178template <ResultCode func(Core::System&, u64, u64, u32, u64)>
171void SvcWrap(Core::System& system) { 179void SvcWrap64(Core::System& system) {
172 FuncReturn(system, func(system, Param(system, 0), Param(system, 1), 180 FuncReturn(system, func(system, Param(system, 0), Param(system, 1),
173 static_cast<u32>(Param(system, 2)), Param(system, 3)) 181 static_cast<u32>(Param(system, 2)), Param(system, 3))
174 .raw); 182 .raw);
175} 183}
176 184
177template <ResultCode func(Core::System&, u32, u64, u32)> 185template <ResultCode func(Core::System&, u32, u64, u32)>
178void SvcWrap(Core::System& system) { 186void SvcWrap64(Core::System& system) {
179 FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), 187 FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1),
180 static_cast<u32>(Param(system, 2))) 188 static_cast<u32>(Param(system, 2)))
181 .raw); 189 .raw);
182} 190}
183 191
184template <ResultCode func(Core::System&, u64, u64, u64)> 192template <ResultCode func(Core::System&, u64, u64, u64)>
185void SvcWrap(Core::System& system) { 193void SvcWrap64(Core::System& system) {
186 FuncReturn(system, func(system, Param(system, 0), Param(system, 1), Param(system, 2)).raw); 194 FuncReturn(system, func(system, Param(system, 0), Param(system, 1), Param(system, 2)).raw);
187} 195}
188 196
189template <ResultCode func(Core::System&, u64, u64, u32)> 197template <ResultCode func(Core::System&, u64, u64, u32)>
190void SvcWrap(Core::System& system) { 198void SvcWrap64(Core::System& system) {
191 FuncReturn( 199 FuncReturn(
192 system, 200 system,
193 func(system, Param(system, 0), Param(system, 1), static_cast<u32>(Param(system, 2))).raw); 201 func(system, Param(system, 0), Param(system, 1), static_cast<u32>(Param(system, 2))).raw);
194} 202}
195 203
196template <ResultCode func(Core::System&, u32, u64, u64, u32)> 204template <ResultCode func(Core::System&, u32, u64, u64, u32)>
197void SvcWrap(Core::System& system) { 205void SvcWrap64(Core::System& system) {
198 FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), 206 FuncReturn(system, func(system, static_cast<u32>(Param(system, 0)), Param(system, 1),
199 Param(system, 2), static_cast<u32>(Param(system, 3))) 207 Param(system, 2), static_cast<u32>(Param(system, 3)))
200 .raw); 208 .raw);
201} 209}
202 210
203template <ResultCode func(Core::System&, u32, u64, u64)> 211template <ResultCode func(Core::System&, u32, u64, u64)>
204void SvcWrap(Core::System& system) { 212void SvcWrap64(Core::System& system) {
205 FuncReturn( 213 FuncReturn(
206 system, 214 system,
207 func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2)).raw); 215 func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2)).raw);
208} 216}
209 217
210template <ResultCode func(Core::System&, u32*, u64, u64, s64)> 218template <ResultCode func(Core::System&, u32*, u64, u64, s64)>
211void SvcWrap(Core::System& system) { 219void SvcWrap64(Core::System& system) {
212 u32 param_1 = 0; 220 u32 param_1 = 0;
213 const u32 retval = func(system, &param_1, Param(system, 1), static_cast<u32>(Param(system, 2)), 221 const u32 retval = func(system, &param_1, Param(system, 1), static_cast<u32>(Param(system, 2)),
214 static_cast<s64>(Param(system, 3))) 222 static_cast<s64>(Param(system, 3)))
@@ -219,14 +227,14 @@ void SvcWrap(Core::System& system) {
219} 227}
220 228
221template <ResultCode func(Core::System&, u64, u64, u32, s64)> 229template <ResultCode func(Core::System&, u64, u64, u32, s64)>
222void SvcWrap(Core::System& system) { 230void SvcWrap64(Core::System& system) {
223 FuncReturn(system, func(system, Param(system, 0), Param(system, 1), 231 FuncReturn(system, func(system, Param(system, 0), Param(system, 1),
224 static_cast<u32>(Param(system, 2)), static_cast<s64>(Param(system, 3))) 232 static_cast<u32>(Param(system, 2)), static_cast<s64>(Param(system, 3)))
225 .raw); 233 .raw);
226} 234}
227 235
228template <ResultCode func(Core::System&, u64*, u64, u64, u64)> 236template <ResultCode func(Core::System&, u64*, u64, u64, u64)>
229void SvcWrap(Core::System& system) { 237void SvcWrap64(Core::System& system) {
230 u64 param_1 = 0; 238 u64 param_1 = 0;
231 const u32 retval = 239 const u32 retval =
232 func(system, &param_1, Param(system, 1), Param(system, 2), Param(system, 3)).raw; 240 func(system, &param_1, Param(system, 1), Param(system, 2), Param(system, 3)).raw;
@@ -236,7 +244,7 @@ void SvcWrap(Core::System& system) {
236} 244}
237 245
238template <ResultCode func(Core::System&, u32*, u64, u64, u64, u32, s32)> 246template <ResultCode func(Core::System&, u32*, u64, u64, u64, u32, s32)>
239void SvcWrap(Core::System& system) { 247void SvcWrap64(Core::System& system) {
240 u32 param_1 = 0; 248 u32 param_1 = 0;
241 const u32 retval = func(system, &param_1, Param(system, 1), Param(system, 2), Param(system, 3), 249 const u32 retval = func(system, &param_1, Param(system, 1), Param(system, 2), Param(system, 3),
242 static_cast<u32>(Param(system, 4)), static_cast<s32>(Param(system, 5))) 250 static_cast<u32>(Param(system, 4)), static_cast<s32>(Param(system, 5)))
@@ -247,7 +255,7 @@ void SvcWrap(Core::System& system) {
247} 255}
248 256
249template <ResultCode func(Core::System&, u32*, u64, u64, u32)> 257template <ResultCode func(Core::System&, u32*, u64, u64, u32)>
250void SvcWrap(Core::System& system) { 258void SvcWrap64(Core::System& system) {
251 u32 param_1 = 0; 259 u32 param_1 = 0;
252 const u32 retval = func(system, &param_1, Param(system, 1), Param(system, 2), 260 const u32 retval = func(system, &param_1, Param(system, 1), Param(system, 2),
253 static_cast<u32>(Param(system, 3))) 261 static_cast<u32>(Param(system, 3)))
@@ -258,7 +266,7 @@ void SvcWrap(Core::System& system) {
258} 266}
259 267
260template <ResultCode func(Core::System&, Handle*, u64, u32, u32)> 268template <ResultCode func(Core::System&, Handle*, u64, u32, u32)>
261void SvcWrap(Core::System& system) { 269void SvcWrap64(Core::System& system) {
262 u32 param_1 = 0; 270 u32 param_1 = 0;
263 const u32 retval = func(system, &param_1, Param(system, 1), static_cast<u32>(Param(system, 2)), 271 const u32 retval = func(system, &param_1, Param(system, 1), static_cast<u32>(Param(system, 2)),
264 static_cast<u32>(Param(system, 3))) 272 static_cast<u32>(Param(system, 3)))
@@ -269,14 +277,14 @@ void SvcWrap(Core::System& system) {
269} 277}
270 278
271template <ResultCode func(Core::System&, u64, u32, s32, s64)> 279template <ResultCode func(Core::System&, u64, u32, s32, s64)>
272void SvcWrap(Core::System& system) { 280void SvcWrap64(Core::System& system) {
273 FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1)), 281 FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1)),
274 static_cast<s32>(Param(system, 2)), static_cast<s64>(Param(system, 3))) 282 static_cast<s32>(Param(system, 2)), static_cast<s64>(Param(system, 3)))
275 .raw); 283 .raw);
276} 284}
277 285
278template <ResultCode func(Core::System&, u64, u32, s32, s32)> 286template <ResultCode func(Core::System&, u64, u32, s32, s32)>
279void SvcWrap(Core::System& system) { 287void SvcWrap64(Core::System& system) {
280 FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1)), 288 FuncReturn(system, func(system, Param(system, 0), static_cast<u32>(Param(system, 1)),
281 static_cast<s32>(Param(system, 2)), static_cast<s32>(Param(system, 3))) 289 static_cast<s32>(Param(system, 2)), static_cast<s32>(Param(system, 3)))
282 .raw); 290 .raw);
@@ -286,7 +294,7 @@ void SvcWrap(Core::System& system) {
286// Function wrappers that return type u32 294// Function wrappers that return type u32
287 295
288template <u32 func(Core::System&)> 296template <u32 func(Core::System&)>
289void SvcWrap(Core::System& system) { 297void SvcWrap64(Core::System& system) {
290 FuncReturn(system, func(system)); 298 FuncReturn(system, func(system));
291} 299}
292 300
@@ -294,7 +302,7 @@ void SvcWrap(Core::System& system) {
294// Function wrappers that return type u64 302// Function wrappers that return type u64
295 303
296template <u64 func(Core::System&)> 304template <u64 func(Core::System&)>
297void SvcWrap(Core::System& system) { 305void SvcWrap64(Core::System& system) {
298 FuncReturn(system, func(system)); 306 FuncReturn(system, func(system));
299} 307}
300 308
@@ -302,44 +310,110 @@ void SvcWrap(Core::System& system) {
302/// Function wrappers that return type void 310/// Function wrappers that return type void
303 311
304template <void func(Core::System&)> 312template <void func(Core::System&)>
305void SvcWrap(Core::System& system) { 313void SvcWrap64(Core::System& system) {
306 func(system); 314 func(system);
307} 315}
308 316
309template <void func(Core::System&, u32)> 317template <void func(Core::System&, u32)>
310void SvcWrap(Core::System& system) { 318void SvcWrap64(Core::System& system) {
311 func(system, static_cast<u32>(Param(system, 0))); 319 func(system, static_cast<u32>(Param(system, 0)));
312} 320}
313 321
314template <void func(Core::System&, u32, u64, u64, u64)> 322template <void func(Core::System&, u32, u64, u64, u64)>
315void SvcWrap(Core::System& system) { 323void SvcWrap64(Core::System& system) {
316 func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2), 324 func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2),
317 Param(system, 3)); 325 Param(system, 3));
318} 326}
319 327
320template <void func(Core::System&, s64)> 328template <void func(Core::System&, s64)>
321void SvcWrap(Core::System& system) { 329void SvcWrap64(Core::System& system) {
322 func(system, static_cast<s64>(Param(system, 0))); 330 func(system, static_cast<s64>(Param(system, 0)));
323} 331}
324 332
325template <void func(Core::System&, u64, s32)> 333template <void func(Core::System&, u64, s32)>
326void SvcWrap(Core::System& system) { 334void SvcWrap64(Core::System& system) {
327 func(system, Param(system, 0), static_cast<s32>(Param(system, 1))); 335 func(system, Param(system, 0), static_cast<s32>(Param(system, 1)));
328} 336}
329 337
330template <void func(Core::System&, u64, u64)> 338template <void func(Core::System&, u64, u64)>
331void SvcWrap(Core::System& system) { 339void SvcWrap64(Core::System& system) {
332 func(system, Param(system, 0), Param(system, 1)); 340 func(system, Param(system, 0), Param(system, 1));
333} 341}
334 342
335template <void func(Core::System&, u64, u64, u64)> 343template <void func(Core::System&, u64, u64, u64)>
336void SvcWrap(Core::System& system) { 344void SvcWrap64(Core::System& system) {
337 func(system, Param(system, 0), Param(system, 1), Param(system, 2)); 345 func(system, Param(system, 0), Param(system, 1), Param(system, 2));
338} 346}
339 347
340template <void func(Core::System&, u32, u64, u64)> 348template <void func(Core::System&, u32, u64, u64)>
341void SvcWrap(Core::System& system) { 349void SvcWrap64(Core::System& system) {
342 func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2)); 350 func(system, static_cast<u32>(Param(system, 0)), Param(system, 1), Param(system, 2));
343} 351}
344 352
353// Used by QueryMemory32
354template <ResultCode func(Core::System&, u32, u32, u32)>
355void SvcWrap32(Core::System& system) {
356 FuncReturn32(system,
357 func(system, Param32(system, 0), Param32(system, 1), Param32(system, 2)).raw);
358}
359
360// Used by GetInfo32
361template <ResultCode func(Core::System&, u32*, u32*, u32, u32, u32, u32)>
362void SvcWrap32(Core::System& system) {
363 u32 param_1 = 0;
364 u32 param_2 = 0;
365
366 const u32 retval = func(system, &param_1, &param_2, Param32(system, 0), Param32(system, 1),
367 Param32(system, 2), Param32(system, 3))
368 .raw;
369
370 system.CurrentArmInterface().SetReg(1, param_1);
371 system.CurrentArmInterface().SetReg(2, param_2);
372 FuncReturn(system, retval);
373}
374
375// Used by GetThreadPriority32, ConnectToNamedPort32
376template <ResultCode func(Core::System&, u32*, u32)>
377void SvcWrap32(Core::System& system) {
378 u32 param_1 = 0;
379 const u32 retval = func(system, &param_1, Param32(system, 1)).raw;
380 system.CurrentArmInterface().SetReg(1, param_1);
381 FuncReturn(system, retval);
382}
383
384// Used by GetThreadId32
385template <ResultCode func(Core::System&, u32*, u32*, u32)>
386void SvcWrap32(Core::System& system) {
387 u32 param_1 = 0;
388 u32 param_2 = 0;
389
390 const u32 retval = func(system, &param_1, &param_2, Param32(system, 1)).raw;
391 system.CurrentArmInterface().SetReg(1, param_1);
392 system.CurrentArmInterface().SetReg(2, param_2);
393 FuncReturn(system, retval);
394}
395
396// Used by SignalProcessWideKey32
397template <void func(Core::System&, u32, s32)>
398void SvcWrap32(Core::System& system) {
399 func(system, static_cast<u32>(Param(system, 0)), static_cast<s32>(Param(system, 1)));
400}
401
402// Used by SendSyncRequest32
403template <ResultCode func(Core::System&, u32)>
404void SvcWrap32(Core::System& system) {
405 FuncReturn(system, func(system, static_cast<u32>(Param(system, 0))).raw);
406}
407
408// Used by WaitSynchronization32
409template <ResultCode func(Core::System&, u32, u32, s32, u32, Handle*)>
410void SvcWrap32(Core::System& system) {
411 u32 param_1 = 0;
412 const u32 retval = func(system, Param32(system, 0), Param32(system, 1), Param32(system, 2),
413 Param32(system, 3), &param_1)
414 .raw;
415 system.CurrentArmInterface().SetReg(1, param_1);
416 FuncReturn(system, retval);
417}
418
345} // namespace Kernel 419} // namespace Kernel
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index bf850e0b2..83e956036 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -133,15 +133,16 @@ void Thread::CancelWait() {
133 ResumeFromWait(); 133 ResumeFromWait();
134} 134}
135 135
136/** 136static void ResetThreadContext32(Core::ARM_Interface::ThreadContext32& context, u32 stack_top,
137 * Resets a thread context, making it ready to be scheduled and run by the CPU 137 u32 entry_point, u32 arg) {
138 * @param context Thread context to reset 138 context = {};
139 * @param stack_top Address of the top of the stack 139 context.cpu_registers[0] = arg;
140 * @param entry_point Address of entry point for execution 140 context.cpu_registers[15] = entry_point;
141 * @param arg User argument for thread 141 context.cpu_registers[13] = stack_top;
142 */ 142}
143static void ResetThreadContext(Core::ARM_Interface::ThreadContext& context, VAddr stack_top, 143
144 VAddr entry_point, u64 arg) { 144static void ResetThreadContext64(Core::ARM_Interface::ThreadContext64& context, VAddr stack_top,
145 VAddr entry_point, u64 arg) {
145 context = {}; 146 context = {};
146 context.cpu_registers[0] = arg; 147 context.cpu_registers[0] = arg;
147 context.pc = entry_point; 148 context.pc = entry_point;
@@ -198,9 +199,9 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(KernelCore& kernel, std::strin
198 199
199 thread->owner_process->RegisterThread(thread.get()); 200 thread->owner_process->RegisterThread(thread.get());
200 201
201 // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used 202 ResetThreadContext32(thread->context_32, static_cast<u32>(stack_top),
202 // to initialize the context 203 static_cast<u32>(entry_point), static_cast<u32>(arg));
203 ResetThreadContext(thread->context, stack_top, entry_point, arg); 204 ResetThreadContext64(thread->context_64, stack_top, entry_point, arg);
204 205
205 return MakeResult<std::shared_ptr<Thread>>(std::move(thread)); 206 return MakeResult<std::shared_ptr<Thread>>(std::move(thread));
206} 207}
@@ -213,11 +214,13 @@ void Thread::SetPriority(u32 priority) {
213} 214}
214 215
215void Thread::SetWaitSynchronizationResult(ResultCode result) { 216void Thread::SetWaitSynchronizationResult(ResultCode result) {
216 context.cpu_registers[0] = result.raw; 217 context_32.cpu_registers[0] = result.raw;
218 context_64.cpu_registers[0] = result.raw;
217} 219}
218 220
219void Thread::SetWaitSynchronizationOutput(s32 output) { 221void Thread::SetWaitSynchronizationOutput(s32 output) {
220 context.cpu_registers[1] = output; 222 context_32.cpu_registers[1] = output;
223 context_64.cpu_registers[1] = output;
221} 224}
222 225
223s32 Thread::GetSynchronizationObjectIndex(std::shared_ptr<SynchronizationObject> object) const { 226s32 Thread::GetSynchronizationObjectIndex(std::shared_ptr<SynchronizationObject> object) const {
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 129e7858a..23fdef8a4 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -102,7 +102,8 @@ public:
102 102
103 using MutexWaitingThreads = std::vector<std::shared_ptr<Thread>>; 103 using MutexWaitingThreads = std::vector<std::shared_ptr<Thread>>;
104 104
105 using ThreadContext = Core::ARM_Interface::ThreadContext; 105 using ThreadContext32 = Core::ARM_Interface::ThreadContext32;
106 using ThreadContext64 = Core::ARM_Interface::ThreadContext64;
106 107
107 using ThreadSynchronizationObjects = std::vector<std::shared_ptr<SynchronizationObject>>; 108 using ThreadSynchronizationObjects = std::vector<std::shared_ptr<SynchronizationObject>>;
108 109
@@ -273,12 +274,20 @@ public:
273 return status == ThreadStatus::WaitSynch; 274 return status == ThreadStatus::WaitSynch;
274 } 275 }
275 276
276 ThreadContext& GetContext() { 277 ThreadContext32& GetContext32() {
277 return context; 278 return context_32;
278 } 279 }
279 280
280 const ThreadContext& GetContext() const { 281 const ThreadContext32& GetContext32() const {
281 return context; 282 return context_32;
283 }
284
285 ThreadContext64& GetContext64() {
286 return context_64;
287 }
288
289 const ThreadContext64& GetContext64() const {
290 return context_64;
282 } 291 }
283 292
284 ThreadStatus GetStatus() const { 293 ThreadStatus GetStatus() const {
@@ -466,7 +475,8 @@ private:
466 void AdjustSchedulingOnPriority(u32 old_priority); 475 void AdjustSchedulingOnPriority(u32 old_priority);
467 void AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core); 476 void AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core);
468 477
469 Core::ARM_Interface::ThreadContext context{}; 478 ThreadContext32 context_32{};
479 ThreadContext64 context_64{};
470 480
471 u64 thread_id = 0; 481 u64 thread_id = 0;
472 482
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index cc978713b..d1bf13c89 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -607,7 +607,7 @@ ICommonStateGetter::ICommonStateGetter(Core::System& system,
607 {40, nullptr, "GetCradleFwVersion"}, 607 {40, nullptr, "GetCradleFwVersion"},
608 {50, nullptr, "IsVrModeEnabled"}, 608 {50, nullptr, "IsVrModeEnabled"},
609 {51, nullptr, "SetVrModeEnabled"}, 609 {51, nullptr, "SetVrModeEnabled"},
610 {52, nullptr, "SwitchLcdBacklight"}, 610 {52, &ICommonStateGetter::SetLcdBacklighOffEnabled, "SetLcdBacklighOffEnabled"},
611 {53, nullptr, "BeginVrModeEx"}, 611 {53, nullptr, "BeginVrModeEx"},
612 {54, nullptr, "EndVrModeEx"}, 612 {54, nullptr, "EndVrModeEx"},
613 {55, nullptr, "IsInControllerFirmwareUpdateSection"}, 613 {55, nullptr, "IsInControllerFirmwareUpdateSection"},
@@ -636,7 +636,6 @@ void ICommonStateGetter::GetBootMode(Kernel::HLERequestContext& ctx) {
636 636
637 IPC::ResponseBuilder rb{ctx, 3}; 637 IPC::ResponseBuilder rb{ctx, 3};
638 rb.Push(RESULT_SUCCESS); 638 rb.Push(RESULT_SUCCESS);
639
640 rb.Push<u8>(static_cast<u8>(Service::PM::SystemBootMode::Normal)); // Normal boot mode 639 rb.Push<u8>(static_cast<u8>(Service::PM::SystemBootMode::Normal)); // Normal boot mode
641} 640}
642 641
@@ -660,6 +659,7 @@ void ICommonStateGetter::ReceiveMessage(Kernel::HLERequestContext& ctx) {
660 rb.PushEnum<AppletMessageQueue::AppletMessage>(message); 659 rb.PushEnum<AppletMessageQueue::AppletMessage>(message);
661 return; 660 return;
662 } 661 }
662
663 rb.Push(RESULT_SUCCESS); 663 rb.Push(RESULT_SUCCESS);
664 rb.PushEnum<AppletMessageQueue::AppletMessage>(message); 664 rb.PushEnum<AppletMessageQueue::AppletMessage>(message);
665} 665}
@@ -672,6 +672,17 @@ void ICommonStateGetter::GetCurrentFocusState(Kernel::HLERequestContext& ctx) {
672 rb.Push(static_cast<u8>(FocusState::InFocus)); 672 rb.Push(static_cast<u8>(FocusState::InFocus));
673} 673}
674 674
675void ICommonStateGetter::SetLcdBacklighOffEnabled(Kernel::HLERequestContext& ctx) {
676 IPC::RequestParser rp{ctx};
677 const auto is_lcd_backlight_off_enabled = rp.Pop<bool>();
678
679 LOG_WARNING(Service_AM, "(STUBBED) called. is_lcd_backlight_off_enabled={}",
680 is_lcd_backlight_off_enabled);
681
682 IPC::ResponseBuilder rb{ctx, 2};
683 rb.Push(RESULT_SUCCESS);
684}
685
675void ICommonStateGetter::GetDefaultDisplayResolutionChangeEvent(Kernel::HLERequestContext& ctx) { 686void ICommonStateGetter::GetDefaultDisplayResolutionChangeEvent(Kernel::HLERequestContext& ctx) {
676 LOG_DEBUG(Service_AM, "called"); 687 LOG_DEBUG(Service_AM, "called");
677 688
diff --git a/src/core/hle/service/am/am.h b/src/core/hle/service/am/am.h
index 0b9a4332d..0843de781 100644
--- a/src/core/hle/service/am/am.h
+++ b/src/core/hle/service/am/am.h
@@ -182,6 +182,7 @@ private:
182 void GetOperationMode(Kernel::HLERequestContext& ctx); 182 void GetOperationMode(Kernel::HLERequestContext& ctx);
183 void GetPerformanceMode(Kernel::HLERequestContext& ctx); 183 void GetPerformanceMode(Kernel::HLERequestContext& ctx);
184 void GetBootMode(Kernel::HLERequestContext& ctx); 184 void GetBootMode(Kernel::HLERequestContext& ctx);
185 void SetLcdBacklighOffEnabled(Kernel::HLERequestContext& ctx);
185 void GetDefaultDisplayResolution(Kernel::HLERequestContext& ctx); 186 void GetDefaultDisplayResolution(Kernel::HLERequestContext& ctx);
186 void SetCpuBoostMode(Kernel::HLERequestContext& ctx); 187 void SetCpuBoostMode(Kernel::HLERequestContext& ctx);
187 188
diff --git a/src/core/hle/service/am/applets/web_browser.cpp b/src/core/hle/service/am/applets/web_browser.cpp
index 12443c910..9f30e167d 100644
--- a/src/core/hle/service/am/applets/web_browser.cpp
+++ b/src/core/hle/service/am/applets/web_browser.cpp
@@ -254,6 +254,12 @@ void WebBrowser::Execute() {
254 254
255 if (status != RESULT_SUCCESS) { 255 if (status != RESULT_SUCCESS) {
256 complete = true; 256 complete = true;
257
258 // This is a workaround in order not to softlock yuzu when an error happens during the
259 // webapplet init. In order to avoid an svcBreak, the status is set to RESULT_SUCCESS
260 Finalize();
261 status = RESULT_SUCCESS;
262
257 return; 263 return;
258 } 264 }
259 265
diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp
index 15c09f04c..c1e32b28c 100644
--- a/src/core/hle/service/hid/controllers/npad.cpp
+++ b/src/core/hle/service/hid/controllers/npad.cpp
@@ -287,13 +287,13 @@ void Controller_NPad::RequestPadStateUpdate(u32 npad_id) {
287 analog_state[static_cast<std::size_t>(JoystickId::Joystick_Left)]->GetAnalogDirectionStatus( 287 analog_state[static_cast<std::size_t>(JoystickId::Joystick_Left)]->GetAnalogDirectionStatus(
288 Input::AnalogDirection::DOWN)); 288 Input::AnalogDirection::DOWN));
289 289
290 pad_state.r_stick_up.Assign(analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)]
291 ->GetAnalogDirectionStatus(Input::AnalogDirection::RIGHT));
292 pad_state.r_stick_left.Assign(analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)]
293 ->GetAnalogDirectionStatus(Input::AnalogDirection::LEFT));
294 pad_state.r_stick_right.Assign( 290 pad_state.r_stick_right.Assign(
295 analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)] 291 analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)]
296 ->GetAnalogDirectionStatus(Input::AnalogDirection::UP)); 292 ->GetAnalogDirectionStatus(Input::AnalogDirection::RIGHT));
293 pad_state.r_stick_left.Assign(analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)]
294 ->GetAnalogDirectionStatus(Input::AnalogDirection::LEFT));
295 pad_state.r_stick_up.Assign(analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)]
296 ->GetAnalogDirectionStatus(Input::AnalogDirection::UP));
297 pad_state.r_stick_down.Assign(analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)] 297 pad_state.r_stick_down.Assign(analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)]
298 ->GetAnalogDirectionStatus(Input::AnalogDirection::DOWN)); 298 ->GetAnalogDirectionStatus(Input::AnalogDirection::DOWN));
299 299
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index 134152210..437bc5dee 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -191,8 +191,6 @@ void NVFlinger::Compose() {
191 // Search for a queued buffer and acquire it 191 // Search for a queued buffer and acquire it
192 auto buffer = buffer_queue.AcquireBuffer(); 192 auto buffer = buffer_queue.AcquireBuffer();
193 193
194 MicroProfileFlip();
195
196 if (!buffer) { 194 if (!buffer) {
197 continue; 195 continue;
198 } 196 }
@@ -206,6 +204,8 @@ void NVFlinger::Compose() {
206 gpu.WaitFence(fence.id, fence.value); 204 gpu.WaitFence(fence.id, fence.value);
207 } 205 }
208 206
207 MicroProfileFlip();
208
209 // Now send the buffer to the GPU for drawing. 209 // Now send the buffer to the GPU for drawing.
210 // TODO(Subv): Support more than just disp0. The display device selection is probably based 210 // TODO(Subv): Support more than just disp0. The display device selection is probably based
211 // on which display we're drawing (Default, Internal, External, etc) 211 // on which display we're drawing (Default, Internal, External, etc)
diff --git a/src/core/hle/service/set/set.cpp b/src/core/hle/service/set/set.cpp
index 5bcc0b588..9e12c76fc 100644
--- a/src/core/hle/service/set/set.cpp
+++ b/src/core/hle/service/set/set.cpp
@@ -111,6 +111,14 @@ void SET::GetLanguageCode(Kernel::HLERequestContext& ctx) {
111 rb.PushEnum(available_language_codes[Settings::values.language_index]); 111 rb.PushEnum(available_language_codes[Settings::values.language_index]);
112} 112}
113 113
114void SET::GetRegionCode(Kernel::HLERequestContext& ctx) {
115 LOG_DEBUG(Service_SET, "called");
116
117 IPC::ResponseBuilder rb{ctx, 3};
118 rb.Push(RESULT_SUCCESS);
119 rb.Push(Settings::values.region_index);
120}
121
114SET::SET() : ServiceFramework("set") { 122SET::SET() : ServiceFramework("set") {
115 // clang-format off 123 // clang-format off
116 static const FunctionInfo functions[] = { 124 static const FunctionInfo functions[] = {
@@ -118,7 +126,7 @@ SET::SET() : ServiceFramework("set") {
118 {1, &SET::GetAvailableLanguageCodes, "GetAvailableLanguageCodes"}, 126 {1, &SET::GetAvailableLanguageCodes, "GetAvailableLanguageCodes"},
119 {2, &SET::MakeLanguageCode, "MakeLanguageCode"}, 127 {2, &SET::MakeLanguageCode, "MakeLanguageCode"},
120 {3, &SET::GetAvailableLanguageCodeCount, "GetAvailableLanguageCodeCount"}, 128 {3, &SET::GetAvailableLanguageCodeCount, "GetAvailableLanguageCodeCount"},
121 {4, nullptr, "GetRegionCode"}, 129 {4, &SET::GetRegionCode, "GetRegionCode"},
122 {5, &SET::GetAvailableLanguageCodes2, "GetAvailableLanguageCodes2"}, 130 {5, &SET::GetAvailableLanguageCodes2, "GetAvailableLanguageCodes2"},
123 {6, &SET::GetAvailableLanguageCodeCount2, "GetAvailableLanguageCodeCount2"}, 131 {6, &SET::GetAvailableLanguageCodeCount2, "GetAvailableLanguageCodeCount2"},
124 {7, nullptr, "GetKeyCodeMap"}, 132 {7, nullptr, "GetKeyCodeMap"},
diff --git a/src/core/hle/service/set/set.h b/src/core/hle/service/set/set.h
index b154e08aa..6084b345d 100644
--- a/src/core/hle/service/set/set.h
+++ b/src/core/hle/service/set/set.h
@@ -43,6 +43,7 @@ private:
43 void GetAvailableLanguageCodeCount(Kernel::HLERequestContext& ctx); 43 void GetAvailableLanguageCodeCount(Kernel::HLERequestContext& ctx);
44 void GetAvailableLanguageCodeCount2(Kernel::HLERequestContext& ctx); 44 void GetAvailableLanguageCodeCount2(Kernel::HLERequestContext& ctx);
45 void GetQuestFlag(Kernel::HLERequestContext& ctx); 45 void GetQuestFlag(Kernel::HLERequestContext& ctx);
46 void GetRegionCode(Kernel::HLERequestContext& ctx);
46}; 47};
47 48
48} // namespace Service::Set 49} // namespace Service::Set
diff --git a/src/core/hle/service/sm/controller.cpp b/src/core/hle/service/sm/controller.cpp
index c45b285f8..9cca84b31 100644
--- a/src/core/hle/service/sm/controller.cpp
+++ b/src/core/hle/service/sm/controller.cpp
@@ -44,7 +44,7 @@ void Controller::QueryPointerBufferSize(Kernel::HLERequestContext& ctx) {
44 44
45 IPC::ResponseBuilder rb{ctx, 3}; 45 IPC::ResponseBuilder rb{ctx, 3};
46 rb.Push(RESULT_SUCCESS); 46 rb.Push(RESULT_SUCCESS);
47 rb.Push<u16>(0x500); 47 rb.Push<u16>(0x1000);
48} 48}
49 49
50Controller::Controller() : ServiceFramework("IpcController") { 50Controller::Controller() : ServiceFramework("IpcController") {
diff --git a/src/core/hle/service/time/time_zone_content_manager.cpp b/src/core/hle/service/time/time_zone_content_manager.cpp
index 57b1a2bca..78d4acd95 100644
--- a/src/core/hle/service/time/time_zone_content_manager.cpp
+++ b/src/core/hle/service/time/time_zone_content_manager.cpp
@@ -53,7 +53,7 @@ static std::vector<std::string> BuildLocationNameCache(Core::System& system) {
53 return {}; 53 return {};
54 } 54 }
55 55
56 std::vector<char> raw_data(binary_list->GetSize()); 56 std::vector<char> raw_data(binary_list->GetSize() + 1);
57 binary_list->ReadBytes<char>(raw_data.data(), binary_list->GetSize()); 57 binary_list->ReadBytes<char>(raw_data.data(), binary_list->GetSize());
58 58
59 std::stringstream data_stream{raw_data.data()}; 59 std::stringstream data_stream{raw_data.data()};
diff --git a/src/core/loader/deconstructed_rom_directory.cpp b/src/core/loader/deconstructed_rom_directory.cpp
index d19c3623c..53559e8b1 100644
--- a/src/core/loader/deconstructed_rom_directory.cpp
+++ b/src/core/loader/deconstructed_rom_directory.cpp
@@ -129,12 +129,6 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect
129 } 129 }
130 metadata.Print(); 130 metadata.Print();
131 131
132 const FileSys::ProgramAddressSpaceType arch_bits{metadata.GetAddressSpaceType()};
133 if (arch_bits == FileSys::ProgramAddressSpaceType::Is32Bit ||
134 arch_bits == FileSys::ProgramAddressSpaceType::Is32BitNoMap) {
135 return {ResultStatus::Error32BitISA, {}};
136 }
137
138 if (process.LoadFromMetadata(metadata).IsError()) { 132 if (process.LoadFromMetadata(metadata).IsError()) {
139 return {ResultStatus::ErrorUnableToParseKernelMetadata, {}}; 133 return {ResultStatus::ErrorUnableToParseKernelMetadata, {}};
140 } 134 }
diff --git a/src/core/reporter.cpp b/src/core/reporter.cpp
index f95eee3b1..85ac81ef7 100644
--- a/src/core/reporter.cpp
+++ b/src/core/reporter.cpp
@@ -111,7 +111,7 @@ json GetProcessorStateDataAuto(Core::System& system) {
111 const auto& vm_manager{process->VMManager()}; 111 const auto& vm_manager{process->VMManager()};
112 auto& arm{system.CurrentArmInterface()}; 112 auto& arm{system.CurrentArmInterface()};
113 113
114 Core::ARM_Interface::ThreadContext context{}; 114 Core::ARM_Interface::ThreadContext64 context{};
115 arm.SaveContext(context); 115 arm.SaveContext(context);
116 116
117 return GetProcessorStateData(process->Is64BitProcess() ? "AArch64" : "AArch32", 117 return GetProcessorStateData(process->Is64BitProcess() ? "AArch64" : "AArch32",
diff --git a/src/core/settings.cpp b/src/core/settings.cpp
index d1fc94060..c1282cb80 100644
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -86,6 +86,7 @@ void LogSettings() {
86 LogSetting("System_RngSeed", Settings::values.rng_seed.value_or(0)); 86 LogSetting("System_RngSeed", Settings::values.rng_seed.value_or(0));
87 LogSetting("System_CurrentUser", Settings::values.current_user); 87 LogSetting("System_CurrentUser", Settings::values.current_user);
88 LogSetting("System_LanguageIndex", Settings::values.language_index); 88 LogSetting("System_LanguageIndex", Settings::values.language_index);
89 LogSetting("System_RegionIndex", Settings::values.region_index);
89 LogSetting("Core_UseMultiCore", Settings::values.use_multi_core); 90 LogSetting("Core_UseMultiCore", Settings::values.use_multi_core);
90 LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor); 91 LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor);
91 LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit); 92 LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit);
@@ -94,6 +95,7 @@ void LogSettings() {
94 LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation); 95 LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation);
95 LogSetting("Renderer_UseAsynchronousGpuEmulation", 96 LogSetting("Renderer_UseAsynchronousGpuEmulation",
96 Settings::values.use_asynchronous_gpu_emulation); 97 Settings::values.use_asynchronous_gpu_emulation);
98 LogSetting("Renderer_UseVsync", Settings::values.use_vsync);
97 LogSetting("Audio_OutputEngine", Settings::values.sink_id); 99 LogSetting("Audio_OutputEngine", Settings::values.sink_id);
98 LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching); 100 LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching);
99 LogSetting("Audio_OutputDevice", Settings::values.audio_device_id); 101 LogSetting("Audio_OutputDevice", Settings::values.audio_device_id);
diff --git a/src/core/settings.h b/src/core/settings.h
index f837d3fbc..79ec01731 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -387,6 +387,8 @@ struct Values {
387 387
388 s32 current_user; 388 s32 current_user;
389 s32 language_index; 389 s32 language_index;
390 s32 region_index;
391 s32 sound_index;
390 392
391 // Controls 393 // Controls
392 std::array<PlayerInput, 10> players; 394 std::array<PlayerInput, 10> players;
@@ -430,11 +432,13 @@ struct Values {
430 432
431 float resolution_factor; 433 float resolution_factor;
432 int aspect_ratio; 434 int aspect_ratio;
435 int max_anisotropy;
433 bool use_frame_limit; 436 bool use_frame_limit;
434 u16 frame_limit; 437 u16 frame_limit;
435 bool use_disk_shader_cache; 438 bool use_disk_shader_cache;
436 bool use_accurate_gpu_emulation; 439 bool use_accurate_gpu_emulation;
437 bool use_asynchronous_gpu_emulation; 440 bool use_asynchronous_gpu_emulation;
441 bool use_vsync;
438 bool force_30fps_mode; 442 bool force_30fps_mode;
439 443
440 float bg_red; 444 float bg_red;
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp
index 0e72d31cd..0f3685d1c 100644
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -188,6 +188,7 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) {
188 Settings::values.use_accurate_gpu_emulation); 188 Settings::values.use_accurate_gpu_emulation);
189 AddField(field_type, "Renderer_UseAsynchronousGpuEmulation", 189 AddField(field_type, "Renderer_UseAsynchronousGpuEmulation",
190 Settings::values.use_asynchronous_gpu_emulation); 190 Settings::values.use_asynchronous_gpu_emulation);
191 AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync);
191 AddField(field_type, "System_UseDockedMode", Settings::values.use_docked_mode); 192 AddField(field_type, "System_UseDockedMode", Settings::values.use_docked_mode);
192} 193}
193 194
diff --git a/src/input_common/analog_from_button.cpp b/src/input_common/analog_from_button.cpp
index e1a260762..6cabdaa3c 100755
--- a/src/input_common/analog_from_button.cpp
+++ b/src/input_common/analog_from_button.cpp
@@ -34,6 +34,20 @@ public:
34 y * coef * (x == 0 ? 1.0f : SQRT_HALF)); 34 y * coef * (x == 0 ? 1.0f : SQRT_HALF));
35 } 35 }
36 36
37 bool GetAnalogDirectionStatus(Input::AnalogDirection direction) const override {
38 switch (direction) {
39 case Input::AnalogDirection::RIGHT:
40 return right->GetStatus();
41 case Input::AnalogDirection::LEFT:
42 return left->GetStatus();
43 case Input::AnalogDirection::UP:
44 return up->GetStatus();
45 case Input::AnalogDirection::DOWN:
46 return down->GetStatus();
47 }
48 return false;
49 }
50
37private: 51private:
38 Button up; 52 Button up;
39 Button down; 53 Button down;
diff --git a/src/input_common/udp/client.cpp b/src/input_common/udp/client.cpp
index 2228571a6..da5227058 100644
--- a/src/input_common/udp/client.cpp
+++ b/src/input_common/udp/client.cpp
@@ -32,8 +32,16 @@ public:
32 SocketCallback callback) 32 SocketCallback callback)
33 : callback(std::move(callback)), timer(io_service), 33 : callback(std::move(callback)), timer(io_service),
34 socket(io_service, udp::endpoint(udp::v4(), 0)), client_id(client_id), 34 socket(io_service, udp::endpoint(udp::v4(), 0)), client_id(client_id),
35 pad_index(pad_index), 35 pad_index(pad_index) {
36 send_endpoint(udp::endpoint(boost::asio::ip::make_address_v4(host), port)) {} 36 boost::system::error_code ec{};
37 auto ipv4 = boost::asio::ip::make_address_v4(host, ec);
38 if (ec.value() != boost::system::errc::success) {
39 LOG_ERROR(Input, "Invalid IPv4 address \"{}\" provided to socket", host);
40 ipv4 = boost::asio::ip::address_v4{};
41 }
42
43 send_endpoint = {udp::endpoint(ipv4, port)};
44 }
37 45
38 void Stop() { 46 void Stop() {
39 io_service.stop(); 47 io_service.stop();
@@ -85,17 +93,18 @@ private:
85 } 93 }
86 94
87 void HandleSend(const boost::system::error_code& error) { 95 void HandleSend(const boost::system::error_code& error) {
96 boost::system::error_code _ignored{};
88 // Send a request for getting port info for the pad 97 // Send a request for getting port info for the pad
89 Request::PortInfo port_info{1, {pad_index, 0, 0, 0}}; 98 Request::PortInfo port_info{1, {pad_index, 0, 0, 0}};
90 const auto port_message = Request::Create(port_info, client_id); 99 const auto port_message = Request::Create(port_info, client_id);
91 std::memcpy(&send_buffer1, &port_message, PORT_INFO_SIZE); 100 std::memcpy(&send_buffer1, &port_message, PORT_INFO_SIZE);
92 socket.send_to(boost::asio::buffer(send_buffer1), send_endpoint); 101 socket.send_to(boost::asio::buffer(send_buffer1), send_endpoint, {}, _ignored);
93 102
94 // Send a request for getting pad data for the pad 103 // Send a request for getting pad data for the pad
95 Request::PadData pad_data{Request::PadData::Flags::Id, pad_index, EMPTY_MAC_ADDRESS}; 104 Request::PadData pad_data{Request::PadData::Flags::Id, pad_index, EMPTY_MAC_ADDRESS};
96 const auto pad_message = Request::Create(pad_data, client_id); 105 const auto pad_message = Request::Create(pad_data, client_id);
97 std::memcpy(send_buffer2.data(), &pad_message, PAD_DATA_SIZE); 106 std::memcpy(send_buffer2.data(), &pad_message, PAD_DATA_SIZE);
98 socket.send_to(boost::asio::buffer(send_buffer2), send_endpoint); 107 socket.send_to(boost::asio::buffer(send_buffer2), send_endpoint, {}, _ignored);
99 StartSend(timer.expiry()); 108 StartSend(timer.expiry());
100 } 109 }
101 110
diff --git a/src/input_common/udp/protocol.cpp b/src/input_common/udp/protocol.cpp
index a982ac49d..5e50bd612 100644
--- a/src/input_common/udp/protocol.cpp
+++ b/src/input_common/udp/protocol.cpp
@@ -31,7 +31,6 @@ namespace Response {
31 */ 31 */
32std::optional<Type> Validate(u8* data, std::size_t size) { 32std::optional<Type> Validate(u8* data, std::size_t size) {
33 if (size < sizeof(Header)) { 33 if (size < sizeof(Header)) {
34 LOG_DEBUG(Input, "Invalid UDP packet received");
35 return std::nullopt; 34 return std::nullopt;
36 } 35 }
37 Header header{}; 36 Header header{};
diff --git a/src/input_common/udp/udp.cpp b/src/input_common/udp/udp.cpp
index ca99cc22f..8c6ef1394 100644
--- a/src/input_common/udp/udp.cpp
+++ b/src/input_common/udp/udp.cpp
@@ -3,6 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <mutex> 5#include <mutex>
6#include <optional>
6#include <tuple> 7#include <tuple>
7 8
8#include "common/param_package.h" 9#include "common/param_package.h"
@@ -44,7 +45,7 @@ public:
44 std::unique_ptr<Input::TouchDevice> Create(const Common::ParamPackage& params) override { 45 std::unique_ptr<Input::TouchDevice> Create(const Common::ParamPackage& params) override {
45 { 46 {
46 std::lock_guard guard(status->update_mutex); 47 std::lock_guard guard(status->update_mutex);
47 status->touch_calibration.emplace(); 48 status->touch_calibration = DeviceStatus::CalibrationData{};
48 // These default values work well for DS4 but probably not other touch inputs 49 // These default values work well for DS4 but probably not other touch inputs
49 status->touch_calibration->min_x = params.Get("min_x", 100); 50 status->touch_calibration->min_x = params.Get("min_x", 100);
50 status->touch_calibration->min_y = params.Get("min_y", 50); 51 status->touch_calibration->min_y = params.Get("min_y", 50);
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 4b0c6346f..91df062d7 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -2,6 +2,8 @@ add_library(video_core STATIC
2 buffer_cache/buffer_block.h 2 buffer_cache/buffer_block.h
3 buffer_cache/buffer_cache.h 3 buffer_cache/buffer_cache.h
4 buffer_cache/map_interval.h 4 buffer_cache/map_interval.h
5 dirty_flags.cpp
6 dirty_flags.h
5 dma_pusher.cpp 7 dma_pusher.cpp
6 dma_pusher.h 8 dma_pusher.h
7 engines/const_buffer_engine_interface.h 9 engines/const_buffer_engine_interface.h
@@ -63,14 +65,12 @@ add_library(video_core STATIC
63 renderer_opengl/gl_shader_decompiler.h 65 renderer_opengl/gl_shader_decompiler.h
64 renderer_opengl/gl_shader_disk_cache.cpp 66 renderer_opengl/gl_shader_disk_cache.cpp
65 renderer_opengl/gl_shader_disk_cache.h 67 renderer_opengl/gl_shader_disk_cache.h
66 renderer_opengl/gl_shader_gen.cpp
67 renderer_opengl/gl_shader_gen.h
68 renderer_opengl/gl_shader_manager.cpp 68 renderer_opengl/gl_shader_manager.cpp
69 renderer_opengl/gl_shader_manager.h 69 renderer_opengl/gl_shader_manager.h
70 renderer_opengl/gl_shader_util.cpp 70 renderer_opengl/gl_shader_util.cpp
71 renderer_opengl/gl_shader_util.h 71 renderer_opengl/gl_shader_util.h
72 renderer_opengl/gl_state.cpp 72 renderer_opengl/gl_state_tracker.cpp
73 renderer_opengl/gl_state.h 73 renderer_opengl/gl_state_tracker.h
74 renderer_opengl/gl_stream_buffer.cpp 74 renderer_opengl/gl_stream_buffer.cpp
75 renderer_opengl/gl_stream_buffer.h 75 renderer_opengl/gl_stream_buffer.h
76 renderer_opengl/gl_texture_cache.cpp 76 renderer_opengl/gl_texture_cache.cpp
@@ -116,8 +116,6 @@ add_library(video_core STATIC
116 shader/ast.h 116 shader/ast.h
117 shader/compiler_settings.cpp 117 shader/compiler_settings.cpp
118 shader/compiler_settings.h 118 shader/compiler_settings.h
119 shader/const_buffer_locker.cpp
120 shader/const_buffer_locker.h
121 shader/control_flow.cpp 119 shader/control_flow.cpp
122 shader/control_flow.h 120 shader/control_flow.h
123 shader/decode.cpp 121 shader/decode.cpp
@@ -126,9 +124,13 @@ add_library(video_core STATIC
126 shader/node_helper.cpp 124 shader/node_helper.cpp
127 shader/node_helper.h 125 shader/node_helper.h
128 shader/node.h 126 shader/node.h
127 shader/registry.cpp
128 shader/registry.h
129 shader/shader_ir.cpp 129 shader/shader_ir.cpp
130 shader/shader_ir.h 130 shader/shader_ir.h
131 shader/track.cpp 131 shader/track.cpp
132 shader/transform_feedback.cpp
133 shader/transform_feedback.h
132 surface.cpp 134 surface.cpp
133 surface.h 135 surface.h
134 texture_cache/format_lookup_table.cpp 136 texture_cache/format_lookup_table.cpp
@@ -198,6 +200,8 @@ if (ENABLE_VULKAN)
198 renderer_vulkan/vk_shader_util.h 200 renderer_vulkan/vk_shader_util.h
199 renderer_vulkan/vk_staging_buffer_pool.cpp 201 renderer_vulkan/vk_staging_buffer_pool.cpp
200 renderer_vulkan/vk_staging_buffer_pool.h 202 renderer_vulkan/vk_staging_buffer_pool.h
203 renderer_vulkan/vk_state_tracker.cpp
204 renderer_vulkan/vk_state_tracker.h
201 renderer_vulkan/vk_stream_buffer.cpp 205 renderer_vulkan/vk_stream_buffer.cpp
202 renderer_vulkan/vk_stream_buffer.h 206 renderer_vulkan/vk_stream_buffer.h
203 renderer_vulkan/vk_swapchain.cpp 207 renderer_vulkan/vk_swapchain.cpp
diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp
new file mode 100644
index 000000000..e16075993
--- /dev/null
+++ b/src/video_core/dirty_flags.cpp
@@ -0,0 +1,38 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <cstddef>
7
8#include "common/common_types.h"
9#include "video_core/dirty_flags.h"
10
11#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name)
12#define NUM(field_name) (sizeof(::Tegra::Engines::Maxwell3D::Regs::field_name) / sizeof(u32))
13
14namespace VideoCommon::Dirty {
15
16using Tegra::Engines::Maxwell3D;
17
18void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables) {
19 static constexpr std::size_t num_per_rt = NUM(rt[0]);
20 static constexpr std::size_t begin = OFF(rt);
21 static constexpr std::size_t num = num_per_rt * Maxwell3D::Regs::NumRenderTargets;
22 for (std::size_t rt = 0; rt < Maxwell3D::Regs::NumRenderTargets; ++rt) {
23 FillBlock(tables[0], begin + rt * num_per_rt, num_per_rt, ColorBuffer0 + rt);
24 }
25 FillBlock(tables[1], begin, num, RenderTargets);
26
27 static constexpr std::array zeta_flags{ZetaBuffer, RenderTargets};
28 for (std::size_t i = 0; i < std::size(zeta_flags); ++i) {
29 const u8 flag = zeta_flags[i];
30 auto& table = tables[i];
31 table[OFF(zeta_enable)] = flag;
32 table[OFF(zeta_width)] = flag;
33 table[OFF(zeta_height)] = flag;
34 FillBlock(table, OFF(zeta), NUM(zeta), flag);
35 }
36}
37
38} // namespace VideoCommon::Dirty
diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h
new file mode 100644
index 000000000..3f6c1d83a
--- /dev/null
+++ b/src/video_core/dirty_flags.h
@@ -0,0 +1,49 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <algorithm>
8#include <cstddef>
9#include <iterator>
10
11#include "common/common_types.h"
12#include "video_core/engines/maxwell_3d.h"
13
14namespace VideoCommon::Dirty {
15
16enum : u8 {
17 NullEntry = 0,
18
19 RenderTargets,
20 ColorBuffer0,
21 ColorBuffer1,
22 ColorBuffer2,
23 ColorBuffer3,
24 ColorBuffer4,
25 ColorBuffer5,
26 ColorBuffer6,
27 ColorBuffer7,
28 ZetaBuffer,
29
30 LastCommonEntry,
31};
32
33template <typename Integer>
34void FillBlock(Tegra::Engines::Maxwell3D::DirtyState::Table& table, std::size_t begin,
35 std::size_t num, Integer dirty_index) {
36 const auto it = std::begin(table) + begin;
37 std::fill(it, it + num, static_cast<u8>(dirty_index));
38}
39
40template <typename Integer1, typename Integer2>
41void FillBlock(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables, std::size_t begin,
42 std::size_t num, Integer1 index_a, Integer2 index_b) {
43 FillBlock(tables[0], begin, num, index_a);
44 FillBlock(tables[1], begin, num, index_b);
45}
46
47void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables);
48
49} // namespace VideoCommon::Dirty
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 0094fd715..713c14182 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -22,7 +22,7 @@ void DmaPusher::DispatchCalls() {
22 MICROPROFILE_SCOPE(DispatchCalls); 22 MICROPROFILE_SCOPE(DispatchCalls);
23 23
24 // On entering GPU code, assume all memory may be touched by the ARM core. 24 // On entering GPU code, assume all memory may be touched by the ARM core.
25 gpu.Maxwell3D().dirty.OnMemoryWrite(); 25 gpu.Maxwell3D().OnMemoryWrite();
26 26
27 dma_pushbuffer_subindex = 0; 27 dma_pushbuffer_subindex = 0;
28 28
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h
index d56a47710..724ee0fd6 100644
--- a/src/video_core/engines/const_buffer_engine_interface.h
+++ b/src/video_core/engines/const_buffer_engine_interface.h
@@ -16,11 +16,12 @@ namespace Tegra::Engines {
16 16
17struct SamplerDescriptor { 17struct SamplerDescriptor {
18 union { 18 union {
19 BitField<0, 20, Tegra::Shader::TextureType> texture_type; 19 u32 raw = 0;
20 BitField<20, 1, u32> is_array; 20 BitField<0, 2, Tegra::Shader::TextureType> texture_type;
21 BitField<21, 1, u32> is_buffer; 21 BitField<2, 3, Tegra::Texture::ComponentType> component_type;
22 BitField<22, 1, u32> is_shadow; 22 BitField<5, 1, u32> is_array;
23 u32 raw{}; 23 BitField<6, 1, u32> is_buffer;
24 BitField<7, 1, u32> is_shadow;
24 }; 25 };
25 26
26 bool operator==(const SamplerDescriptor& rhs) const noexcept { 27 bool operator==(const SamplerDescriptor& rhs) const noexcept {
@@ -31,68 +32,48 @@ struct SamplerDescriptor {
31 return !operator==(rhs); 32 return !operator==(rhs);
32 } 33 }
33 34
34 static SamplerDescriptor FromTicTexture(Tegra::Texture::TextureType tic_texture_type) { 35 static SamplerDescriptor FromTIC(const Tegra::Texture::TICEntry& tic) {
36 using Tegra::Shader::TextureType;
35 SamplerDescriptor result; 37 SamplerDescriptor result;
36 switch (tic_texture_type) { 38
39 // This is going to be used to determine the shading language type.
40 // Because of that we don't care about all component types on color textures.
41 result.component_type.Assign(tic.r_type.Value());
42
43 switch (tic.texture_type.Value()) {
37 case Tegra::Texture::TextureType::Texture1D: 44 case Tegra::Texture::TextureType::Texture1D:
38 result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); 45 result.texture_type.Assign(TextureType::Texture1D);
39 result.is_array.Assign(0);
40 result.is_buffer.Assign(0);
41 result.is_shadow.Assign(0);
42 return result; 46 return result;
43 case Tegra::Texture::TextureType::Texture2D: 47 case Tegra::Texture::TextureType::Texture2D:
44 result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); 48 result.texture_type.Assign(TextureType::Texture2D);
45 result.is_array.Assign(0);
46 result.is_buffer.Assign(0);
47 result.is_shadow.Assign(0);
48 return result; 49 return result;
49 case Tegra::Texture::TextureType::Texture3D: 50 case Tegra::Texture::TextureType::Texture3D:
50 result.texture_type.Assign(Tegra::Shader::TextureType::Texture3D); 51 result.texture_type.Assign(TextureType::Texture3D);
51 result.is_array.Assign(0);
52 result.is_buffer.Assign(0);
53 result.is_shadow.Assign(0);
54 return result; 52 return result;
55 case Tegra::Texture::TextureType::TextureCubemap: 53 case Tegra::Texture::TextureType::TextureCubemap:
56 result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube); 54 result.texture_type.Assign(TextureType::TextureCube);
57 result.is_array.Assign(0);
58 result.is_buffer.Assign(0);
59 result.is_shadow.Assign(0);
60 return result; 55 return result;
61 case Tegra::Texture::TextureType::Texture1DArray: 56 case Tegra::Texture::TextureType::Texture1DArray:
62 result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); 57 result.texture_type.Assign(TextureType::Texture1D);
63 result.is_array.Assign(1); 58 result.is_array.Assign(1);
64 result.is_buffer.Assign(0);
65 result.is_shadow.Assign(0);
66 return result; 59 return result;
67 case Tegra::Texture::TextureType::Texture2DArray: 60 case Tegra::Texture::TextureType::Texture2DArray:
68 result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); 61 result.texture_type.Assign(TextureType::Texture2D);
69 result.is_array.Assign(1); 62 result.is_array.Assign(1);
70 result.is_buffer.Assign(0);
71 result.is_shadow.Assign(0);
72 return result; 63 return result;
73 case Tegra::Texture::TextureType::Texture1DBuffer: 64 case Tegra::Texture::TextureType::Texture1DBuffer:
74 result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); 65 result.texture_type.Assign(TextureType::Texture1D);
75 result.is_array.Assign(0);
76 result.is_buffer.Assign(1); 66 result.is_buffer.Assign(1);
77 result.is_shadow.Assign(0);
78 return result; 67 return result;
79 case Tegra::Texture::TextureType::Texture2DNoMipmap: 68 case Tegra::Texture::TextureType::Texture2DNoMipmap:
80 result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); 69 result.texture_type.Assign(TextureType::Texture2D);
81 result.is_array.Assign(0);
82 result.is_buffer.Assign(0);
83 result.is_shadow.Assign(0);
84 return result; 70 return result;
85 case Tegra::Texture::TextureType::TextureCubeArray: 71 case Tegra::Texture::TextureType::TextureCubeArray:
86 result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube); 72 result.texture_type.Assign(TextureType::TextureCube);
87 result.is_array.Assign(1); 73 result.is_array.Assign(1);
88 result.is_buffer.Assign(0);
89 result.is_shadow.Assign(0);
90 return result; 74 return result;
91 default: 75 default:
92 result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); 76 result.texture_type.Assign(TextureType::Texture2D);
93 result.is_array.Assign(0);
94 result.is_buffer.Assign(0);
95 result.is_shadow.Assign(0);
96 return result; 77 return result;
97 } 78 }
98 } 79 }
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 4b824aa4e..368c75a66 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -39,7 +39,7 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
39 const bool is_last_call = method_call.IsLastCall(); 39 const bool is_last_call = method_call.IsLastCall();
40 upload_state.ProcessData(method_call.argument, is_last_call); 40 upload_state.ProcessData(method_call.argument, is_last_call);
41 if (is_last_call) { 41 if (is_last_call) {
42 system.GPU().Maxwell3D().dirty.OnMemoryWrite(); 42 system.GPU().Maxwell3D().OnMemoryWrite();
43 } 43 }
44 break; 44 break;
45 } 45 }
@@ -89,7 +89,7 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
89 89
90 const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; 90 const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
91 const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); 91 const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
92 SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value()); 92 SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
93 result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); 93 result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
94 return result; 94 return result;
95} 95}
@@ -119,14 +119,6 @@ Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const {
119 Texture::TICEntry tic_entry; 119 Texture::TICEntry tic_entry;
120 memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); 120 memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
121 121
122 const auto r_type{tic_entry.r_type.Value()};
123 const auto g_type{tic_entry.g_type.Value()};
124 const auto b_type{tic_entry.b_type.Value()};
125 const auto a_type{tic_entry.a_type.Value()};
126
127 // TODO(Subv): Different data types for separate components are not supported
128 DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);
129
130 return tic_entry; 122 return tic_entry;
131} 123}
132 124
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index fa4a7c5c1..597872e43 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -34,7 +34,7 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
34 const bool is_last_call = method_call.IsLastCall(); 34 const bool is_last_call = method_call.IsLastCall();
35 upload_state.ProcessData(method_call.argument, is_last_call); 35 upload_state.ProcessData(method_call.argument, is_last_call);
36 if (is_last_call) { 36 if (is_last_call) {
37 system.GPU().Maxwell3D().dirty.OnMemoryWrite(); 37 system.GPU().Maxwell3D().OnMemoryWrite();
38 } 38 }
39 break; 39 break;
40 } 40 }
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index b28de1092..ba63b44b4 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -26,7 +26,8 @@ Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& raste
26 MemoryManager& memory_manager) 26 MemoryManager& memory_manager)
27 : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, 27 : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},
28 macro_interpreter{*this}, upload_state{memory_manager, regs.upload} { 28 macro_interpreter{*this}, upload_state{memory_manager, regs.upload} {
29 InitDirtySettings(); 29 dirty.flags.flip();
30
30 InitializeRegisterDefaults(); 31 InitializeRegisterDefaults();
31} 32}
32 33
@@ -75,8 +76,8 @@ void Maxwell3D::InitializeRegisterDefaults() {
75 regs.stencil_back_mask = 0xFFFFFFFF; 76 regs.stencil_back_mask = 0xFFFFFFFF;
76 77
77 regs.depth_test_func = Regs::ComparisonOp::Always; 78 regs.depth_test_func = Regs::ComparisonOp::Always;
78 regs.cull.front_face = Regs::Cull::FrontFace::CounterClockWise; 79 regs.front_face = Regs::FrontFace::CounterClockWise;
79 regs.cull.cull_face = Regs::Cull::CullFace::Back; 80 regs.cull_face = Regs::CullFace::Back;
80 81
81 // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a 82 // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a
82 // register carrying a default value. Assume it's OpenGL's default (1). 83 // register carrying a default value. Assume it's OpenGL's default (1).
@@ -95,7 +96,9 @@ void Maxwell3D::InitializeRegisterDefaults() {
95 regs.rasterize_enable = 1; 96 regs.rasterize_enable = 1;
96 regs.rt_separate_frag_data = 1; 97 regs.rt_separate_frag_data = 1;
97 regs.framebuffer_srgb = 1; 98 regs.framebuffer_srgb = 1;
98 regs.cull.front_face = Maxwell3D::Regs::Cull::FrontFace::ClockWise; 99 regs.front_face = Maxwell3D::Regs::FrontFace::ClockWise;
100
101 shadow_state = regs;
99 102
100 mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_end_gl)] = true; 103 mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_end_gl)] = true;
101 mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_begin_gl)] = true; 104 mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_begin_gl)] = true;
@@ -103,164 +106,6 @@ void Maxwell3D::InitializeRegisterDefaults() {
103 mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true; 106 mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true;
104} 107}
105 108
106#define DIRTY_REGS_POS(field_name) static_cast<u8>(offsetof(Maxwell3D::DirtyRegs, field_name))
107
108void Maxwell3D::InitDirtySettings() {
109 const auto set_block = [this](std::size_t start, std::size_t range, u8 position) {
110 const auto start_itr = dirty_pointers.begin() + start;
111 const auto end_itr = start_itr + range;
112 std::fill(start_itr, end_itr, position);
113 };
114 dirty.regs.fill(true);
115
116 // Init Render Targets
117 constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
118 constexpr u32 rt_start_reg = MAXWELL3D_REG_INDEX(rt);
119 constexpr u32 rt_end_reg = rt_start_reg + registers_per_rt * 8;
120 u8 rt_dirty_reg = DIRTY_REGS_POS(render_target);
121 for (u32 rt_reg = rt_start_reg; rt_reg < rt_end_reg; rt_reg += registers_per_rt) {
122 set_block(rt_reg, registers_per_rt, rt_dirty_reg);
123 ++rt_dirty_reg;
124 }
125 constexpr u32 depth_buffer_flag = DIRTY_REGS_POS(depth_buffer);
126 dirty_pointers[MAXWELL3D_REG_INDEX(zeta_enable)] = depth_buffer_flag;
127 dirty_pointers[MAXWELL3D_REG_INDEX(zeta_width)] = depth_buffer_flag;
128 dirty_pointers[MAXWELL3D_REG_INDEX(zeta_height)] = depth_buffer_flag;
129 constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
130 constexpr u32 zeta_reg = MAXWELL3D_REG_INDEX(zeta);
131 set_block(zeta_reg, registers_in_zeta, depth_buffer_flag);
132
133 // Init Vertex Arrays
134 constexpr u32 vertex_array_start = MAXWELL3D_REG_INDEX(vertex_array);
135 constexpr u32 vertex_array_size = sizeof(regs.vertex_array[0]) / sizeof(u32);
136 constexpr u32 vertex_array_end = vertex_array_start + vertex_array_size * Regs::NumVertexArrays;
137 u8 va_dirty_reg = DIRTY_REGS_POS(vertex_array);
138 u8 vi_dirty_reg = DIRTY_REGS_POS(vertex_instance);
139 for (u32 vertex_reg = vertex_array_start; vertex_reg < vertex_array_end;
140 vertex_reg += vertex_array_size) {
141 set_block(vertex_reg, 3, va_dirty_reg);
142 // The divisor concerns vertex array instances
143 dirty_pointers[static_cast<std::size_t>(vertex_reg) + 3] = vi_dirty_reg;
144 ++va_dirty_reg;
145 ++vi_dirty_reg;
146 }
147 constexpr u32 vertex_limit_start = MAXWELL3D_REG_INDEX(vertex_array_limit);
148 constexpr u32 vertex_limit_size = sizeof(regs.vertex_array_limit[0]) / sizeof(u32);
149 constexpr u32 vertex_limit_end = vertex_limit_start + vertex_limit_size * Regs::NumVertexArrays;
150 va_dirty_reg = DIRTY_REGS_POS(vertex_array);
151 for (u32 vertex_reg = vertex_limit_start; vertex_reg < vertex_limit_end;
152 vertex_reg += vertex_limit_size) {
153 set_block(vertex_reg, vertex_limit_size, va_dirty_reg);
154 va_dirty_reg++;
155 }
156 constexpr u32 vertex_instance_start = MAXWELL3D_REG_INDEX(instanced_arrays);
157 constexpr u32 vertex_instance_size =
158 sizeof(regs.instanced_arrays.is_instanced[0]) / sizeof(u32);
159 constexpr u32 vertex_instance_end =
160 vertex_instance_start + vertex_instance_size * Regs::NumVertexArrays;
161 vi_dirty_reg = DIRTY_REGS_POS(vertex_instance);
162 for (u32 vertex_reg = vertex_instance_start; vertex_reg < vertex_instance_end;
163 vertex_reg += vertex_instance_size) {
164 set_block(vertex_reg, vertex_instance_size, vi_dirty_reg);
165 vi_dirty_reg++;
166 }
167 set_block(MAXWELL3D_REG_INDEX(vertex_attrib_format), regs.vertex_attrib_format.size(),
168 DIRTY_REGS_POS(vertex_attrib_format));
169
170 // Init Shaders
171 constexpr u32 shader_registers_count =
172 sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
173 set_block(MAXWELL3D_REG_INDEX(shader_config[0]), shader_registers_count,
174 DIRTY_REGS_POS(shaders));
175
176 // State
177
178 // Viewport
179 constexpr u8 viewport_dirty_reg = DIRTY_REGS_POS(viewport);
180 constexpr u32 viewport_start = MAXWELL3D_REG_INDEX(viewports);
181 constexpr u32 viewport_size = sizeof(regs.viewports) / sizeof(u32);
182 set_block(viewport_start, viewport_size, viewport_dirty_reg);
183 constexpr u32 view_volume_start = MAXWELL3D_REG_INDEX(view_volume_clip_control);
184 constexpr u32 view_volume_size = sizeof(regs.view_volume_clip_control) / sizeof(u32);
185 set_block(view_volume_start, view_volume_size, viewport_dirty_reg);
186
187 // Viewport transformation
188 constexpr u32 viewport_trans_start = MAXWELL3D_REG_INDEX(viewport_transform);
189 constexpr u32 viewport_trans_size = sizeof(regs.viewport_transform) / sizeof(u32);
190 set_block(viewport_trans_start, viewport_trans_size, DIRTY_REGS_POS(viewport_transform));
191
192 // Cullmode
193 constexpr u32 cull_mode_start = MAXWELL3D_REG_INDEX(cull);
194 constexpr u32 cull_mode_size = sizeof(regs.cull) / sizeof(u32);
195 set_block(cull_mode_start, cull_mode_size, DIRTY_REGS_POS(cull_mode));
196
197 // Screen y control
198 dirty_pointers[MAXWELL3D_REG_INDEX(screen_y_control)] = DIRTY_REGS_POS(screen_y_control);
199
200 // Primitive Restart
201 constexpr u32 primitive_restart_start = MAXWELL3D_REG_INDEX(primitive_restart);
202 constexpr u32 primitive_restart_size = sizeof(regs.primitive_restart) / sizeof(u32);
203 set_block(primitive_restart_start, primitive_restart_size, DIRTY_REGS_POS(primitive_restart));
204
205 // Depth Test
206 constexpr u8 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test);
207 dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_enable)] = depth_test_dirty_reg;
208 dirty_pointers[MAXWELL3D_REG_INDEX(depth_write_enabled)] = depth_test_dirty_reg;
209 dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_func)] = depth_test_dirty_reg;
210
211 // Stencil Test
212 constexpr u32 stencil_test_dirty_reg = DIRTY_REGS_POS(stencil_test);
213 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_enable)] = stencil_test_dirty_reg;
214 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_func)] = stencil_test_dirty_reg;
215 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_ref)] = stencil_test_dirty_reg;
216 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_mask)] = stencil_test_dirty_reg;
217 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_fail)] = stencil_test_dirty_reg;
218 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zfail)] = stencil_test_dirty_reg;
219 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zpass)] = stencil_test_dirty_reg;
220 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_mask)] = stencil_test_dirty_reg;
221 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_two_side_enable)] = stencil_test_dirty_reg;
222 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_func)] = stencil_test_dirty_reg;
223 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_ref)] = stencil_test_dirty_reg;
224 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_mask)] = stencil_test_dirty_reg;
225 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_fail)] = stencil_test_dirty_reg;
226 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zfail)] = stencil_test_dirty_reg;
227 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zpass)] = stencil_test_dirty_reg;
228 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_mask)] = stencil_test_dirty_reg;
229
230 // Color Mask
231 constexpr u8 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask);
232 dirty_pointers[MAXWELL3D_REG_INDEX(color_mask_common)] = color_mask_dirty_reg;
233 set_block(MAXWELL3D_REG_INDEX(color_mask), sizeof(regs.color_mask) / sizeof(u32),
234 color_mask_dirty_reg);
235 // Blend State
236 constexpr u8 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state);
237 set_block(MAXWELL3D_REG_INDEX(blend_color), sizeof(regs.blend_color) / sizeof(u32),
238 blend_state_dirty_reg);
239 dirty_pointers[MAXWELL3D_REG_INDEX(independent_blend_enable)] = blend_state_dirty_reg;
240 set_block(MAXWELL3D_REG_INDEX(blend), sizeof(regs.blend) / sizeof(u32), blend_state_dirty_reg);
241 set_block(MAXWELL3D_REG_INDEX(independent_blend), sizeof(regs.independent_blend) / sizeof(u32),
242 blend_state_dirty_reg);
243
244 // Scissor State
245 constexpr u8 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test);
246 set_block(MAXWELL3D_REG_INDEX(scissor_test), sizeof(regs.scissor_test) / sizeof(u32),
247 scissor_test_dirty_reg);
248
249 // Polygon Offset
250 constexpr u8 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset);
251 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_fill_enable)] = polygon_offset_dirty_reg;
252 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_line_enable)] = polygon_offset_dirty_reg;
253 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_point_enable)] = polygon_offset_dirty_reg;
254 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_units)] = polygon_offset_dirty_reg;
255 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_factor)] = polygon_offset_dirty_reg;
256 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg;
257
258 // Depth bounds
259 constexpr u8 depth_bounds_values_dirty_reg = DIRTY_REGS_POS(depth_bounds_values);
260 dirty_pointers[MAXWELL3D_REG_INDEX(depth_bounds[0])] = depth_bounds_values_dirty_reg;
261 dirty_pointers[MAXWELL3D_REG_INDEX(depth_bounds[1])] = depth_bounds_values_dirty_reg;
262}
263
264void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u32* parameters) { 109void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u32* parameters) {
265 // Reset the current macro. 110 // Reset the current macro.
266 executing_macro = 0; 111 executing_macro = 0;
@@ -317,31 +162,34 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
317 ASSERT_MSG(method < Regs::NUM_REGS, 162 ASSERT_MSG(method < Regs::NUM_REGS,
318 "Invalid Maxwell3D register, increase the size of the Regs structure"); 163 "Invalid Maxwell3D register, increase the size of the Regs structure");
319 164
320 if (regs.reg_array[method] != method_call.argument) { 165 u32 arg = method_call.argument;
321 regs.reg_array[method] = method_call.argument; 166 // Keep track of the register value in shadow_state when requested.
322 const std::size_t dirty_reg = dirty_pointers[method]; 167 if (shadow_state.shadow_ram_control == Regs::ShadowRamControl::Track ||
323 if (dirty_reg) { 168 shadow_state.shadow_ram_control == Regs::ShadowRamControl::TrackWithFilter) {
324 dirty.regs[dirty_reg] = true; 169 shadow_state.reg_array[method] = arg;
325 if (dirty_reg >= DIRTY_REGS_POS(vertex_array) && 170 } else if (shadow_state.shadow_ram_control == Regs::ShadowRamControl::Replay) {
326 dirty_reg < DIRTY_REGS_POS(vertex_array_buffers)) { 171 arg = shadow_state.reg_array[method];
327 dirty.vertex_array_buffers = true; 172 }
328 } else if (dirty_reg >= DIRTY_REGS_POS(vertex_instance) && 173
329 dirty_reg < DIRTY_REGS_POS(vertex_instances)) { 174 if (regs.reg_array[method] != arg) {
330 dirty.vertex_instances = true; 175 regs.reg_array[method] = arg;
331 } else if (dirty_reg >= DIRTY_REGS_POS(render_target) && 176
332 dirty_reg < DIRTY_REGS_POS(render_settings)) { 177 for (const auto& table : dirty.tables) {
333 dirty.render_settings = true; 178 dirty.flags[table[method]] = true;
334 }
335 } 179 }
336 } 180 }
337 181
338 switch (method) { 182 switch (method) {
183 case MAXWELL3D_REG_INDEX(shadow_ram_control): {
184 shadow_state.shadow_ram_control = static_cast<Regs::ShadowRamControl>(method_call.argument);
185 break;
186 }
339 case MAXWELL3D_REG_INDEX(macros.data): { 187 case MAXWELL3D_REG_INDEX(macros.data): {
340 ProcessMacroUpload(method_call.argument); 188 ProcessMacroUpload(arg);
341 break; 189 break;
342 } 190 }
343 case MAXWELL3D_REG_INDEX(macros.bind): { 191 case MAXWELL3D_REG_INDEX(macros.bind): {
344 ProcessMacroBind(method_call.argument); 192 ProcessMacroBind(arg);
345 break; 193 break;
346 } 194 }
347 case MAXWELL3D_REG_INDEX(firmware[4]): { 195 case MAXWELL3D_REG_INDEX(firmware[4]): {
@@ -417,9 +265,9 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
417 } 265 }
418 case MAXWELL3D_REG_INDEX(data_upload): { 266 case MAXWELL3D_REG_INDEX(data_upload): {
419 const bool is_last_call = method_call.IsLastCall(); 267 const bool is_last_call = method_call.IsLastCall();
420 upload_state.ProcessData(method_call.argument, is_last_call); 268 upload_state.ProcessData(arg, is_last_call);
421 if (is_last_call) { 269 if (is_last_call) {
422 dirty.OnMemoryWrite(); 270 OnMemoryWrite();
423 } 271 }
424 break; 272 break;
425 } 273 }
@@ -727,7 +575,7 @@ void Maxwell3D::FinishCBData() {
727 575
728 const u32 id = cb_data_state.id; 576 const u32 id = cb_data_state.id;
729 memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size); 577 memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size);
730 dirty.OnMemoryWrite(); 578 OnMemoryWrite();
731 579
732 cb_data_state.id = null_cb_data; 580 cb_data_state.id = null_cb_data;
733 cb_data_state.current = null_cb_data; 581 cb_data_state.current = null_cb_data;
@@ -805,7 +653,7 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
805 653
806 const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; 654 const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
807 const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); 655 const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
808 SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value()); 656 SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
809 result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); 657 result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
810 return result; 658 return result;
811} 659}
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 26939be3f..d24c9f657 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -6,6 +6,7 @@
6 6
7#include <array> 7#include <array>
8#include <bitset> 8#include <bitset>
9#include <limits>
9#include <optional> 10#include <optional>
10#include <type_traits> 11#include <type_traits>
11#include <unordered_map> 12#include <unordered_map>
@@ -66,6 +67,7 @@ public:
66 static constexpr std::size_t NumVaryings = 31; 67 static constexpr std::size_t NumVaryings = 31;
67 static constexpr std::size_t NumImages = 8; // TODO(Rodrigo): Investigate this number 68 static constexpr std::size_t NumImages = 8; // TODO(Rodrigo): Investigate this number
68 static constexpr std::size_t NumClipDistances = 8; 69 static constexpr std::size_t NumClipDistances = 8;
70 static constexpr std::size_t NumTransformFeedbackBuffers = 4;
69 static constexpr std::size_t MaxShaderProgram = 6; 71 static constexpr std::size_t MaxShaderProgram = 6;
70 static constexpr std::size_t MaxShaderStage = 5; 72 static constexpr std::size_t MaxShaderStage = 5;
71 // Maximum number of const buffers per shader stage. 73 // Maximum number of const buffers per shader stage.
@@ -431,21 +433,15 @@ public:
431 GeneratedPrimitives = 0x1F, 433 GeneratedPrimitives = 0x1F,
432 }; 434 };
433 435
434 struct Cull { 436 enum class FrontFace : u32 {
435 enum class FrontFace : u32 { 437 ClockWise = 0x0900,
436 ClockWise = 0x0900, 438 CounterClockWise = 0x0901,
437 CounterClockWise = 0x0901, 439 };
438 };
439
440 enum class CullFace : u32 {
441 Front = 0x0404,
442 Back = 0x0405,
443 FrontAndBack = 0x0408,
444 };
445 440
446 u32 enabled; 441 enum class CullFace : u32 {
447 FrontFace front_face; 442 Front = 0x0404,
448 CullFace cull_face; 443 Back = 0x0405,
444 FrontAndBack = 0x0408,
449 }; 445 };
450 446
451 struct Blend { 447 struct Blend {
@@ -529,6 +525,23 @@ public:
529 FractionalEven = 2, 525 FractionalEven = 2,
530 }; 526 };
531 527
528 enum class PolygonMode : u32 {
529 Point = 0x1b00,
530 Line = 0x1b01,
531 Fill = 0x1b02,
532 };
533
534 enum class ShadowRamControl : u32 {
535 // write value to shadow ram
536 Track = 0,
537 // write value to shadow ram ( with validation ??? )
538 TrackWithFilter = 1,
539 // only write to real hw register
540 Passthrough = 2,
541 // write value from shadow ram to real hw register
542 Replay = 3,
543 };
544
532 struct RenderTargetConfig { 545 struct RenderTargetConfig {
533 u32 address_high; 546 u32 address_high;
534 u32 address_low; 547 u32 address_low;
@@ -542,7 +555,7 @@ public:
542 BitField<12, 1, InvMemoryLayout> type; 555 BitField<12, 1, InvMemoryLayout> type;
543 } memory_layout; 556 } memory_layout;
544 union { 557 union {
545 BitField<0, 16, u32> array_mode; 558 BitField<0, 16, u32> layers;
546 BitField<16, 1, u32> volume; 559 BitField<16, 1, u32> volume;
547 }; 560 };
548 u32 layer_stride; 561 u32 layer_stride;
@@ -574,7 +587,7 @@ public:
574 f32 translate_z; 587 f32 translate_z;
575 INSERT_UNION_PADDING_WORDS(2); 588 INSERT_UNION_PADDING_WORDS(2);
576 589
577 Common::Rectangle<s32> GetRect() const { 590 Common::Rectangle<f32> GetRect() const {
578 return { 591 return {
579 GetX(), // left 592 GetX(), // left
580 GetY() + GetHeight(), // top 593 GetY() + GetHeight(), // top
@@ -583,20 +596,20 @@ public:
583 }; 596 };
584 }; 597 };
585 598
586 s32 GetX() const { 599 f32 GetX() const {
587 return static_cast<s32>(std::max(0.0f, translate_x - std::fabs(scale_x))); 600 return std::max(0.0f, translate_x - std::fabs(scale_x));
588 } 601 }
589 602
590 s32 GetY() const { 603 f32 GetY() const {
591 return static_cast<s32>(std::max(0.0f, translate_y - std::fabs(scale_y))); 604 return std::max(0.0f, translate_y - std::fabs(scale_y));
592 } 605 }
593 606
594 s32 GetWidth() const { 607 f32 GetWidth() const {
595 return static_cast<s32>(translate_x + std::fabs(scale_x)) - GetX(); 608 return translate_x + std::fabs(scale_x) - GetX();
596 } 609 }
597 610
598 s32 GetHeight() const { 611 f32 GetHeight() const {
599 return static_cast<s32>(translate_y + std::fabs(scale_y)) - GetY(); 612 return translate_y + std::fabs(scale_y) - GetY();
600 } 613 }
601 }; 614 };
602 615
@@ -626,6 +639,29 @@ public:
626 float depth_range_far; 639 float depth_range_far;
627 }; 640 };
628 641
642 struct TransformFeedbackBinding {
643 u32 buffer_enable;
644 u32 address_high;
645 u32 address_low;
646 s32 buffer_size;
647 s32 buffer_offset;
648 INSERT_UNION_PADDING_WORDS(3);
649
650 GPUVAddr Address() const {
651 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
652 address_low);
653 }
654 };
655 static_assert(sizeof(TransformFeedbackBinding) == 32);
656
657 struct TransformFeedbackLayout {
658 u32 stream;
659 u32 varying_count;
660 u32 stride;
661 INSERT_UNION_PADDING_WORDS(1);
662 };
663 static_assert(sizeof(TransformFeedbackLayout) == 16);
664
629 bool IsShaderConfigEnabled(std::size_t index) const { 665 bool IsShaderConfigEnabled(std::size_t index) const {
630 // The VertexB is always enabled. 666 // The VertexB is always enabled.
631 if (index == static_cast<std::size_t>(Regs::ShaderProgram::VertexB)) { 667 if (index == static_cast<std::size_t>(Regs::ShaderProgram::VertexB)) {
@@ -634,6 +670,10 @@ public:
634 return shader_config[index].enable != 0; 670 return shader_config[index].enable != 0;
635 } 671 }
636 672
673 bool IsShaderConfigEnabled(Regs::ShaderProgram type) const {
674 return IsShaderConfigEnabled(static_cast<std::size_t>(type));
675 }
676
637 union { 677 union {
638 struct { 678 struct {
639 INSERT_UNION_PADDING_WORDS(0x45); 679 INSERT_UNION_PADDING_WORDS(0x45);
@@ -645,7 +685,9 @@ public:
645 u32 bind; 685 u32 bind;
646 } macros; 686 } macros;
647 687
648 INSERT_UNION_PADDING_WORDS(0x17); 688 ShadowRamControl shadow_ram_control;
689
690 INSERT_UNION_PADDING_WORDS(0x16);
649 691
650 Upload::Registers upload; 692 Upload::Registers upload;
651 struct { 693 struct {
@@ -682,7 +724,13 @@ public:
682 724
683 u32 rasterize_enable; 725 u32 rasterize_enable;
684 726
685 INSERT_UNION_PADDING_WORDS(0xF1); 727 std::array<TransformFeedbackBinding, NumTransformFeedbackBuffers> tfb_bindings;
728
729 INSERT_UNION_PADDING_WORDS(0xC0);
730
731 std::array<TransformFeedbackLayout, NumTransformFeedbackBuffers> tfb_layouts;
732
733 INSERT_UNION_PADDING_WORDS(0x1);
686 734
687 u32 tfb_enabled; 735 u32 tfb_enabled;
688 736
@@ -710,7 +758,12 @@ public:
710 758
711 s32 clear_stencil; 759 s32 clear_stencil;
712 760
713 INSERT_UNION_PADDING_WORDS(0x7); 761 INSERT_UNION_PADDING_WORDS(0x2);
762
763 PolygonMode polygon_mode_front;
764 PolygonMode polygon_mode_back;
765
766 INSERT_UNION_PADDING_WORDS(0x3);
714 767
715 u32 polygon_offset_point_enable; 768 u32 polygon_offset_point_enable;
716 u32 polygon_offset_line_enable; 769 u32 polygon_offset_line_enable;
@@ -769,7 +822,11 @@ public:
769 BitField<12, 4, u32> viewport; 822 BitField<12, 4, u32> viewport;
770 } clear_flags; 823 } clear_flags;
771 824
772 INSERT_UNION_PADDING_WORDS(0x19); 825 INSERT_UNION_PADDING_WORDS(0x10);
826
827 u32 fill_rectangle;
828
829 INSERT_UNION_PADDING_WORDS(0x8);
773 830
774 std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format; 831 std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format;
775 832
@@ -800,8 +857,12 @@ public:
800 857
801 u32 zeta_width; 858 u32 zeta_width;
802 u32 zeta_height; 859 u32 zeta_height;
860 union {
861 BitField<0, 16, u32> zeta_layers;
862 BitField<16, 1, u32> zeta_volume;
863 };
803 864
804 INSERT_UNION_PADDING_WORDS(0x27); 865 INSERT_UNION_PADDING_WORDS(0x26);
805 866
806 u32 depth_test_enable; 867 u32 depth_test_enable;
807 868
@@ -868,16 +929,7 @@ public:
868 929
869 INSERT_UNION_PADDING_WORDS(0x35); 930 INSERT_UNION_PADDING_WORDS(0x35);
870 931
871 union { 932 u32 clip_distance_enabled;
872 BitField<0, 1, u32> c0;
873 BitField<1, 1, u32> c1;
874 BitField<2, 1, u32> c2;
875 BitField<3, 1, u32> c3;
876 BitField<4, 1, u32> c4;
877 BitField<5, 1, u32> c5;
878 BitField<6, 1, u32> c6;
879 BitField<7, 1, u32> c7;
880 } clip_distance_enabled;
881 933
882 u32 samplecnt_enable; 934 u32 samplecnt_enable;
883 935
@@ -1056,7 +1108,9 @@ public:
1056 1108
1057 INSERT_UNION_PADDING_WORDS(1); 1109 INSERT_UNION_PADDING_WORDS(1);
1058 1110
1059 Cull cull; 1111 u32 cull_test_enabled;
1112 FrontFace front_face;
1113 CullFace cull_face;
1060 1114
1061 u32 pixel_center_integer; 1115 u32 pixel_center_integer;
1062 1116
@@ -1195,7 +1249,11 @@ public:
1195 1249
1196 u32 tex_cb_index; 1250 u32 tex_cb_index;
1197 1251
1198 INSERT_UNION_PADDING_WORDS(0x395); 1252 INSERT_UNION_PADDING_WORDS(0x7D);
1253
1254 std::array<std::array<u8, 128>, NumTransformFeedbackBuffers> tfb_varying_locs;
1255
1256 INSERT_UNION_PADDING_WORDS(0x298);
1199 1257
1200 struct { 1258 struct {
1201 /// Compressed address of a buffer that holds information about bound SSBOs. 1259 /// Compressed address of a buffer that holds information about bound SSBOs.
@@ -1218,7 +1276,12 @@ public:
1218 }; 1276 };
1219 std::array<u32, NUM_REGS> reg_array; 1277 std::array<u32, NUM_REGS> reg_array;
1220 }; 1278 };
1221 } regs{}; 1279 };
1280
1281 Regs regs{};
1282
1283 /// Store temporary hw register values, used by some calls to restore state after a operation
1284 Regs shadow_state;
1222 1285
1223 static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size"); 1286 static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size");
1224 static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable"); 1287 static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable");
@@ -1234,79 +1297,6 @@ public:
1234 1297
1235 State state{}; 1298 State state{};
1236 1299
1237 struct DirtyRegs {
1238 static constexpr std::size_t NUM_REGS = 256;
1239 static_assert(NUM_REGS - 1 <= std::numeric_limits<u8>::max());
1240
1241 union {
1242 struct {
1243 bool null_dirty;
1244
1245 // Vertex Attributes
1246 bool vertex_attrib_format;
1247
1248 // Vertex Arrays
1249 std::array<bool, 32> vertex_array;
1250
1251 bool vertex_array_buffers;
1252
1253 // Vertex Instances
1254 std::array<bool, 32> vertex_instance;
1255
1256 bool vertex_instances;
1257
1258 // Render Targets
1259 std::array<bool, 8> render_target;
1260 bool depth_buffer;
1261
1262 bool render_settings;
1263
1264 // Shaders
1265 bool shaders;
1266
1267 // Rasterizer State
1268 bool viewport;
1269 bool clip_coefficient;
1270 bool cull_mode;
1271 bool primitive_restart;
1272 bool depth_test;
1273 bool stencil_test;
1274 bool blend_state;
1275 bool scissor_test;
1276 bool transform_feedback;
1277 bool color_mask;
1278 bool polygon_offset;
1279 bool depth_bounds_values;
1280
1281 // Complementary
1282 bool viewport_transform;
1283 bool screen_y_control;
1284
1285 bool memory_general;
1286 };
1287 std::array<bool, NUM_REGS> regs;
1288 };
1289
1290 void ResetVertexArrays() {
1291 vertex_array.fill(true);
1292 vertex_array_buffers = true;
1293 }
1294
1295 void ResetRenderTargets() {
1296 depth_buffer = true;
1297 render_target.fill(true);
1298 render_settings = true;
1299 }
1300
1301 void OnMemoryWrite() {
1302 shaders = true;
1303 memory_general = true;
1304 ResetRenderTargets();
1305 ResetVertexArrays();
1306 }
1307
1308 } dirty{};
1309
1310 /// Reads a register value located at the input method address 1300 /// Reads a register value located at the input method address
1311 u32 GetRegisterValue(u32 method) const; 1301 u32 GetRegisterValue(u32 method) const;
1312 1302
@@ -1352,6 +1342,11 @@ public:
1352 return execute_on; 1342 return execute_on;
1353 } 1343 }
1354 1344
1345 /// Notify a memory write has happened.
1346 void OnMemoryWrite() {
1347 dirty.flags |= dirty.on_write_stores;
1348 }
1349
1355 enum class MMEDrawMode : u32 { 1350 enum class MMEDrawMode : u32 {
1356 Undefined, 1351 Undefined,
1357 Array, 1352 Array,
@@ -1367,6 +1362,16 @@ public:
1367 u32 gl_end_count{}; 1362 u32 gl_end_count{};
1368 } mme_draw; 1363 } mme_draw;
1369 1364
1365 struct DirtyState {
1366 using Flags = std::bitset<std::numeric_limits<u8>::max()>;
1367 using Table = std::array<u8, Regs::NUM_REGS>;
1368 using Tables = std::array<Table, 2>;
1369
1370 Flags flags;
1371 Flags on_write_stores;
1372 Tables tables{};
1373 } dirty;
1374
1370private: 1375private:
1371 void InitializeRegisterDefaults(); 1376 void InitializeRegisterDefaults();
1372 1377
@@ -1413,8 +1418,6 @@ private:
1413 /// Retrieves information about a specific TSC entry from the TSC buffer. 1418 /// Retrieves information about a specific TSC entry from the TSC buffer.
1414 Texture::TSCEntry GetTSCEntry(u32 tsc_index) const; 1419 Texture::TSCEntry GetTSCEntry(u32 tsc_index) const;
1415 1420
1416 void InitDirtySettings();
1417
1418 /** 1421 /**
1419 * Call a macro on this engine. 1422 * Call a macro on this engine.
1420 * @param method Method to call 1423 * @param method Method to call
@@ -1473,6 +1476,7 @@ private:
1473 "Field " #field_name " has invalid position") 1476 "Field " #field_name " has invalid position")
1474 1477
1475ASSERT_REG_POSITION(macros, 0x45); 1478ASSERT_REG_POSITION(macros, 0x45);
1479ASSERT_REG_POSITION(shadow_ram_control, 0x49);
1476ASSERT_REG_POSITION(upload, 0x60); 1480ASSERT_REG_POSITION(upload, 0x60);
1477ASSERT_REG_POSITION(exec_upload, 0x6C); 1481ASSERT_REG_POSITION(exec_upload, 0x6C);
1478ASSERT_REG_POSITION(data_upload, 0x6D); 1482ASSERT_REG_POSITION(data_upload, 0x6D);
@@ -1481,6 +1485,8 @@ ASSERT_REG_POSITION(tess_mode, 0xC8);
1481ASSERT_REG_POSITION(tess_level_outer, 0xC9); 1485ASSERT_REG_POSITION(tess_level_outer, 0xC9);
1482ASSERT_REG_POSITION(tess_level_inner, 0xCD); 1486ASSERT_REG_POSITION(tess_level_inner, 0xCD);
1483ASSERT_REG_POSITION(rasterize_enable, 0xDF); 1487ASSERT_REG_POSITION(rasterize_enable, 0xDF);
1488ASSERT_REG_POSITION(tfb_bindings, 0xE0);
1489ASSERT_REG_POSITION(tfb_layouts, 0x1C0);
1484ASSERT_REG_POSITION(tfb_enabled, 0x1D1); 1490ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
1485ASSERT_REG_POSITION(rt, 0x200); 1491ASSERT_REG_POSITION(rt, 0x200);
1486ASSERT_REG_POSITION(viewport_transform, 0x280); 1492ASSERT_REG_POSITION(viewport_transform, 0x280);
@@ -1490,6 +1496,8 @@ ASSERT_REG_POSITION(depth_mode, 0x35F);
1490ASSERT_REG_POSITION(clear_color[0], 0x360); 1496ASSERT_REG_POSITION(clear_color[0], 0x360);
1491ASSERT_REG_POSITION(clear_depth, 0x364); 1497ASSERT_REG_POSITION(clear_depth, 0x364);
1492ASSERT_REG_POSITION(clear_stencil, 0x368); 1498ASSERT_REG_POSITION(clear_stencil, 0x368);
1499ASSERT_REG_POSITION(polygon_mode_front, 0x36B);
1500ASSERT_REG_POSITION(polygon_mode_back, 0x36C);
1493ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370); 1501ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370);
1494ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371); 1502ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371);
1495ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372); 1503ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372);
@@ -1503,10 +1511,12 @@ ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
1503ASSERT_REG_POSITION(depth_bounds, 0x3E7); 1511ASSERT_REG_POSITION(depth_bounds, 0x3E7);
1504ASSERT_REG_POSITION(zeta, 0x3F8); 1512ASSERT_REG_POSITION(zeta, 0x3F8);
1505ASSERT_REG_POSITION(clear_flags, 0x43E); 1513ASSERT_REG_POSITION(clear_flags, 0x43E);
1514ASSERT_REG_POSITION(fill_rectangle, 0x44F);
1506ASSERT_REG_POSITION(vertex_attrib_format, 0x458); 1515ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
1507ASSERT_REG_POSITION(rt_control, 0x487); 1516ASSERT_REG_POSITION(rt_control, 0x487);
1508ASSERT_REG_POSITION(zeta_width, 0x48a); 1517ASSERT_REG_POSITION(zeta_width, 0x48a);
1509ASSERT_REG_POSITION(zeta_height, 0x48b); 1518ASSERT_REG_POSITION(zeta_height, 0x48b);
1519ASSERT_REG_POSITION(zeta_layers, 0x48c);
1510ASSERT_REG_POSITION(depth_test_enable, 0x4B3); 1520ASSERT_REG_POSITION(depth_test_enable, 0x4B3);
1511ASSERT_REG_POSITION(independent_blend_enable, 0x4B9); 1521ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);
1512ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); 1522ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
@@ -1556,7 +1566,9 @@ ASSERT_REG_POSITION(index_array, 0x5F2);
1556ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F); 1566ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F);
1557ASSERT_REG_POSITION(instanced_arrays, 0x620); 1567ASSERT_REG_POSITION(instanced_arrays, 0x620);
1558ASSERT_REG_POSITION(vp_point_size, 0x644); 1568ASSERT_REG_POSITION(vp_point_size, 0x644);
1559ASSERT_REG_POSITION(cull, 0x646); 1569ASSERT_REG_POSITION(cull_test_enabled, 0x646);
1570ASSERT_REG_POSITION(front_face, 0x647);
1571ASSERT_REG_POSITION(cull_face, 0x648);
1560ASSERT_REG_POSITION(pixel_center_integer, 0x649); 1572ASSERT_REG_POSITION(pixel_center_integer, 0x649);
1561ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B); 1573ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B);
1562ASSERT_REG_POSITION(view_volume_clip_control, 0x64F); 1574ASSERT_REG_POSITION(view_volume_clip_control, 0x64F);
@@ -1573,6 +1585,7 @@ ASSERT_REG_POSITION(firmware, 0x8C0);
1573ASSERT_REG_POSITION(const_buffer, 0x8E0); 1585ASSERT_REG_POSITION(const_buffer, 0x8E0);
1574ASSERT_REG_POSITION(cb_bind[0], 0x904); 1586ASSERT_REG_POSITION(cb_bind[0], 0x904);
1575ASSERT_REG_POSITION(tex_cb_index, 0x982); 1587ASSERT_REG_POSITION(tex_cb_index, 0x982);
1588ASSERT_REG_POSITION(tfb_varying_locs, 0xA00);
1576ASSERT_REG_POSITION(ssbo_info, 0xD18); 1589ASSERT_REG_POSITION(ssbo_info, 0xD18);
1577ASSERT_REG_POSITION(tex_info_buffers.address[0], 0xD2A); 1590ASSERT_REG_POSITION(tex_info_buffers.address[0], 0xD2A);
1578ASSERT_REG_POSITION(tex_info_buffers.size[0], 0xD2F); 1591ASSERT_REG_POSITION(tex_info_buffers.size[0], 0xD2F);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index ad8453c5f..c2610f992 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -57,7 +57,7 @@ void MaxwellDMA::HandleCopy() {
57 } 57 }
58 58
59 // All copies here update the main memory, so mark all rasterizer states as invalid. 59 // All copies here update the main memory, so mark all rasterizer states as invalid.
60 system.GPU().Maxwell3D().dirty.OnMemoryWrite(); 60 system.GPU().Maxwell3D().OnMemoryWrite();
61 61
62 if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { 62 if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
63 // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D 63 // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index c9bc83cd7..49dc5abe0 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -82,6 +82,10 @@ union Attribute {
82 Position = 7, 82 Position = 7,
83 Attribute_0 = 8, 83 Attribute_0 = 8,
84 Attribute_31 = 39, 84 Attribute_31 = 39,
85 FrontColor = 40,
86 FrontSecondaryColor = 41,
87 BackColor = 42,
88 BackSecondaryColor = 43,
85 ClipDistances0123 = 44, 89 ClipDistances0123 = 44,
86 ClipDistances4567 = 45, 90 ClipDistances4567 = 45,
87 PointCoord = 46, 91 PointCoord = 46,
@@ -89,6 +93,8 @@ union Attribute {
89 // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval 93 // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval
90 // shader. 94 // shader.
91 TessCoordInstanceIDVertexID = 47, 95 TessCoordInstanceIDVertexID = 47,
96 TexCoord_0 = 48,
97 TexCoord_7 = 55,
92 // This attribute contains a tuple of (Unk, Unk, Unk, gl_FrontFacing) when inside a fragment 98 // This attribute contains a tuple of (Unk, Unk, Unk, gl_FrontFacing) when inside a fragment
93 // shader. It is unknown what the other values contain. 99 // shader. It is unknown what the other values contain.
94 FrontFacing = 63, 100 FrontFacing = 63,
@@ -911,14 +917,9 @@ union Instruction {
911 } fadd32i; 917 } fadd32i;
912 918
913 union { 919 union {
914 BitField<20, 8, u64> shift_position; 920 BitField<40, 1, u64> brev;
915 BitField<28, 8, u64> shift_length; 921 BitField<47, 1, u64> rd_cc;
916 BitField<48, 1, u64> negate_b; 922 BitField<48, 1, u64> is_signed;
917 BitField<49, 1, u64> negate_a;
918
919 u64 GetLeftShiftValue() const {
920 return 32 - (shift_position + shift_length);
921 }
922 } bfe; 923 } bfe;
923 924
924 union { 925 union {
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 7d7137109..e8f763ce9 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -140,71 +140,6 @@ void GPU::FlushCommands() {
140 renderer.Rasterizer().FlushCommands(); 140 renderer.Rasterizer().FlushCommands();
141} 141}
142 142
143u32 RenderTargetBytesPerPixel(RenderTargetFormat format) {
144 ASSERT(format != RenderTargetFormat::NONE);
145
146 switch (format) {
147 case RenderTargetFormat::RGBA32_FLOAT:
148 case RenderTargetFormat::RGBA32_UINT:
149 return 16;
150 case RenderTargetFormat::RGBA16_UINT:
151 case RenderTargetFormat::RGBA16_UNORM:
152 case RenderTargetFormat::RGBA16_FLOAT:
153 case RenderTargetFormat::RGBX16_FLOAT:
154 case RenderTargetFormat::RG32_FLOAT:
155 case RenderTargetFormat::RG32_UINT:
156 return 8;
157 case RenderTargetFormat::RGBA8_UNORM:
158 case RenderTargetFormat::RGBA8_SNORM:
159 case RenderTargetFormat::RGBA8_SRGB:
160 case RenderTargetFormat::RGBA8_UINT:
161 case RenderTargetFormat::RGB10_A2_UNORM:
162 case RenderTargetFormat::BGRA8_UNORM:
163 case RenderTargetFormat::BGRA8_SRGB:
164 case RenderTargetFormat::RG16_UNORM:
165 case RenderTargetFormat::RG16_SNORM:
166 case RenderTargetFormat::RG16_UINT:
167 case RenderTargetFormat::RG16_SINT:
168 case RenderTargetFormat::RG16_FLOAT:
169 case RenderTargetFormat::R32_FLOAT:
170 case RenderTargetFormat::R11G11B10_FLOAT:
171 case RenderTargetFormat::R32_UINT:
172 return 4;
173 case RenderTargetFormat::R16_UNORM:
174 case RenderTargetFormat::R16_SNORM:
175 case RenderTargetFormat::R16_UINT:
176 case RenderTargetFormat::R16_SINT:
177 case RenderTargetFormat::R16_FLOAT:
178 case RenderTargetFormat::RG8_UNORM:
179 case RenderTargetFormat::RG8_SNORM:
180 return 2;
181 case RenderTargetFormat::R8_UNORM:
182 case RenderTargetFormat::R8_UINT:
183 return 1;
184 default:
185 UNIMPLEMENTED_MSG("Unimplemented render target format {}", static_cast<u32>(format));
186 return 1;
187 }
188}
189
190u32 DepthFormatBytesPerPixel(DepthFormat format) {
191 switch (format) {
192 case DepthFormat::Z32_S8_X24_FLOAT:
193 return 8;
194 case DepthFormat::Z32_FLOAT:
195 case DepthFormat::S8_Z24_UNORM:
196 case DepthFormat::Z24_X8_UNORM:
197 case DepthFormat::Z24_S8_UNORM:
198 case DepthFormat::Z24_C8_UNORM:
199 return 4;
200 case DepthFormat::Z16_UNORM:
201 return 2;
202 default:
203 UNIMPLEMENTED_MSG("Unimplemented Depth format {}", static_cast<u32>(format));
204 return 1;
205 }
206}
207
208// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence 143// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
209// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4. 144// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
210// So the values you see in docs might be multiplied by 4. 145// So the values you see in docs might be multiplied by 4.
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 07727210c..64acb17df 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -39,6 +39,7 @@ enum class RenderTargetFormat : u32 {
39 RGBA32_FLOAT = 0xC0, 39 RGBA32_FLOAT = 0xC0,
40 RGBA32_UINT = 0xC2, 40 RGBA32_UINT = 0xC2,
41 RGBA16_UNORM = 0xC6, 41 RGBA16_UNORM = 0xC6,
42 RGBA16_SNORM = 0xC7,
42 RGBA16_UINT = 0xC9, 43 RGBA16_UINT = 0xC9,
43 RGBA16_FLOAT = 0xCA, 44 RGBA16_FLOAT = 0xCA,
44 RG32_FLOAT = 0xCB, 45 RG32_FLOAT = 0xCB,
@@ -57,6 +58,7 @@ enum class RenderTargetFormat : u32 {
57 RG16_UINT = 0xDD, 58 RG16_UINT = 0xDD,
58 RG16_FLOAT = 0xDE, 59 RG16_FLOAT = 0xDE,
59 R11G11B10_FLOAT = 0xE0, 60 R11G11B10_FLOAT = 0xE0,
61 R32_SINT = 0xE3,
60 R32_UINT = 0xE4, 62 R32_UINT = 0xE4,
61 R32_FLOAT = 0xE5, 63 R32_FLOAT = 0xE5,
62 B5G6R5_UNORM = 0xE8, 64 B5G6R5_UNORM = 0xE8,
@@ -82,12 +84,6 @@ enum class DepthFormat : u32 {
82 Z32_S8_X24_FLOAT = 0x19, 84 Z32_S8_X24_FLOAT = 0x19,
83}; 85};
84 86
85/// Returns the number of bytes per pixel of each rendertarget format.
86u32 RenderTargetBytesPerPixel(RenderTargetFormat format);
87
88/// Returns the number of bytes per pixel of each depth format.
89u32 DepthFormatBytesPerPixel(DepthFormat format);
90
91struct CommandListHeader; 87struct CommandListHeader;
92class DebugContext; 88class DebugContext;
93 89
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 2cdf1aa7f..b1088af3d 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -5,7 +5,7 @@
5#include "common/assert.h" 5#include "common/assert.h"
6#include "common/microprofile.h" 6#include "common/microprofile.h"
7#include "core/core.h" 7#include "core/core.h"
8#include "core/frontend/scope_acquire_window_context.h" 8#include "core/frontend/scope_acquire_context.h"
9#include "video_core/dma_pusher.h" 9#include "video_core/dma_pusher.h"
10#include "video_core/gpu.h" 10#include "video_core/gpu.h"
11#include "video_core/gpu_thread.h" 11#include "video_core/gpu_thread.h"
@@ -27,7 +27,7 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
27 return; 27 return;
28 } 28 }
29 29
30 Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()}; 30 Core::Frontend::ScopeAcquireContext acquire_context{renderer.GetRenderWindow()};
31 31
32 CommandDataContainer next; 32 CommandDataContainer next;
33 while (state.is_running) { 33 while (state.is_running) {
diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp
index 6adef459e..f058f2744 100644
--- a/src/video_core/guest_driver.cpp
+++ b/src/video_core/guest_driver.cpp
@@ -4,13 +4,15 @@
4 4
5#include <algorithm> 5#include <algorithm>
6#include <limits> 6#include <limits>
7#include <vector>
7 8
9#include "common/common_types.h"
8#include "video_core/guest_driver.h" 10#include "video_core/guest_driver.h"
9 11
10namespace VideoCore { 12namespace VideoCore {
11 13
12void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets) { 14void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32> bound_offsets) {
13 if (texture_handler_size_deduced) { 15 if (texture_handler_size) {
14 return; 16 return;
15 } 17 }
16 const std::size_t size = bound_offsets.size(); 18 const std::size_t size = bound_offsets.size();
@@ -29,7 +31,6 @@ void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offse
29 if (min_val > 2) { 31 if (min_val > 2) {
30 return; 32 return;
31 } 33 }
32 texture_handler_size_deduced = true;
33 texture_handler_size = min_texture_handler_size * min_val; 34 texture_handler_size = min_texture_handler_size * min_val;
34} 35}
35 36
diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h
index fc1917347..99450777e 100644
--- a/src/video_core/guest_driver.h
+++ b/src/video_core/guest_driver.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <optional>
7#include <vector> 8#include <vector>
8 9
9#include "common/common_types.h" 10#include "common/common_types.h"
@@ -17,25 +18,29 @@ namespace VideoCore {
17 */ 18 */
18class GuestDriverProfile { 19class GuestDriverProfile {
19public: 20public:
20 void DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets); 21 explicit GuestDriverProfile() = default;
22 explicit GuestDriverProfile(std::optional<u32> texture_handler_size)
23 : texture_handler_size{texture_handler_size} {}
24
25 void DeduceTextureHandlerSize(std::vector<u32> bound_offsets);
21 26
22 u32 GetTextureHandlerSize() const { 27 u32 GetTextureHandlerSize() const {
23 return texture_handler_size; 28 return texture_handler_size.value_or(default_texture_handler_size);
24 } 29 }
25 30
26 bool TextureHandlerSizeKnown() const { 31 bool IsTextureHandlerSizeKnown() const {
27 return texture_handler_size_deduced; 32 return texture_handler_size.has_value();
28 } 33 }
29 34
30private: 35private:
31 // Minimum size of texture handler any driver can use. 36 // Minimum size of texture handler any driver can use.
32 static constexpr u32 min_texture_handler_size = 4; 37 static constexpr u32 min_texture_handler_size = 4;
33 // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily 38
34 // use 4 bytes instead. Thus, certain drivers may squish the size. 39 // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily use 4 bytes instead.
40 // Thus, certain drivers may squish the size.
35 static constexpr u32 default_texture_handler_size = 8; 41 static constexpr u32 default_texture_handler_size = 8;
36 42
37 u32 texture_handler_size = default_texture_handler_size; 43 std::optional<u32> texture_handler_size = default_texture_handler_size;
38 bool texture_handler_size_deduced = false;
39}; 44};
40 45
41} // namespace VideoCore 46} // namespace VideoCore
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index aea010087..073bdb491 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -174,7 +174,7 @@ private:
174 /// End of address space, based on address space in bits. 174 /// End of address space, based on address space in bits.
175 static constexpr GPUVAddr address_space_end{1ULL << address_space_width}; 175 static constexpr GPUVAddr address_space_end{1ULL << address_space_width};
176 176
177 Common::PageTable page_table{page_bits}; 177 Common::BackingPageTable page_table{page_bits};
178 VMAMap vma_map; 178 VMAMap vma_map;
179 VideoCore::RasterizerInterface& rasterizer; 179 VideoCore::RasterizerInterface& rasterizer;
180 180
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
index 2f2fe6859..6d522c318 100644
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -51,6 +51,7 @@ static constexpr ConversionArray morton_to_linear_fns = {
51 MortonCopy<true, PixelFormat::R8UI>, 51 MortonCopy<true, PixelFormat::R8UI>,
52 MortonCopy<true, PixelFormat::RGBA16F>, 52 MortonCopy<true, PixelFormat::RGBA16F>,
53 MortonCopy<true, PixelFormat::RGBA16U>, 53 MortonCopy<true, PixelFormat::RGBA16U>,
54 MortonCopy<true, PixelFormat::RGBA16S>,
54 MortonCopy<true, PixelFormat::RGBA16UI>, 55 MortonCopy<true, PixelFormat::RGBA16UI>,
55 MortonCopy<true, PixelFormat::R11FG11FB10F>, 56 MortonCopy<true, PixelFormat::R11FG11FB10F>,
56 MortonCopy<true, PixelFormat::RGBA32UI>, 57 MortonCopy<true, PixelFormat::RGBA32UI>,
@@ -85,6 +86,7 @@ static constexpr ConversionArray morton_to_linear_fns = {
85 MortonCopy<true, PixelFormat::RG32UI>, 86 MortonCopy<true, PixelFormat::RG32UI>,
86 MortonCopy<true, PixelFormat::RGBX16F>, 87 MortonCopy<true, PixelFormat::RGBX16F>,
87 MortonCopy<true, PixelFormat::R32UI>, 88 MortonCopy<true, PixelFormat::R32UI>,
89 MortonCopy<true, PixelFormat::R32I>,
88 MortonCopy<true, PixelFormat::ASTC_2D_8X8>, 90 MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
89 MortonCopy<true, PixelFormat::ASTC_2D_8X5>, 91 MortonCopy<true, PixelFormat::ASTC_2D_8X5>,
90 MortonCopy<true, PixelFormat::ASTC_2D_5X4>, 92 MortonCopy<true, PixelFormat::ASTC_2D_5X4>,
@@ -130,6 +132,7 @@ static constexpr ConversionArray linear_to_morton_fns = {
130 MortonCopy<false, PixelFormat::R8U>, 132 MortonCopy<false, PixelFormat::R8U>,
131 MortonCopy<false, PixelFormat::R8UI>, 133 MortonCopy<false, PixelFormat::R8UI>,
132 MortonCopy<false, PixelFormat::RGBA16F>, 134 MortonCopy<false, PixelFormat::RGBA16F>,
135 MortonCopy<false, PixelFormat::RGBA16S>,
133 MortonCopy<false, PixelFormat::RGBA16U>, 136 MortonCopy<false, PixelFormat::RGBA16U>,
134 MortonCopy<false, PixelFormat::RGBA16UI>, 137 MortonCopy<false, PixelFormat::RGBA16UI>,
135 MortonCopy<false, PixelFormat::R11FG11FB10F>, 138 MortonCopy<false, PixelFormat::R11FG11FB10F>,
@@ -166,6 +169,7 @@ static constexpr ConversionArray linear_to_morton_fns = {
166 MortonCopy<false, PixelFormat::RG32UI>, 169 MortonCopy<false, PixelFormat::RG32UI>,
167 MortonCopy<false, PixelFormat::RGBX16F>, 170 MortonCopy<false, PixelFormat::RGBX16F>,
168 MortonCopy<false, PixelFormat::R32UI>, 171 MortonCopy<false, PixelFormat::R32UI>,
172 MortonCopy<false, PixelFormat::R32I>,
169 nullptr, 173 nullptr,
170 nullptr, 174 nullptr,
171 nullptr, 175 nullptr,
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index f18eaf4bc..1a68e3caa 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -25,7 +25,6 @@ constexpr std::size_t NumQueryTypes = 1;
25 25
26enum class LoadCallbackStage { 26enum class LoadCallbackStage {
27 Prepare, 27 Prepare,
28 Decompile,
29 Build, 28 Build,
30 Complete, 29 Complete,
31}; 30};
@@ -89,6 +88,9 @@ public:
89 virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false, 88 virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
90 const DiskResourceLoadCallback& callback = {}) {} 89 const DiskResourceLoadCallback& callback = {}) {}
91 90
91 /// Initializes renderer dirty flags
92 virtual void SetupDirtyFlags() {}
93
92 /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver. 94 /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
93 GuestDriverProfile& AccessGuestDriverProfile() { 95 GuestDriverProfile& AccessGuestDriverProfile() {
94 return guest_driver_profile; 96 return guest_driver_profile;
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index af1bebc4f..5ec99a126 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -35,15 +35,19 @@ public:
35 explicit RendererBase(Core::Frontend::EmuWindow& window); 35 explicit RendererBase(Core::Frontend::EmuWindow& window);
36 virtual ~RendererBase(); 36 virtual ~RendererBase();
37 37
38 /// Swap buffers (render frame)
39 virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
40
41 /// Initialize the renderer 38 /// Initialize the renderer
42 virtual bool Init() = 0; 39 virtual bool Init() = 0;
43 40
44 /// Shutdown the renderer 41 /// Shutdown the renderer
45 virtual void ShutDown() = 0; 42 virtual void ShutDown() = 0;
46 43
44 /// Finalize rendering the guest frame and draw into the presentation texture
45 virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
46
47 /// Draws the latest frame to the window waiting timeout_ms for a frame to arrive (Renderer
48 /// specific implementation)
49 virtual void TryPresent(int timeout_ms) = 0;
50
47 // Getter/setter functions: 51 // Getter/setter functions:
48 // ------------------------ 52 // ------------------------
49 53
diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
index 874ed3c6e..b8a512cb6 100644
--- a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
@@ -11,7 +11,6 @@
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "video_core/engines/maxwell_3d.h" 12#include "video_core/engines/maxwell_3d.h"
13#include "video_core/renderer_opengl/gl_framebuffer_cache.h" 13#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
14#include "video_core/renderer_opengl/gl_state.h"
15 14
16namespace OpenGL { 15namespace OpenGL {
17 16
@@ -36,8 +35,7 @@ OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheK
36 framebuffer.Create(); 35 framebuffer.Create();
37 36
38 // TODO(Rodrigo): Use DSA here after Nvidia fixes their framebuffer DSA bugs. 37 // TODO(Rodrigo): Use DSA here after Nvidia fixes their framebuffer DSA bugs.
39 local_state.draw.draw_framebuffer = framebuffer.handle; 38 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer.handle);
40 local_state.ApplyFramebufferState();
41 39
42 if (key.zeta) { 40 if (key.zeta) {
43 const bool stencil = key.zeta->GetSurfaceParams().type == SurfaceType::DepthStencil; 41 const bool stencil = key.zeta->GetSurfaceParams().type == SurfaceType::DepthStencil;
diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.h b/src/video_core/renderer_opengl/gl_framebuffer_cache.h
index 02ec80ae9..8f698fee0 100644
--- a/src/video_core/renderer_opengl/gl_framebuffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_framebuffer_cache.h
@@ -13,7 +13,6 @@
13#include "common/common_types.h" 13#include "common/common_types.h"
14#include "video_core/engines/maxwell_3d.h" 14#include "video_core/engines/maxwell_3d.h"
15#include "video_core/renderer_opengl/gl_resource_manager.h" 15#include "video_core/renderer_opengl/gl_resource_manager.h"
16#include "video_core/renderer_opengl/gl_state.h"
17#include "video_core/renderer_opengl/gl_texture_cache.h" 16#include "video_core/renderer_opengl/gl_texture_cache.h"
18 17
19namespace OpenGL { 18namespace OpenGL {
@@ -63,7 +62,6 @@ public:
63private: 62private:
64 OGLFramebuffer CreateFramebuffer(const FramebufferCacheKey& key); 63 OGLFramebuffer CreateFramebuffer(const FramebufferCacheKey& key);
65 64
66 OpenGLState local_state;
67 std::unordered_map<FramebufferCacheKey, OGLFramebuffer> cache; 65 std::unordered_map<FramebufferCacheKey, OGLFramebuffer> cache;
68}; 66};
69 67
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index e1965fb21..826eee7df 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -28,7 +28,6 @@
28#include "video_core/renderer_opengl/gl_query_cache.h" 28#include "video_core/renderer_opengl/gl_query_cache.h"
29#include "video_core/renderer_opengl/gl_rasterizer.h" 29#include "video_core/renderer_opengl/gl_rasterizer.h"
30#include "video_core/renderer_opengl/gl_shader_cache.h" 30#include "video_core/renderer_opengl/gl_shader_cache.h"
31#include "video_core/renderer_opengl/gl_shader_gen.h"
32#include "video_core/renderer_opengl/maxwell_to_gl.h" 31#include "video_core/renderer_opengl/maxwell_to_gl.h"
33#include "video_core/renderer_opengl/renderer_opengl.h" 32#include "video_core/renderer_opengl/renderer_opengl.h"
34 33
@@ -36,6 +35,7 @@ namespace OpenGL {
36 35
37using Maxwell = Tegra::Engines::Maxwell3D::Regs; 36using Maxwell = Tegra::Engines::Maxwell3D::Regs;
38 37
38using Tegra::Engines::ShaderType;
39using VideoCore::Surface::PixelFormat; 39using VideoCore::Surface::PixelFormat;
40using VideoCore::Surface::SurfaceTarget; 40using VideoCore::Surface::SurfaceTarget;
41using VideoCore::Surface::SurfaceType; 41using VideoCore::Surface::SurfaceType;
@@ -54,10 +54,11 @@ MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255
54 54
55namespace { 55namespace {
56 56
57constexpr std::size_t NumSupportedVertexAttributes = 16;
58
57template <typename Engine, typename Entry> 59template <typename Engine, typename Entry>
58Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, 60Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
59 Tegra::Engines::ShaderType shader_type, 61 ShaderType shader_type, std::size_t index = 0) {
60 std::size_t index = 0) {
61 if (entry.IsBindless()) { 62 if (entry.IsBindless()) {
62 const Tegra::Texture::TextureHandle tex_handle = 63 const Tegra::Texture::TextureHandle tex_handle =
63 engine.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset()); 64 engine.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset());
@@ -74,7 +75,7 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry
74} 75}
75 76
76std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, 77std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
77 const GLShader::ConstBufferEntry& entry) { 78 const ConstBufferEntry& entry) {
78 if (!entry.IsIndirect()) { 79 if (!entry.IsIndirect()) {
79 return entry.GetSize(); 80 return entry.GetSize();
80 } 81 }
@@ -88,18 +89,19 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
88 return buffer.size; 89 return buffer.size;
89} 90}
90 91
92void oglEnable(GLenum cap, bool state) {
93 (state ? glEnable : glDisable)(cap);
94}
95
91} // Anonymous namespace 96} // Anonymous namespace
92 97
93RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, 98RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
94 ScreenInfo& info) 99 ScreenInfo& info, GLShader::ProgramManager& program_manager,
95 : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device}, 100 StateTracker& state_tracker)
101 : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device, state_tracker},
96 shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system}, 102 shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system},
97 screen_info{info}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} { 103 screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker},
98 shader_program_manager = std::make_unique<GLShader::ProgramManager>(); 104 buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} {
99 state.draw.shader_program = 0;
100 state.Apply();
101
102 LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here");
103 CheckExtensions(); 105 CheckExtensions();
104} 106}
105 107
@@ -113,93 +115,72 @@ void RasterizerOpenGL::CheckExtensions() {
113 } 115 }
114} 116}
115 117
116GLuint RasterizerOpenGL::SetupVertexFormat() { 118void RasterizerOpenGL::SetupVertexFormat() {
117 auto& gpu = system.GPU().Maxwell3D(); 119 auto& gpu = system.GPU().Maxwell3D();
118 const auto& regs = gpu.regs; 120 auto& flags = gpu.dirty.flags;
119 121 if (!flags[Dirty::VertexFormats]) {
120 if (!gpu.dirty.vertex_attrib_format) { 122 return;
121 return state.draw.vertex_array;
122 } 123 }
123 gpu.dirty.vertex_attrib_format = false; 124 flags[Dirty::VertexFormats] = false;
124 125
125 MICROPROFILE_SCOPE(OpenGL_VAO); 126 MICROPROFILE_SCOPE(OpenGL_VAO);
126 127
127 auto [iter, is_cache_miss] = vertex_array_cache.try_emplace(regs.vertex_attrib_format); 128 // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. Enables
128 auto& vao_entry = iter->second; 129 // the first 16 vertex attributes always, as we don't know which ones are actually used until
129 130 // shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now to
130 if (is_cache_miss) { 131 // avoid OpenGL errors.
131 vao_entry.Create(); 132 // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't
132 const GLuint vao = vao_entry.handle; 133 // assume every shader uses them all.
133 134 for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) {
134 // Eventhough we are using DSA to create this vertex array, there is a bug on Intel's blob 135 if (!flags[Dirty::VertexFormat0 + index]) {
135 // that fails to properly create the vertex array if it's not bound even after creating it 136 continue;
136 // with glCreateVertexArrays
137 state.draw.vertex_array = vao;
138 state.ApplyVertexArrayState();
139
140 // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL.
141 // Enables the first 16 vertex attributes always, as we don't know which ones are actually
142 // used until shader time. Note, Tegra technically supports 32, but we're capping this to 16
143 // for now to avoid OpenGL errors.
144 // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't
145 // assume every shader uses them all.
146 for (u32 index = 0; index < 16; ++index) {
147 const auto& attrib = regs.vertex_attrib_format[index];
148
149 // Ignore invalid attributes.
150 if (!attrib.IsValid())
151 continue;
152
153 const auto& buffer = regs.vertex_array[attrib.buffer];
154 LOG_TRACE(Render_OpenGL,
155 "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
156 index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(),
157 attrib.offset.Value(), attrib.IsNormalized());
158
159 ASSERT(buffer.IsEnabled());
160
161 glEnableVertexArrayAttrib(vao, index);
162 if (attrib.type == Tegra::Engines::Maxwell3D::Regs::VertexAttribute::Type::SignedInt ||
163 attrib.type ==
164 Tegra::Engines::Maxwell3D::Regs::VertexAttribute::Type::UnsignedInt) {
165 glVertexArrayAttribIFormat(vao, index, attrib.ComponentCount(),
166 MaxwellToGL::VertexType(attrib), attrib.offset);
167 } else {
168 glVertexArrayAttribFormat(
169 vao, index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib),
170 attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset);
171 }
172 glVertexArrayAttribBinding(vao, index, attrib.buffer);
173 } 137 }
174 } 138 flags[Dirty::VertexFormat0 + index] = false;
139
140 const auto attrib = gpu.regs.vertex_attrib_format[index];
141 const auto gl_index = static_cast<GLuint>(index);
175 142
176 // Rebinding the VAO invalidates the vertex buffer bindings. 143 // Ignore invalid attributes.
177 gpu.dirty.ResetVertexArrays(); 144 if (!attrib.IsValid()) {
145 glDisableVertexAttribArray(gl_index);
146 continue;
147 }
148 glEnableVertexAttribArray(gl_index);
178 149
179 state.draw.vertex_array = vao_entry.handle; 150 if (attrib.type == Maxwell::VertexAttribute::Type::SignedInt ||
180 return vao_entry.handle; 151 attrib.type == Maxwell::VertexAttribute::Type::UnsignedInt) {
152 glVertexAttribIFormat(gl_index, attrib.ComponentCount(),
153 MaxwellToGL::VertexType(attrib), attrib.offset);
154 } else {
155 glVertexAttribFormat(gl_index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib),
156 attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset);
157 }
158 glVertexAttribBinding(gl_index, attrib.buffer);
159 }
181} 160}
182 161
183void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { 162void RasterizerOpenGL::SetupVertexBuffer() {
184 auto& gpu = system.GPU().Maxwell3D(); 163 auto& gpu = system.GPU().Maxwell3D();
185 if (!gpu.dirty.vertex_array_buffers) 164 auto& flags = gpu.dirty.flags;
165 if (!flags[Dirty::VertexBuffers]) {
186 return; 166 return;
187 gpu.dirty.vertex_array_buffers = false; 167 }
188 168 flags[Dirty::VertexBuffers] = false;
189 const auto& regs = gpu.regs;
190 169
191 MICROPROFILE_SCOPE(OpenGL_VB); 170 MICROPROFILE_SCOPE(OpenGL_VB);
192 171
193 // Upload all guest vertex arrays sequentially to our buffer 172 // Upload all guest vertex arrays sequentially to our buffer
194 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { 173 const auto& regs = gpu.regs;
195 if (!gpu.dirty.vertex_array[index]) 174 for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
175 if (!flags[Dirty::VertexBuffer0 + index]) {
196 continue; 176 continue;
197 gpu.dirty.vertex_array[index] = false; 177 }
198 gpu.dirty.vertex_instance[index] = false; 178 flags[Dirty::VertexBuffer0 + index] = false;
199 179
200 const auto& vertex_array = regs.vertex_array[index]; 180 const auto& vertex_array = regs.vertex_array[index];
201 if (!vertex_array.IsEnabled()) 181 if (!vertex_array.IsEnabled()) {
202 continue; 182 continue;
183 }
203 184
204 const GPUVAddr start = vertex_array.StartAddress(); 185 const GPUVAddr start = vertex_array.StartAddress();
205 const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); 186 const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
@@ -209,42 +190,30 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
209 const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size); 190 const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size);
210 191
211 // Bind the vertex array to the buffer at the current offset. 192 // Bind the vertex array to the buffer at the current offset.
212 vertex_array_pushbuffer.SetVertexBuffer(index, vertex_buffer, vertex_buffer_offset, 193 vertex_array_pushbuffer.SetVertexBuffer(static_cast<GLuint>(index), vertex_buffer,
213 vertex_array.stride); 194 vertex_buffer_offset, vertex_array.stride);
214
215 if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) {
216 // Enable vertex buffer instancing with the specified divisor.
217 glVertexArrayBindingDivisor(vao, index, vertex_array.divisor);
218 } else {
219 // Disable the vertex buffer instancing.
220 glVertexArrayBindingDivisor(vao, index, 0);
221 }
222 } 195 }
223} 196}
224 197
225void RasterizerOpenGL::SetupVertexInstances(GLuint vao) { 198void RasterizerOpenGL::SetupVertexInstances() {
226 auto& gpu = system.GPU().Maxwell3D(); 199 auto& gpu = system.GPU().Maxwell3D();
227 200 auto& flags = gpu.dirty.flags;
228 if (!gpu.dirty.vertex_instances) 201 if (!flags[Dirty::VertexInstances]) {
229 return; 202 return;
230 gpu.dirty.vertex_instances = false; 203 }
204 flags[Dirty::VertexInstances] = false;
231 205
232 const auto& regs = gpu.regs; 206 const auto& regs = gpu.regs;
233 // Upload all guest vertex arrays sequentially to our buffer 207 for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) {
234 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { 208 if (!flags[Dirty::VertexInstance0 + index]) {
235 if (!gpu.dirty.vertex_instance[index])
236 continue; 209 continue;
237
238 gpu.dirty.vertex_instance[index] = false;
239
240 if (regs.instanced_arrays.IsInstancingEnabled(index) &&
241 regs.vertex_array[index].divisor != 0) {
242 // Enable vertex buffer instancing with the specified divisor.
243 glVertexArrayBindingDivisor(vao, index, regs.vertex_array[index].divisor);
244 } else {
245 // Disable the vertex buffer instancing.
246 glVertexArrayBindingDivisor(vao, index, 0);
247 } 210 }
211 flags[Dirty::VertexInstance0 + index] = false;
212
213 const auto gl_index = static_cast<GLuint>(index);
214 const bool instancing_enabled = regs.instanced_arrays.IsInstancingEnabled(gl_index);
215 const GLuint divisor = instancing_enabled ? regs.vertex_array[index].divisor : 0;
216 glVertexBindingDivisor(gl_index, divisor);
248 } 217 }
249} 218}
250 219
@@ -260,8 +229,7 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
260void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { 229void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
261 MICROPROFILE_SCOPE(OpenGL_Shader); 230 MICROPROFILE_SCOPE(OpenGL_Shader);
262 auto& gpu = system.GPU().Maxwell3D(); 231 auto& gpu = system.GPU().Maxwell3D();
263 232 u32 clip_distances = 0;
264 std::array<bool, Maxwell::NumClipDistances> clip_distances{};
265 233
266 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { 234 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
267 const auto& shader_config = gpu.regs.shader_config[index]; 235 const auto& shader_config = gpu.regs.shader_config[index];
@@ -271,10 +239,10 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
271 if (!gpu.regs.IsShaderConfigEnabled(index)) { 239 if (!gpu.regs.IsShaderConfigEnabled(index)) {
272 switch (program) { 240 switch (program) {
273 case Maxwell::ShaderProgram::Geometry: 241 case Maxwell::ShaderProgram::Geometry:
274 shader_program_manager->UseTrivialGeometryShader(); 242 program_manager.UseGeometryShader(0);
275 break; 243 break;
276 case Maxwell::ShaderProgram::Fragment: 244 case Maxwell::ShaderProgram::Fragment:
277 shader_program_manager->UseTrivialFragmentShader(); 245 program_manager.UseFragmentShader(0);
278 break; 246 break;
279 default: 247 default:
280 break; 248 break;
@@ -299,19 +267,17 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
299 SetupDrawTextures(stage, shader); 267 SetupDrawTextures(stage, shader);
300 SetupDrawImages(stage, shader); 268 SetupDrawImages(stage, shader);
301 269
302 const ProgramVariant variant(primitive_mode); 270 const GLuint program_handle = shader->GetHandle();
303 const auto program_handle = shader->GetHandle(variant);
304
305 switch (program) { 271 switch (program) {
306 case Maxwell::ShaderProgram::VertexA: 272 case Maxwell::ShaderProgram::VertexA:
307 case Maxwell::ShaderProgram::VertexB: 273 case Maxwell::ShaderProgram::VertexB:
308 shader_program_manager->UseProgrammableVertexShader(program_handle); 274 program_manager.UseVertexShader(program_handle);
309 break; 275 break;
310 case Maxwell::ShaderProgram::Geometry: 276 case Maxwell::ShaderProgram::Geometry:
311 shader_program_manager->UseProgrammableGeometryShader(program_handle); 277 program_manager.UseGeometryShader(program_handle);
312 break; 278 break;
313 case Maxwell::ShaderProgram::Fragment: 279 case Maxwell::ShaderProgram::Fragment:
314 shader_program_manager->UseProgrammableFragmentShader(program_handle); 280 program_manager.UseFragmentShader(program_handle);
315 break; 281 break;
316 default: 282 default:
317 UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, 283 UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
@@ -322,9 +288,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
322 // When a clip distance is enabled but not set in the shader it crops parts of the screen 288 // When a clip distance is enabled but not set in the shader it crops parts of the screen
323 // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the 289 // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
324 // clip distances only when it's written by a shader stage. 290 // clip distances only when it's written by a shader stage.
325 for (std::size_t i = 0; i < Maxwell::NumClipDistances; ++i) { 291 clip_distances |= shader->GetEntries().clip_distances;
326 clip_distances[i] = clip_distances[i] || shader->GetShaderEntries().clip_distances[i];
327 }
328 292
329 // When VertexA is enabled, we have dual vertex shaders 293 // When VertexA is enabled, we have dual vertex shaders
330 if (program == Maxwell::ShaderProgram::VertexA) { 294 if (program == Maxwell::ShaderProgram::VertexA) {
@@ -334,8 +298,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
334 } 298 }
335 299
336 SyncClipEnabled(clip_distances); 300 SyncClipEnabled(clip_distances);
337 301 gpu.dirty.flags[Dirty::Shaders] = false;
338 gpu.dirty.shaders = false;
339} 302}
340 303
341std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { 304std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
@@ -368,20 +331,23 @@ void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading,
368 shader_cache.LoadDiskCache(stop_loading, callback); 331 shader_cache.LoadDiskCache(stop_loading, callback);
369} 332}
370 333
334void RasterizerOpenGL::SetupDirtyFlags() {
335 state_tracker.Initialize();
336}
337
371void RasterizerOpenGL::ConfigureFramebuffers() { 338void RasterizerOpenGL::ConfigureFramebuffers() {
372 MICROPROFILE_SCOPE(OpenGL_Framebuffer); 339 MICROPROFILE_SCOPE(OpenGL_Framebuffer);
373 auto& gpu = system.GPU().Maxwell3D(); 340 auto& gpu = system.GPU().Maxwell3D();
374 if (!gpu.dirty.render_settings) { 341 if (!gpu.dirty.flags[VideoCommon::Dirty::RenderTargets]) {
375 return; 342 return;
376 } 343 }
377 gpu.dirty.render_settings = false; 344 gpu.dirty.flags[VideoCommon::Dirty::RenderTargets] = false;
378 345
379 texture_cache.GuardRenderTargets(true); 346 texture_cache.GuardRenderTargets(true);
380 347
381 View depth_surface = texture_cache.GetDepthBufferSurface(true); 348 View depth_surface = texture_cache.GetDepthBufferSurface(true);
382 349
383 const auto& regs = gpu.regs; 350 const auto& regs = gpu.regs;
384 state.framebuffer_srgb.enabled = regs.framebuffer_srgb != 0;
385 UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0); 351 UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
386 352
387 // Bind the framebuffer surfaces 353 // Bind the framebuffer surfaces
@@ -409,14 +375,11 @@ void RasterizerOpenGL::ConfigureFramebuffers() {
409 375
410 texture_cache.GuardRenderTargets(false); 376 texture_cache.GuardRenderTargets(false);
411 377
412 state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(key); 378 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key));
413 SyncViewport(state);
414} 379}
415 380
416void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb, 381void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color_fb, bool using_depth_fb,
417 bool using_depth_fb, bool using_stencil_fb) { 382 bool using_stencil_fb) {
418 using VideoCore::Surface::SurfaceType;
419
420 auto& gpu = system.GPU().Maxwell3D(); 383 auto& gpu = system.GPU().Maxwell3D();
421 const auto& regs = gpu.regs; 384 const auto& regs = gpu.regs;
422 385
@@ -435,80 +398,44 @@ void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, boo
435 key.colors[0] = color_surface; 398 key.colors[0] = color_surface;
436 key.zeta = depth_surface; 399 key.zeta = depth_surface;
437 400
438 current_state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(key); 401 state_tracker.NotifyFramebuffer();
439 current_state.ApplyFramebufferState(); 402 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key));
440} 403}
441 404
442void RasterizerOpenGL::Clear() { 405void RasterizerOpenGL::Clear() {
443 const auto& maxwell3d = system.GPU().Maxwell3D(); 406 const auto& gpu = system.GPU().Maxwell3D();
444 407 if (!gpu.ShouldExecute()) {
445 if (!maxwell3d.ShouldExecute()) {
446 return; 408 return;
447 } 409 }
448 410
449 const auto& regs = maxwell3d.regs; 411 const auto& regs = gpu.regs;
450 bool use_color{}; 412 bool use_color{};
451 bool use_depth{}; 413 bool use_depth{};
452 bool use_stencil{}; 414 bool use_stencil{};
453 415
454 OpenGLState prev_state{OpenGLState::GetCurState()};
455 SCOPE_EXIT({
456 prev_state.AllDirty();
457 prev_state.Apply();
458 });
459
460 OpenGLState clear_state{OpenGLState::GetCurState()};
461 clear_state.SetDefaultViewports();
462 if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || 416 if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
463 regs.clear_buffers.A) { 417 regs.clear_buffers.A) {
464 use_color = true; 418 use_color = true;
465 } 419 }
466 if (use_color) { 420 if (use_color) {
467 clear_state.color_mask[0].red_enabled = regs.clear_buffers.R ? GL_TRUE : GL_FALSE; 421 state_tracker.NotifyColorMask0();
468 clear_state.color_mask[0].green_enabled = regs.clear_buffers.G ? GL_TRUE : GL_FALSE; 422 glColorMaski(0, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0,
469 clear_state.color_mask[0].blue_enabled = regs.clear_buffers.B ? GL_TRUE : GL_FALSE; 423 regs.clear_buffers.B != 0, regs.clear_buffers.A != 0);
470 clear_state.color_mask[0].alpha_enabled = regs.clear_buffers.A ? GL_TRUE : GL_FALSE; 424
425 // TODO(Rodrigo): Determine if clamping is used on clears
426 SyncFragmentColorClampState();
427 SyncFramebufferSRGB();
471 } 428 }
472 if (regs.clear_buffers.Z) { 429 if (regs.clear_buffers.Z) {
473 ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear Z but buffer is not enabled!"); 430 ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear Z but buffer is not enabled!");
474 use_depth = true; 431 use_depth = true;
475 432
476 // Always enable the depth write when clearing the depth buffer. The depth write mask is 433 state_tracker.NotifyDepthMask();
477 // ignored when clearing the buffer in the Switch, but OpenGL obeys it so we set it to 434 glDepthMask(GL_TRUE);
478 // true.
479 clear_state.depth.test_enabled = true;
480 clear_state.depth.test_func = GL_ALWAYS;
481 clear_state.depth.write_mask = GL_TRUE;
482 } 435 }
483 if (regs.clear_buffers.S) { 436 if (regs.clear_buffers.S) {
484 ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!"); 437 ASSERT_MSG(regs.zeta_enable, "Tried to clear stencil but buffer is not enabled!");
485 use_stencil = true; 438 use_stencil = true;
486 clear_state.stencil.test_enabled = true;
487
488 if (regs.clear_flags.stencil) {
489 // Stencil affects the clear so fill it with the used masks
490 clear_state.stencil.front.test_func = GL_ALWAYS;
491 clear_state.stencil.front.test_mask = regs.stencil_front_func_mask;
492 clear_state.stencil.front.action_stencil_fail = GL_KEEP;
493 clear_state.stencil.front.action_depth_fail = GL_KEEP;
494 clear_state.stencil.front.action_depth_pass = GL_KEEP;
495 clear_state.stencil.front.write_mask = regs.stencil_front_mask;
496 if (regs.stencil_two_side_enable) {
497 clear_state.stencil.back.test_func = GL_ALWAYS;
498 clear_state.stencil.back.test_mask = regs.stencil_back_func_mask;
499 clear_state.stencil.back.action_stencil_fail = GL_KEEP;
500 clear_state.stencil.back.action_depth_fail = GL_KEEP;
501 clear_state.stencil.back.action_depth_pass = GL_KEEP;
502 clear_state.stencil.back.write_mask = regs.stencil_back_mask;
503 } else {
504 clear_state.stencil.back.test_func = GL_ALWAYS;
505 clear_state.stencil.back.test_mask = 0xFFFFFFFF;
506 clear_state.stencil.back.write_mask = 0xFFFFFFFF;
507 clear_state.stencil.back.action_stencil_fail = GL_KEEP;
508 clear_state.stencil.back.action_depth_fail = GL_KEEP;
509 clear_state.stencil.back.action_depth_pass = GL_KEEP;
510 }
511 }
512 } 439 }
513 440
514 if (!use_color && !use_depth && !use_stencil) { 441 if (!use_color && !use_depth && !use_stencil) {
@@ -516,20 +443,18 @@ void RasterizerOpenGL::Clear() {
516 return; 443 return;
517 } 444 }
518 445
519 ConfigureClearFramebuffer(clear_state, use_color, use_depth, use_stencil); 446 SyncRasterizeEnable();
520 447
521 SyncViewport(clear_state);
522 SyncRasterizeEnable(clear_state);
523 if (regs.clear_flags.scissor) { 448 if (regs.clear_flags.scissor) {
524 SyncScissorTest(clear_state); 449 SyncScissorTest();
450 } else {
451 state_tracker.NotifyScissor0();
452 glDisablei(GL_SCISSOR_TEST, 0);
525 } 453 }
526 454
527 if (regs.clear_flags.viewport) { 455 UNIMPLEMENTED_IF(regs.clear_flags.viewport);
528 clear_state.EmulateViewportWithScissor();
529 }
530 456
531 clear_state.AllDirty(); 457 ConfigureClearFramebuffer(use_color, use_depth, use_stencil);
532 clear_state.Apply();
533 458
534 if (use_color) { 459 if (use_color) {
535 glClearBufferfv(GL_COLOR, 0, regs.clear_color); 460 glClearBufferfv(GL_COLOR, 0, regs.clear_color);
@@ -549,25 +474,27 @@ void RasterizerOpenGL::Clear() {
549void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { 474void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
550 MICROPROFILE_SCOPE(OpenGL_Drawing); 475 MICROPROFILE_SCOPE(OpenGL_Drawing);
551 auto& gpu = system.GPU().Maxwell3D(); 476 auto& gpu = system.GPU().Maxwell3D();
552 const auto& regs = gpu.regs;
553 477
554 query_cache.UpdateCounters(); 478 query_cache.UpdateCounters();
555 479
556 SyncRasterizeEnable(state); 480 SyncViewport();
481 SyncRasterizeEnable();
482 SyncPolygonModes();
557 SyncColorMask(); 483 SyncColorMask();
558 SyncFragmentColorClampState(); 484 SyncFragmentColorClampState();
559 SyncMultiSampleState(); 485 SyncMultiSampleState();
560 SyncDepthTestState(); 486 SyncDepthTestState();
487 SyncDepthClamp();
561 SyncStencilTestState(); 488 SyncStencilTestState();
562 SyncBlendState(); 489 SyncBlendState();
563 SyncLogicOpState(); 490 SyncLogicOpState();
564 SyncCullMode(); 491 SyncCullMode();
565 SyncPrimitiveRestart(); 492 SyncPrimitiveRestart();
566 SyncScissorTest(state); 493 SyncScissorTest();
567 SyncTransformFeedback();
568 SyncPointState(); 494 SyncPointState();
569 SyncPolygonOffset(); 495 SyncPolygonOffset();
570 SyncAlphaTest(); 496 SyncAlphaTest();
497 SyncFramebufferSRGB();
571 498
572 buffer_cache.Acquire(); 499 buffer_cache.Acquire();
573 500
@@ -591,14 +518,13 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
591 buffer_cache.Map(buffer_size); 518 buffer_cache.Map(buffer_size);
592 519
593 // Prepare vertex array format. 520 // Prepare vertex array format.
594 const GLuint vao = SetupVertexFormat(); 521 SetupVertexFormat();
595 vertex_array_pushbuffer.Setup(vao); 522 vertex_array_pushbuffer.Setup();
596 523
597 // Upload vertex and index data. 524 // Upload vertex and index data.
598 SetupVertexBuffer(vao); 525 SetupVertexBuffer();
599 SetupVertexInstances(vao); 526 SetupVertexInstances();
600 527 GLintptr index_buffer_offset = 0;
601 GLintptr index_buffer_offset;
602 if (is_indexed) { 528 if (is_indexed) {
603 index_buffer_offset = SetupIndexBuffer(); 529 index_buffer_offset = SetupIndexBuffer();
604 } 530 }
@@ -624,27 +550,20 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
624 ConfigureFramebuffers(); 550 ConfigureFramebuffers();
625 551
626 // Signal the buffer cache that we are not going to upload more things. 552 // Signal the buffer cache that we are not going to upload more things.
627 const bool invalidate = buffer_cache.Unmap(); 553 buffer_cache.Unmap();
628 554
629 // Now that we are no longer uploading data, we can safely bind the buffers to OpenGL. 555 // Now that we are no longer uploading data, we can safely bind the buffers to OpenGL.
630 vertex_array_pushbuffer.Bind(); 556 vertex_array_pushbuffer.Bind();
631 bind_ubo_pushbuffer.Bind(); 557 bind_ubo_pushbuffer.Bind();
632 bind_ssbo_pushbuffer.Bind(); 558 bind_ssbo_pushbuffer.Bind();
633 559
634 if (invalidate) { 560 program_manager.BindGraphicsPipeline();
635 // As all cached buffers are invalidated, we need to recheck their state.
636 gpu.dirty.ResetVertexArrays();
637 }
638 gpu.dirty.memory_general = false;
639
640 shader_program_manager->ApplyTo(state);
641 state.Apply();
642 561
643 if (texture_cache.TextureBarrier()) { 562 if (texture_cache.TextureBarrier()) {
644 glTextureBarrier(); 563 glTextureBarrier();
645 } 564 }
646 565
647 ++num_queued_commands; 566 BeginTransformFeedback(primitive_mode);
648 567
649 const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance); 568 const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance);
650 const GLsizei num_instances = 569 const GLsizei num_instances =
@@ -683,6 +602,10 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
683 num_instances, base_instance); 602 num_instances, base_instance);
684 } 603 }
685 } 604 }
605
606 EndTransformFeedback();
607
608 ++num_queued_commands;
686} 609}
687 610
688void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { 611void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
@@ -695,13 +618,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
695 auto kernel = shader_cache.GetComputeKernel(code_addr); 618 auto kernel = shader_cache.GetComputeKernel(code_addr);
696 SetupComputeTextures(kernel); 619 SetupComputeTextures(kernel);
697 SetupComputeImages(kernel); 620 SetupComputeImages(kernel);
698 621 program_manager.BindComputeShader(kernel->GetHandle());
699 const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
700 const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y,
701 launch_desc.block_dim_z, launch_desc.shared_alloc,
702 launch_desc.local_pos_alloc);
703 state.draw.shader_program = kernel->GetHandle(variant);
704 state.draw.program_pipeline = 0;
705 622
706 const std::size_t buffer_size = 623 const std::size_t buffer_size =
707 Tegra::Engines::KeplerCompute::NumConstBuffers * 624 Tegra::Engines::KeplerCompute::NumConstBuffers *
@@ -719,11 +636,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
719 bind_ubo_pushbuffer.Bind(); 636 bind_ubo_pushbuffer.Bind();
720 bind_ssbo_pushbuffer.Bind(); 637 bind_ssbo_pushbuffer.Bind();
721 638
722 state.ApplyTextures(); 639 const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
723 state.ApplyImages();
724 state.ApplyShaderProgram();
725 state.ApplyProgramPipeline();
726
727 glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); 640 glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
728 ++num_queued_commands; 641 ++num_queued_commands;
729} 642}
@@ -828,7 +741,7 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shad
828 const auto& shader_stage = stages[stage_index]; 741 const auto& shader_stage = stages[stage_index];
829 742
830 u32 binding = device.GetBaseBindings(stage_index).uniform_buffer; 743 u32 binding = device.GetBaseBindings(stage_index).uniform_buffer;
831 for (const auto& entry : shader->GetShaderEntries().const_buffers) { 744 for (const auto& entry : shader->GetEntries().const_buffers) {
832 const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; 745 const auto& buffer = shader_stage.const_buffers[entry.GetIndex()];
833 SetupConstBuffer(binding++, buffer, entry); 746 SetupConstBuffer(binding++, buffer, entry);
834 } 747 }
@@ -839,7 +752,7 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
839 const auto& launch_desc = system.GPU().KeplerCompute().launch_description; 752 const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
840 753
841 u32 binding = 0; 754 u32 binding = 0;
842 for (const auto& entry : kernel->GetShaderEntries().const_buffers) { 755 for (const auto& entry : kernel->GetEntries().const_buffers) {
843 const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; 756 const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
844 const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); 757 const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
845 Tegra::Engines::ConstBufferInfo buffer; 758 Tegra::Engines::ConstBufferInfo buffer;
@@ -851,7 +764,7 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
851} 764}
852 765
853void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, 766void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
854 const GLShader::ConstBufferEntry& entry) { 767 const ConstBufferEntry& entry) {
855 if (!buffer.enabled) { 768 if (!buffer.enabled) {
856 // Set values to zero to unbind buffers 769 // Set values to zero to unbind buffers
857 bind_ubo_pushbuffer.Push(binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0, 770 bind_ubo_pushbuffer.Push(binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0,
@@ -875,7 +788,7 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shad
875 const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; 788 const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
876 789
877 u32 binding = device.GetBaseBindings(stage_index).shader_storage_buffer; 790 u32 binding = device.GetBaseBindings(stage_index).shader_storage_buffer;
878 for (const auto& entry : shader->GetShaderEntries().global_memory_entries) { 791 for (const auto& entry : shader->GetEntries().global_memory_entries) {
879 const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()}; 792 const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()};
880 const auto gpu_addr{memory_manager.Read<u64>(addr)}; 793 const auto gpu_addr{memory_manager.Read<u64>(addr)};
881 const auto size{memory_manager.Read<u32>(addr + 8)}; 794 const auto size{memory_manager.Read<u32>(addr + 8)};
@@ -889,7 +802,7 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
889 const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; 802 const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
890 803
891 u32 binding = 0; 804 u32 binding = 0;
892 for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) { 805 for (const auto& entry : kernel->GetEntries().global_memory_entries) {
893 const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; 806 const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
894 const auto gpu_addr{memory_manager.Read<u64>(addr)}; 807 const auto gpu_addr{memory_manager.Read<u64>(addr)};
895 const auto size{memory_manager.Read<u32>(addr + 8)}; 808 const auto size{memory_manager.Read<u32>(addr + 8)};
@@ -897,7 +810,7 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
897 } 810 }
898} 811}
899 812
900void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry, 813void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
901 GPUVAddr gpu_addr, std::size_t size) { 814 GPUVAddr gpu_addr, std::size_t size) {
902 const auto alignment{device.GetShaderStorageBufferAlignment()}; 815 const auto alignment{device.GetShaderStorageBufferAlignment()};
903 const auto [ssbo, buffer_offset] = 816 const auto [ssbo, buffer_offset] =
@@ -909,16 +822,11 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader&
909 MICROPROFILE_SCOPE(OpenGL_Texture); 822 MICROPROFILE_SCOPE(OpenGL_Texture);
910 const auto& maxwell3d = system.GPU().Maxwell3D(); 823 const auto& maxwell3d = system.GPU().Maxwell3D();
911 u32 binding = device.GetBaseBindings(stage_index).sampler; 824 u32 binding = device.GetBaseBindings(stage_index).sampler;
912 for (const auto& entry : shader->GetShaderEntries().samplers) { 825 for (const auto& entry : shader->GetEntries().samplers) {
913 const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index); 826 const auto shader_type = static_cast<ShaderType>(stage_index);
914 if (!entry.IsIndexed()) { 827 for (std::size_t i = 0; i < entry.Size(); ++i) {
915 const auto texture = GetTextureInfo(maxwell3d, entry, shader_type); 828 const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i);
916 SetupTexture(binding++, texture, entry); 829 SetupTexture(binding++, texture, entry);
917 } else {
918 for (std::size_t i = 0; i < entry.Size(); ++i) {
919 const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i);
920 SetupTexture(binding++, texture, entry);
921 }
922 } 830 }
923 } 831 }
924} 832}
@@ -927,46 +835,39 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) {
927 MICROPROFILE_SCOPE(OpenGL_Texture); 835 MICROPROFILE_SCOPE(OpenGL_Texture);
928 const auto& compute = system.GPU().KeplerCompute(); 836 const auto& compute = system.GPU().KeplerCompute();
929 u32 binding = 0; 837 u32 binding = 0;
930 for (const auto& entry : kernel->GetShaderEntries().samplers) { 838 for (const auto& entry : kernel->GetEntries().samplers) {
931 if (!entry.IsIndexed()) { 839 for (std::size_t i = 0; i < entry.Size(); ++i) {
932 const auto texture = 840 const auto texture = GetTextureInfo(compute, entry, ShaderType::Compute, i);
933 GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute);
934 SetupTexture(binding++, texture, entry); 841 SetupTexture(binding++, texture, entry);
935 } else {
936 for (std::size_t i = 0; i < entry.Size(); ++i) {
937 const auto texture =
938 GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute, i);
939 SetupTexture(binding++, texture, entry);
940 }
941 } 842 }
942 } 843 }
943} 844}
944 845
945void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, 846void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
946 const GLShader::SamplerEntry& entry) { 847 const SamplerEntry& entry) {
947 const auto view = texture_cache.GetTextureSurface(texture.tic, entry); 848 const auto view = texture_cache.GetTextureSurface(texture.tic, entry);
948 if (!view) { 849 if (!view) {
949 // Can occur when texture addr is null or its memory is unmapped/invalid 850 // Can occur when texture addr is null or its memory is unmapped/invalid
950 state.samplers[binding] = 0; 851 glBindSampler(binding, 0);
951 state.textures[binding] = 0; 852 glBindTextureUnit(binding, 0);
952 return; 853 return;
953 } 854 }
954 state.textures[binding] = view->GetTexture(); 855 glBindTextureUnit(binding, view->GetTexture());
955 856
956 if (view->GetSurfaceParams().IsBuffer()) { 857 if (view->GetSurfaceParams().IsBuffer()) {
957 return; 858 return;
958 } 859 }
959 state.samplers[binding] = sampler_cache.GetSampler(texture.tsc);
960
961 // Apply swizzle to textures that are not buffers. 860 // Apply swizzle to textures that are not buffers.
962 view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, 861 view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
963 texture.tic.w_source); 862 texture.tic.w_source);
863
864 glBindSampler(binding, sampler_cache.GetSampler(texture.tsc));
964} 865}
965 866
966void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) { 867void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) {
967 const auto& maxwell3d = system.GPU().Maxwell3D(); 868 const auto& maxwell3d = system.GPU().Maxwell3D();
968 u32 binding = device.GetBaseBindings(stage_index).image; 869 u32 binding = device.GetBaseBindings(stage_index).image;
969 for (const auto& entry : shader->GetShaderEntries().images) { 870 for (const auto& entry : shader->GetEntries().images) {
970 const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index); 871 const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index);
971 const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic; 872 const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic;
972 SetupImage(binding++, tic, entry); 873 SetupImage(binding++, tic, entry);
@@ -976,17 +877,17 @@ void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& sh
976void RasterizerOpenGL::SetupComputeImages(const Shader& shader) { 877void RasterizerOpenGL::SetupComputeImages(const Shader& shader) {
977 const auto& compute = system.GPU().KeplerCompute(); 878 const auto& compute = system.GPU().KeplerCompute();
978 u32 binding = 0; 879 u32 binding = 0;
979 for (const auto& entry : shader->GetShaderEntries().images) { 880 for (const auto& entry : shader->GetEntries().images) {
980 const auto tic = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute).tic; 881 const auto tic = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute).tic;
981 SetupImage(binding++, tic, entry); 882 SetupImage(binding++, tic, entry);
982 } 883 }
983} 884}
984 885
985void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, 886void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic,
986 const GLShader::ImageEntry& entry) { 887 const ImageEntry& entry) {
987 const auto view = texture_cache.GetImageSurface(tic, entry); 888 const auto view = texture_cache.GetImageSurface(tic, entry);
988 if (!view) { 889 if (!view) {
989 state.images[binding] = 0; 890 glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8);
990 return; 891 return;
991 } 892 }
992 if (!tic.IsBuffer()) { 893 if (!tic.IsBuffer()) {
@@ -995,55 +896,87 @@ void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& t
995 if (entry.IsWritten()) { 896 if (entry.IsWritten()) {
996 view->MarkAsModified(texture_cache.Tick()); 897 view->MarkAsModified(texture_cache.Tick());
997 } 898 }
998 state.images[binding] = view->GetTexture(); 899 glBindImageTexture(binding, view->GetTexture(), 0, GL_TRUE, 0, GL_READ_WRITE,
900 view->GetFormat());
999} 901}
1000 902
1001void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { 903void RasterizerOpenGL::SyncViewport() {
1002 const auto& regs = system.GPU().Maxwell3D().regs; 904 auto& gpu = system.GPU().Maxwell3D();
1003 const bool geometry_shaders_enabled = 905 auto& flags = gpu.dirty.flags;
1004 regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry)); 906 const auto& regs = gpu.regs;
1005 const std::size_t viewport_count = 907
1006 geometry_shaders_enabled ? Tegra::Engines::Maxwell3D::Regs::NumViewports : 1; 908 const bool dirty_viewport = flags[Dirty::Viewports];
1007 for (std::size_t i = 0; i < viewport_count; i++) { 909 if (dirty_viewport || flags[Dirty::ClipControl]) {
1008 auto& viewport = current_state.viewports[i]; 910 flags[Dirty::ClipControl] = false;
1009 const auto& src = regs.viewports[i]; 911
1010 const Common::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()}; 912 bool flip_y = false;
1011 viewport.x = viewport_rect.left; 913 if (regs.viewport_transform[0].scale_y < 0.0) {
1012 viewport.y = viewport_rect.bottom; 914 flip_y = !flip_y;
1013 viewport.width = viewport_rect.GetWidth(); 915 }
1014 viewport.height = viewport_rect.GetHeight(); 916 if (regs.screen_y_control.y_negate != 0) {
1015 viewport.depth_range_far = src.depth_range_far; 917 flip_y = !flip_y;
1016 viewport.depth_range_near = src.depth_range_near; 918 }
1017 } 919 glClipControl(flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT,
1018 state.depth_clamp.far_plane = regs.view_volume_clip_control.depth_clamp_far != 0; 920 regs.depth_mode == Maxwell::DepthMode::ZeroToOne ? GL_ZERO_TO_ONE
1019 state.depth_clamp.near_plane = regs.view_volume_clip_control.depth_clamp_near != 0; 921 : GL_NEGATIVE_ONE_TO_ONE);
1020 922 }
1021 bool flip_y = false; 923
1022 if (regs.viewport_transform[0].scale_y < 0.0) { 924 if (dirty_viewport) {
1023 flip_y = !flip_y; 925 flags[Dirty::Viewports] = false;
1024 } 926
1025 if (regs.screen_y_control.y_negate != 0) { 927 const bool force = flags[Dirty::ViewportTransform];
1026 flip_y = !flip_y; 928 flags[Dirty::ViewportTransform] = false;
1027 } 929
1028 state.clip_control.origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT; 930 for (std::size_t i = 0; i < Maxwell::NumViewports; ++i) {
1029 state.clip_control.depth_mode = 931 if (!force && !flags[Dirty::Viewport0 + i]) {
1030 regs.depth_mode == Tegra::Engines::Maxwell3D::Regs::DepthMode::ZeroToOne 932 continue;
1031 ? GL_ZERO_TO_ONE 933 }
1032 : GL_NEGATIVE_ONE_TO_ONE; 934 flags[Dirty::Viewport0 + i] = false;
935
936 const auto& src = regs.viewport_transform[i];
937 const Common::Rectangle<f32> rect{src.GetRect()};
938 glViewportIndexedf(static_cast<GLuint>(i), rect.left, rect.bottom, rect.GetWidth(),
939 rect.GetHeight());
940
941 const GLdouble reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
942 const GLdouble near_depth = src.translate_z - src.scale_z * reduce_z;
943 const GLdouble far_depth = src.translate_z + src.scale_z;
944 glDepthRangeIndexed(static_cast<GLuint>(i), near_depth, far_depth);
945 }
946 }
1033} 947}
1034 948
1035void RasterizerOpenGL::SyncClipEnabled( 949void RasterizerOpenGL::SyncDepthClamp() {
1036 const std::array<bool, Maxwell::Regs::NumClipDistances>& clip_mask) { 950 auto& gpu = system.GPU().Maxwell3D();
951 auto& flags = gpu.dirty.flags;
952 if (!flags[Dirty::DepthClampEnabled]) {
953 return;
954 }
955 flags[Dirty::DepthClampEnabled] = false;
1037 956
1038 const auto& regs = system.GPU().Maxwell3D().regs; 957 const auto& state = gpu.regs.view_volume_clip_control;
1039 const std::array<bool, Maxwell::Regs::NumClipDistances> reg_state{ 958 UNIMPLEMENTED_IF_MSG(state.depth_clamp_far != state.depth_clamp_near,
1040 regs.clip_distance_enabled.c0 != 0, regs.clip_distance_enabled.c1 != 0, 959 "Unimplemented depth clamp separation!");
1041 regs.clip_distance_enabled.c2 != 0, regs.clip_distance_enabled.c3 != 0, 960
1042 regs.clip_distance_enabled.c4 != 0, regs.clip_distance_enabled.c5 != 0, 961 oglEnable(GL_DEPTH_CLAMP, state.depth_clamp_far || state.depth_clamp_near);
1043 regs.clip_distance_enabled.c6 != 0, regs.clip_distance_enabled.c7 != 0}; 962}
963
964void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) {
965 auto& gpu = system.GPU().Maxwell3D();
966 auto& flags = gpu.dirty.flags;
967 if (!flags[Dirty::ClipDistances] && !flags[Dirty::Shaders]) {
968 return;
969 }
970 flags[Dirty::ClipDistances] = false;
971
972 clip_mask &= gpu.regs.clip_distance_enabled;
973 if (clip_mask == last_clip_distance_mask) {
974 return;
975 }
976 last_clip_distance_mask = clip_mask;
1044 977
1045 for (std::size_t i = 0; i < Maxwell::Regs::NumClipDistances; ++i) { 978 for (std::size_t i = 0; i < Maxwell::Regs::NumClipDistances; ++i) {
1046 state.clip_distance[i] = reg_state[i] && clip_mask[i]; 979 oglEnable(static_cast<GLenum>(GL_CLIP_DISTANCE0 + i), (clip_mask >> i) & 1);
1047 } 980 }
1048} 981}
1049 982
@@ -1052,247 +985,442 @@ void RasterizerOpenGL::SyncClipCoef() {
1052} 985}
1053 986
1054void RasterizerOpenGL::SyncCullMode() { 987void RasterizerOpenGL::SyncCullMode() {
1055 const auto& regs = system.GPU().Maxwell3D().regs; 988 auto& gpu = system.GPU().Maxwell3D();
989 auto& flags = gpu.dirty.flags;
990 const auto& regs = gpu.regs;
1056 991
1057 state.cull.enabled = regs.cull.enabled != 0; 992 if (flags[Dirty::CullTest]) {
1058 if (state.cull.enabled) { 993 flags[Dirty::CullTest] = false;
1059 state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face); 994
995 if (regs.cull_test_enabled) {
996 glEnable(GL_CULL_FACE);
997 glCullFace(MaxwellToGL::CullFace(regs.cull_face));
998 } else {
999 glDisable(GL_CULL_FACE);
1000 }
1060 } 1001 }
1061 1002
1062 state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face); 1003 if (flags[Dirty::FrontFace]) {
1004 flags[Dirty::FrontFace] = false;
1005 glFrontFace(MaxwellToGL::FrontFace(regs.front_face));
1006 }
1063} 1007}
1064 1008
1065void RasterizerOpenGL::SyncPrimitiveRestart() { 1009void RasterizerOpenGL::SyncPrimitiveRestart() {
1066 const auto& regs = system.GPU().Maxwell3D().regs; 1010 auto& gpu = system.GPU().Maxwell3D();
1011 auto& flags = gpu.dirty.flags;
1012 if (!flags[Dirty::PrimitiveRestart]) {
1013 return;
1014 }
1015 flags[Dirty::PrimitiveRestart] = false;
1067 1016
1068 state.primitive_restart.enabled = regs.primitive_restart.enabled; 1017 if (gpu.regs.primitive_restart.enabled) {
1069 state.primitive_restart.index = regs.primitive_restart.index; 1018 glEnable(GL_PRIMITIVE_RESTART);
1019 glPrimitiveRestartIndex(gpu.regs.primitive_restart.index);
1020 } else {
1021 glDisable(GL_PRIMITIVE_RESTART);
1022 }
1070} 1023}
1071 1024
1072void RasterizerOpenGL::SyncDepthTestState() { 1025void RasterizerOpenGL::SyncDepthTestState() {
1073 const auto& regs = system.GPU().Maxwell3D().regs; 1026 auto& gpu = system.GPU().Maxwell3D();
1074 1027 auto& flags = gpu.dirty.flags;
1075 state.depth.test_enabled = regs.depth_test_enable != 0;
1076 state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE;
1077 1028
1078 if (!state.depth.test_enabled) { 1029 const auto& regs = gpu.regs;
1079 return; 1030 if (flags[Dirty::DepthMask]) {
1031 flags[Dirty::DepthMask] = false;
1032 glDepthMask(regs.depth_write_enabled ? GL_TRUE : GL_FALSE);
1080 } 1033 }
1081 1034
1082 state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func); 1035 if (flags[Dirty::DepthTest]) {
1036 flags[Dirty::DepthTest] = false;
1037 if (regs.depth_test_enable) {
1038 glEnable(GL_DEPTH_TEST);
1039 glDepthFunc(MaxwellToGL::ComparisonOp(regs.depth_test_func));
1040 } else {
1041 glDisable(GL_DEPTH_TEST);
1042 }
1043 }
1083} 1044}
1084 1045
1085void RasterizerOpenGL::SyncStencilTestState() { 1046void RasterizerOpenGL::SyncStencilTestState() {
1086 auto& maxwell3d = system.GPU().Maxwell3D(); 1047 auto& gpu = system.GPU().Maxwell3D();
1087 if (!maxwell3d.dirty.stencil_test) { 1048 auto& flags = gpu.dirty.flags;
1049 if (!flags[Dirty::StencilTest]) {
1088 return; 1050 return;
1089 } 1051 }
1090 maxwell3d.dirty.stencil_test = false; 1052 flags[Dirty::StencilTest] = false;
1091
1092 const auto& regs = maxwell3d.regs;
1093 state.stencil.test_enabled = regs.stencil_enable != 0;
1094 state.MarkDirtyStencilState();
1095 1053
1054 const auto& regs = gpu.regs;
1096 if (!regs.stencil_enable) { 1055 if (!regs.stencil_enable) {
1056 glDisable(GL_STENCIL_TEST);
1097 return; 1057 return;
1098 } 1058 }
1099 1059
1100 state.stencil.front.test_func = MaxwellToGL::ComparisonOp(regs.stencil_front_func_func); 1060 glEnable(GL_STENCIL_TEST);
1101 state.stencil.front.test_ref = regs.stencil_front_func_ref; 1061 glStencilFuncSeparate(GL_FRONT, MaxwellToGL::ComparisonOp(regs.stencil_front_func_func),
1102 state.stencil.front.test_mask = regs.stencil_front_func_mask; 1062 regs.stencil_front_func_ref, regs.stencil_front_func_mask);
1103 state.stencil.front.action_stencil_fail = MaxwellToGL::StencilOp(regs.stencil_front_op_fail); 1063 glStencilOpSeparate(GL_FRONT, MaxwellToGL::StencilOp(regs.stencil_front_op_fail),
1104 state.stencil.front.action_depth_fail = MaxwellToGL::StencilOp(regs.stencil_front_op_zfail); 1064 MaxwellToGL::StencilOp(regs.stencil_front_op_zfail),
1105 state.stencil.front.action_depth_pass = MaxwellToGL::StencilOp(regs.stencil_front_op_zpass); 1065 MaxwellToGL::StencilOp(regs.stencil_front_op_zpass));
1106 state.stencil.front.write_mask = regs.stencil_front_mask; 1066 glStencilMaskSeparate(GL_FRONT, regs.stencil_front_mask);
1067
1107 if (regs.stencil_two_side_enable) { 1068 if (regs.stencil_two_side_enable) {
1108 state.stencil.back.test_func = MaxwellToGL::ComparisonOp(regs.stencil_back_func_func); 1069 glStencilFuncSeparate(GL_BACK, MaxwellToGL::ComparisonOp(regs.stencil_back_func_func),
1109 state.stencil.back.test_ref = regs.stencil_back_func_ref; 1070 regs.stencil_back_func_ref, regs.stencil_back_func_mask);
1110 state.stencil.back.test_mask = regs.stencil_back_func_mask; 1071 glStencilOpSeparate(GL_BACK, MaxwellToGL::StencilOp(regs.stencil_back_op_fail),
1111 state.stencil.back.action_stencil_fail = MaxwellToGL::StencilOp(regs.stencil_back_op_fail); 1072 MaxwellToGL::StencilOp(regs.stencil_back_op_zfail),
1112 state.stencil.back.action_depth_fail = MaxwellToGL::StencilOp(regs.stencil_back_op_zfail); 1073 MaxwellToGL::StencilOp(regs.stencil_back_op_zpass));
1113 state.stencil.back.action_depth_pass = MaxwellToGL::StencilOp(regs.stencil_back_op_zpass); 1074 glStencilMaskSeparate(GL_BACK, regs.stencil_back_mask);
1114 state.stencil.back.write_mask = regs.stencil_back_mask;
1115 } else { 1075 } else {
1116 state.stencil.back.test_func = GL_ALWAYS; 1076 glStencilFuncSeparate(GL_BACK, GL_ALWAYS, 0, 0xFFFFFFFF);
1117 state.stencil.back.test_ref = 0; 1077 glStencilOpSeparate(GL_BACK, GL_KEEP, GL_KEEP, GL_KEEP);
1118 state.stencil.back.test_mask = 0xFFFFFFFF; 1078 glStencilMaskSeparate(GL_BACK, 0xFFFFFFFF);
1119 state.stencil.back.write_mask = 0xFFFFFFFF;
1120 state.stencil.back.action_stencil_fail = GL_KEEP;
1121 state.stencil.back.action_depth_fail = GL_KEEP;
1122 state.stencil.back.action_depth_pass = GL_KEEP;
1123 } 1079 }
1124} 1080}
1125 1081
1126void RasterizerOpenGL::SyncRasterizeEnable(OpenGLState& current_state) { 1082void RasterizerOpenGL::SyncRasterizeEnable() {
1127 const auto& regs = system.GPU().Maxwell3D().regs; 1083 auto& gpu = system.GPU().Maxwell3D();
1128 current_state.rasterizer_discard = regs.rasterize_enable == 0; 1084 auto& flags = gpu.dirty.flags;
1085 if (!flags[Dirty::RasterizeEnable]) {
1086 return;
1087 }
1088 flags[Dirty::RasterizeEnable] = false;
1089
1090 oglEnable(GL_RASTERIZER_DISCARD, gpu.regs.rasterize_enable == 0);
1091}
1092
1093void RasterizerOpenGL::SyncPolygonModes() {
1094 auto& gpu = system.GPU().Maxwell3D();
1095 auto& flags = gpu.dirty.flags;
1096 if (!flags[Dirty::PolygonModes]) {
1097 return;
1098 }
1099 flags[Dirty::PolygonModes] = false;
1100
1101 if (gpu.regs.fill_rectangle) {
1102 if (!GLAD_GL_NV_fill_rectangle) {
1103 LOG_ERROR(Render_OpenGL, "GL_NV_fill_rectangle used and not supported");
1104 glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
1105 return;
1106 }
1107
1108 flags[Dirty::PolygonModeFront] = true;
1109 flags[Dirty::PolygonModeBack] = true;
1110 glPolygonMode(GL_FRONT_AND_BACK, GL_FILL_RECTANGLE_NV);
1111 return;
1112 }
1113
1114 if (gpu.regs.polygon_mode_front == gpu.regs.polygon_mode_back) {
1115 flags[Dirty::PolygonModeFront] = false;
1116 flags[Dirty::PolygonModeBack] = false;
1117 glPolygonMode(GL_FRONT_AND_BACK, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_front));
1118 return;
1119 }
1120
1121 if (flags[Dirty::PolygonModeFront]) {
1122 flags[Dirty::PolygonModeFront] = false;
1123 glPolygonMode(GL_FRONT, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_front));
1124 }
1125
1126 if (flags[Dirty::PolygonModeBack]) {
1127 flags[Dirty::PolygonModeBack] = false;
1128 glPolygonMode(GL_BACK, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_back));
1129 }
1129} 1130}
1130 1131
1131void RasterizerOpenGL::SyncColorMask() { 1132void RasterizerOpenGL::SyncColorMask() {
1132 auto& maxwell3d = system.GPU().Maxwell3D(); 1133 auto& gpu = system.GPU().Maxwell3D();
1133 if (!maxwell3d.dirty.color_mask) { 1134 auto& flags = gpu.dirty.flags;
1135 if (!flags[Dirty::ColorMasks]) {
1134 return; 1136 return;
1135 } 1137 }
1136 const auto& regs = maxwell3d.regs; 1138 flags[Dirty::ColorMasks] = false;
1139
1140 const bool force = flags[Dirty::ColorMaskCommon];
1141 flags[Dirty::ColorMaskCommon] = false;
1142
1143 const auto& regs = gpu.regs;
1144 if (regs.color_mask_common) {
1145 if (!force && !flags[Dirty::ColorMask0]) {
1146 return;
1147 }
1148 flags[Dirty::ColorMask0] = false;
1137 1149
1138 const std::size_t count = 1150 auto& mask = regs.color_mask[0];
1139 regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1; 1151 glColorMask(mask.R != 0, mask.B != 0, mask.G != 0, mask.A != 0);
1140 for (std::size_t i = 0; i < count; i++) { 1152 return;
1141 const auto& source = regs.color_mask[regs.color_mask_common ? 0 : i];
1142 auto& dest = state.color_mask[i];
1143 dest.red_enabled = (source.R == 0) ? GL_FALSE : GL_TRUE;
1144 dest.green_enabled = (source.G == 0) ? GL_FALSE : GL_TRUE;
1145 dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE;
1146 dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE;
1147 } 1153 }
1148 1154
1149 state.MarkDirtyColorMask(); 1155 // Path without color_mask_common set
1150 maxwell3d.dirty.color_mask = false; 1156 for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) {
1157 if (!force && !flags[Dirty::ColorMask0 + i]) {
1158 continue;
1159 }
1160 flags[Dirty::ColorMask0 + i] = false;
1161
1162 const auto& mask = regs.color_mask[i];
1163 glColorMaski(static_cast<GLuint>(i), mask.R != 0, mask.G != 0, mask.B != 0, mask.A != 0);
1164 }
1151} 1165}
1152 1166
1153void RasterizerOpenGL::SyncMultiSampleState() { 1167void RasterizerOpenGL::SyncMultiSampleState() {
1168 auto& gpu = system.GPU().Maxwell3D();
1169 auto& flags = gpu.dirty.flags;
1170 if (!flags[Dirty::MultisampleControl]) {
1171 return;
1172 }
1173 flags[Dirty::MultisampleControl] = false;
1174
1154 const auto& regs = system.GPU().Maxwell3D().regs; 1175 const auto& regs = system.GPU().Maxwell3D().regs;
1155 state.multisample_control.alpha_to_coverage = regs.multisample_control.alpha_to_coverage != 0; 1176 oglEnable(GL_SAMPLE_ALPHA_TO_COVERAGE, regs.multisample_control.alpha_to_coverage);
1156 state.multisample_control.alpha_to_one = regs.multisample_control.alpha_to_one != 0; 1177 oglEnable(GL_SAMPLE_ALPHA_TO_ONE, regs.multisample_control.alpha_to_one);
1157} 1178}
1158 1179
1159void RasterizerOpenGL::SyncFragmentColorClampState() { 1180void RasterizerOpenGL::SyncFragmentColorClampState() {
1160 const auto& regs = system.GPU().Maxwell3D().regs; 1181 auto& gpu = system.GPU().Maxwell3D();
1161 state.fragment_color_clamp.enabled = regs.frag_color_clamp != 0; 1182 auto& flags = gpu.dirty.flags;
1183 if (!flags[Dirty::FragmentClampColor]) {
1184 return;
1185 }
1186 flags[Dirty::FragmentClampColor] = false;
1187
1188 glClampColor(GL_CLAMP_FRAGMENT_COLOR, gpu.regs.frag_color_clamp ? GL_TRUE : GL_FALSE);
1162} 1189}
1163 1190
1164void RasterizerOpenGL::SyncBlendState() { 1191void RasterizerOpenGL::SyncBlendState() {
1165 auto& maxwell3d = system.GPU().Maxwell3D(); 1192 auto& gpu = system.GPU().Maxwell3D();
1166 if (!maxwell3d.dirty.blend_state) { 1193 auto& flags = gpu.dirty.flags;
1194 const auto& regs = gpu.regs;
1195
1196 if (flags[Dirty::BlendColor]) {
1197 flags[Dirty::BlendColor] = false;
1198 glBlendColor(regs.blend_color.r, regs.blend_color.g, regs.blend_color.b,
1199 regs.blend_color.a);
1200 }
1201
1202 // TODO(Rodrigo): Revisit blending, there are several registers we are not reading
1203
1204 if (!flags[Dirty::BlendStates]) {
1167 return; 1205 return;
1168 } 1206 }
1169 const auto& regs = maxwell3d.regs; 1207 flags[Dirty::BlendStates] = false;
1170 1208
1171 state.blend_color.red = regs.blend_color.r; 1209 if (!regs.independent_blend_enable) {
1172 state.blend_color.green = regs.blend_color.g; 1210 if (!regs.blend.enable[0]) {
1173 state.blend_color.blue = regs.blend_color.b; 1211 glDisable(GL_BLEND);
1174 state.blend_color.alpha = regs.blend_color.a; 1212 return;
1175
1176 state.independant_blend.enabled = regs.independent_blend_enable;
1177 if (!state.independant_blend.enabled) {
1178 auto& blend = state.blend[0];
1179 const auto& src = regs.blend;
1180 blend.enabled = src.enable[0] != 0;
1181 if (blend.enabled) {
1182 blend.rgb_equation = MaxwellToGL::BlendEquation(src.equation_rgb);
1183 blend.src_rgb_func = MaxwellToGL::BlendFunc(src.factor_source_rgb);
1184 blend.dst_rgb_func = MaxwellToGL::BlendFunc(src.factor_dest_rgb);
1185 blend.a_equation = MaxwellToGL::BlendEquation(src.equation_a);
1186 blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a);
1187 blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a);
1188 }
1189 for (std::size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
1190 state.blend[i].enabled = false;
1191 } 1213 }
1192 maxwell3d.dirty.blend_state = false; 1214 glEnable(GL_BLEND);
1193 state.MarkDirtyBlendState(); 1215 glBlendFuncSeparate(MaxwellToGL::BlendFunc(regs.blend.factor_source_rgb),
1216 MaxwellToGL::BlendFunc(regs.blend.factor_dest_rgb),
1217 MaxwellToGL::BlendFunc(regs.blend.factor_source_a),
1218 MaxwellToGL::BlendFunc(regs.blend.factor_dest_a));
1219 glBlendEquationSeparate(MaxwellToGL::BlendEquation(regs.blend.equation_rgb),
1220 MaxwellToGL::BlendEquation(regs.blend.equation_a));
1194 return; 1221 return;
1195 } 1222 }
1196 1223
1197 for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { 1224 const bool force = flags[Dirty::BlendIndependentEnabled];
1198 auto& blend = state.blend[i]; 1225 flags[Dirty::BlendIndependentEnabled] = false;
1199 const auto& src = regs.independent_blend[i]; 1226
1200 blend.enabled = regs.blend.enable[i] != 0; 1227 for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) {
1201 if (!blend.enabled) 1228 if (!force && !flags[Dirty::BlendState0 + i]) {
1202 continue; 1229 continue;
1203 blend.rgb_equation = MaxwellToGL::BlendEquation(src.equation_rgb); 1230 }
1204 blend.src_rgb_func = MaxwellToGL::BlendFunc(src.factor_source_rgb); 1231 flags[Dirty::BlendState0 + i] = false;
1205 blend.dst_rgb_func = MaxwellToGL::BlendFunc(src.factor_dest_rgb); 1232
1206 blend.a_equation = MaxwellToGL::BlendEquation(src.equation_a); 1233 if (!regs.blend.enable[i]) {
1207 blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a); 1234 glDisablei(GL_BLEND, static_cast<GLuint>(i));
1208 blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a); 1235 continue;
1209 } 1236 }
1237 glEnablei(GL_BLEND, static_cast<GLuint>(i));
1210 1238
1211 state.MarkDirtyBlendState(); 1239 const auto& src = regs.independent_blend[i];
1212 maxwell3d.dirty.blend_state = false; 1240 glBlendFuncSeparatei(static_cast<GLuint>(i), MaxwellToGL::BlendFunc(src.factor_source_rgb),
1241 MaxwellToGL::BlendFunc(src.factor_dest_rgb),
1242 MaxwellToGL::BlendFunc(src.factor_source_a),
1243 MaxwellToGL::BlendFunc(src.factor_dest_a));
1244 glBlendEquationSeparatei(static_cast<GLuint>(i),
1245 MaxwellToGL::BlendEquation(src.equation_rgb),
1246 MaxwellToGL::BlendEquation(src.equation_a));
1247 }
1213} 1248}
1214 1249
1215void RasterizerOpenGL::SyncLogicOpState() { 1250void RasterizerOpenGL::SyncLogicOpState() {
1216 const auto& regs = system.GPU().Maxwell3D().regs; 1251 auto& gpu = system.GPU().Maxwell3D();
1252 auto& flags = gpu.dirty.flags;
1253 if (!flags[Dirty::LogicOp]) {
1254 return;
1255 }
1256 flags[Dirty::LogicOp] = false;
1217 1257
1218 state.logic_op.enabled = regs.logic_op.enable != 0; 1258 const auto& regs = gpu.regs;
1259 if (regs.logic_op.enable) {
1260 glEnable(GL_COLOR_LOGIC_OP);
1261 glLogicOp(MaxwellToGL::LogicOp(regs.logic_op.operation));
1262 } else {
1263 glDisable(GL_COLOR_LOGIC_OP);
1264 }
1265}
1219 1266
1220 if (!state.logic_op.enabled) 1267void RasterizerOpenGL::SyncScissorTest() {
1268 auto& gpu = system.GPU().Maxwell3D();
1269 auto& flags = gpu.dirty.flags;
1270 if (!flags[Dirty::Scissors]) {
1221 return; 1271 return;
1272 }
1273 flags[Dirty::Scissors] = false;
1222 1274
1223 ASSERT_MSG(regs.blend.enable[0] == 0, 1275 const auto& regs = gpu.regs;
1224 "Blending and logic op can't be enabled at the same time."); 1276 for (std::size_t index = 0; index < Maxwell::NumViewports; ++index) {
1225 1277 if (!flags[Dirty::Scissor0 + index]) {
1226 state.logic_op.operation = MaxwellToGL::LogicOp(regs.logic_op.operation); 1278 continue;
1227} 1279 }
1280 flags[Dirty::Scissor0 + index] = false;
1228 1281
1229void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) { 1282 const auto& src = regs.scissor_test[index];
1230 const auto& regs = system.GPU().Maxwell3D().regs; 1283 if (src.enable) {
1231 const bool geometry_shaders_enabled = 1284 glEnablei(GL_SCISSOR_TEST, static_cast<GLuint>(index));
1232 regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry)); 1285 glScissorIndexed(static_cast<GLuint>(index), src.min_x, src.min_y,
1233 const std::size_t viewport_count = 1286 src.max_x - src.min_x, src.max_y - src.min_y);
1234 geometry_shaders_enabled ? Tegra::Engines::Maxwell3D::Regs::NumViewports : 1; 1287 } else {
1235 for (std::size_t i = 0; i < viewport_count; i++) { 1288 glDisablei(GL_SCISSOR_TEST, static_cast<GLuint>(index));
1236 const auto& src = regs.scissor_test[i];
1237 auto& dst = current_state.viewports[i].scissor;
1238 dst.enabled = (src.enable != 0);
1239 if (dst.enabled == 0) {
1240 return;
1241 } 1289 }
1242 const u32 width = src.max_x - src.min_x;
1243 const u32 height = src.max_y - src.min_y;
1244 dst.x = src.min_x;
1245 dst.y = src.min_y;
1246 dst.width = width;
1247 dst.height = height;
1248 } 1290 }
1249} 1291}
1250 1292
1251void RasterizerOpenGL::SyncTransformFeedback() {
1252 const auto& regs = system.GPU().Maxwell3D().regs;
1253 UNIMPLEMENTED_IF_MSG(regs.tfb_enabled != 0, "Transform feedbacks are not implemented");
1254}
1255
1256void RasterizerOpenGL::SyncPointState() { 1293void RasterizerOpenGL::SyncPointState() {
1257 const auto& regs = system.GPU().Maxwell3D().regs; 1294 auto& gpu = system.GPU().Maxwell3D();
1295 auto& flags = gpu.dirty.flags;
1296 if (!flags[Dirty::PointSize]) {
1297 return;
1298 }
1299 flags[Dirty::PointSize] = false;
1300
1301 oglEnable(GL_POINT_SPRITE, gpu.regs.point_sprite_enable);
1302
1303 if (gpu.regs.vp_point_size.enable) {
1304 // By definition of GL_POINT_SIZE, it only matters if GL_PROGRAM_POINT_SIZE is disabled.
1305 glEnable(GL_PROGRAM_POINT_SIZE);
1306 return;
1307 }
1308
1258 // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid 1309 // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
1259 // in OpenGL). 1310 // in OpenGL).
1260 state.point.program_control = regs.vp_point_size.enable != 0; 1311 glPointSize(std::max(1.0f, gpu.regs.point_size));
1261 state.point.sprite = regs.point_sprite_enable != 0; 1312 glDisable(GL_PROGRAM_POINT_SIZE);
1262 state.point.size = std::max(1.0f, regs.point_size);
1263} 1313}
1264 1314
1265void RasterizerOpenGL::SyncPolygonOffset() { 1315void RasterizerOpenGL::SyncPolygonOffset() {
1266 auto& maxwell3d = system.GPU().Maxwell3D(); 1316 auto& gpu = system.GPU().Maxwell3D();
1267 if (!maxwell3d.dirty.polygon_offset) { 1317 auto& flags = gpu.dirty.flags;
1318 if (!flags[Dirty::PolygonOffset]) {
1268 return; 1319 return;
1269 } 1320 }
1270 const auto& regs = maxwell3d.regs; 1321 flags[Dirty::PolygonOffset] = false;
1271
1272 state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0;
1273 state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0;
1274 state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0;
1275 1322
1276 // Hardware divides polygon offset units by two 1323 const auto& regs = gpu.regs;
1277 state.polygon_offset.units = regs.polygon_offset_units / 2.0f; 1324 oglEnable(GL_POLYGON_OFFSET_FILL, regs.polygon_offset_fill_enable);
1278 state.polygon_offset.factor = regs.polygon_offset_factor; 1325 oglEnable(GL_POLYGON_OFFSET_LINE, regs.polygon_offset_line_enable);
1279 state.polygon_offset.clamp = regs.polygon_offset_clamp; 1326 oglEnable(GL_POLYGON_OFFSET_POINT, regs.polygon_offset_point_enable);
1280 1327
1281 state.MarkDirtyPolygonOffset(); 1328 if (regs.polygon_offset_fill_enable || regs.polygon_offset_line_enable ||
1282 maxwell3d.dirty.polygon_offset = false; 1329 regs.polygon_offset_point_enable) {
1330 // Hardware divides polygon offset units by two
1331 glPolygonOffsetClamp(regs.polygon_offset_factor, regs.polygon_offset_units / 2.0f,
1332 regs.polygon_offset_clamp);
1333 }
1283} 1334}
1284 1335
1285void RasterizerOpenGL::SyncAlphaTest() { 1336void RasterizerOpenGL::SyncAlphaTest() {
1337 auto& gpu = system.GPU().Maxwell3D();
1338 auto& flags = gpu.dirty.flags;
1339 if (!flags[Dirty::AlphaTest]) {
1340 return;
1341 }
1342 flags[Dirty::AlphaTest] = false;
1343
1344 const auto& regs = gpu.regs;
1345 if (regs.alpha_test_enabled && regs.rt_control.count > 1) {
1346 LOG_WARNING(Render_OpenGL, "Alpha testing with more than one render target is not tested");
1347 }
1348
1349 if (regs.alpha_test_enabled) {
1350 glEnable(GL_ALPHA_TEST);
1351 glAlphaFunc(MaxwellToGL::ComparisonOp(regs.alpha_test_func), regs.alpha_test_ref);
1352 } else {
1353 glDisable(GL_ALPHA_TEST);
1354 }
1355}
1356
1357void RasterizerOpenGL::SyncFramebufferSRGB() {
1358 auto& gpu = system.GPU().Maxwell3D();
1359 auto& flags = gpu.dirty.flags;
1360 if (!flags[Dirty::FramebufferSRGB]) {
1361 return;
1362 }
1363 flags[Dirty::FramebufferSRGB] = false;
1364
1365 oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb);
1366}
1367
1368void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
1286 const auto& regs = system.GPU().Maxwell3D().regs; 1369 const auto& regs = system.GPU().Maxwell3D().regs;
1287 UNIMPLEMENTED_IF_MSG(regs.alpha_test_enabled != 0 && regs.rt_control.count > 1, 1370 if (regs.tfb_enabled == 0) {
1288 "Alpha Testing is enabled with more than one rendertarget"); 1371 return;
1372 }
1373
1374 UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
1375 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
1376 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
1289 1377
1290 state.alpha_test.enabled = regs.alpha_test_enabled; 1378 for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
1291 if (!state.alpha_test.enabled) { 1379 const auto& binding = regs.tfb_bindings[index];
1380 if (!binding.buffer_enable) {
1381 if (enabled_transform_feedback_buffers[index]) {
1382 glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), 0, 0,
1383 0);
1384 }
1385 enabled_transform_feedback_buffers[index] = false;
1386 continue;
1387 }
1388 enabled_transform_feedback_buffers[index] = true;
1389
1390 auto& tfb_buffer = transform_feedback_buffers[index];
1391 tfb_buffer.Create();
1392
1393 const GLuint handle = tfb_buffer.handle;
1394 const std::size_t size = binding.buffer_size;
1395 glNamedBufferData(handle, static_cast<GLsizeiptr>(size), nullptr, GL_STREAM_COPY);
1396 glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), handle, 0,
1397 static_cast<GLsizeiptr>(size));
1398 }
1399
1400 glBeginTransformFeedback(GL_POINTS);
1401}
1402
1403void RasterizerOpenGL::EndTransformFeedback() {
1404 const auto& regs = system.GPU().Maxwell3D().regs;
1405 if (regs.tfb_enabled == 0) {
1292 return; 1406 return;
1293 } 1407 }
1294 state.alpha_test.func = MaxwellToGL::ComparisonOp(regs.alpha_test_func); 1408
1295 state.alpha_test.ref = regs.alpha_test_ref; 1409 glEndTransformFeedback();
1410
1411 for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
1412 const auto& binding = regs.tfb_bindings[index];
1413 if (!binding.buffer_enable) {
1414 continue;
1415 }
1416 UNIMPLEMENTED_IF(binding.buffer_offset != 0);
1417
1418 const GLuint handle = transform_feedback_buffers[index].handle;
1419 const GPUVAddr gpu_addr = binding.Address();
1420 const std::size_t size = binding.buffer_size;
1421 const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
1422 glCopyNamedBufferSubData(handle, *dest_buffer, 0, offset, static_cast<GLsizeiptr>(size));
1423 }
1296} 1424}
1297 1425
1298} // namespace OpenGL 1426} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 68abe9a21..2d3be2437 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -30,7 +30,7 @@
30#include "video_core/renderer_opengl/gl_shader_cache.h" 30#include "video_core/renderer_opengl/gl_shader_cache.h"
31#include "video_core/renderer_opengl/gl_shader_decompiler.h" 31#include "video_core/renderer_opengl/gl_shader_decompiler.h"
32#include "video_core/renderer_opengl/gl_shader_manager.h" 32#include "video_core/renderer_opengl/gl_shader_manager.h"
33#include "video_core/renderer_opengl/gl_state.h" 33#include "video_core/renderer_opengl/gl_state_tracker.h"
34#include "video_core/renderer_opengl/gl_texture_cache.h" 34#include "video_core/renderer_opengl/gl_texture_cache.h"
35#include "video_core/renderer_opengl/utils.h" 35#include "video_core/renderer_opengl/utils.h"
36#include "video_core/textures/texture.h" 36#include "video_core/textures/texture.h"
@@ -55,7 +55,8 @@ struct DrawParameters;
55class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { 55class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
56public: 56public:
57 explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, 57 explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
58 ScreenInfo& info); 58 ScreenInfo& info, GLShader::ProgramManager& program_manager,
59 StateTracker& state_tracker);
59 ~RasterizerOpenGL() override; 60 ~RasterizerOpenGL() override;
60 61
61 void Draw(bool is_indexed, bool is_instanced) override; 62 void Draw(bool is_indexed, bool is_instanced) override;
@@ -76,6 +77,7 @@ public:
76 u32 pixel_stride) override; 77 u32 pixel_stride) override;
77 void LoadDiskResources(const std::atomic_bool& stop_loading, 78 void LoadDiskResources(const std::atomic_bool& stop_loading,
78 const VideoCore::DiskResourceLoadCallback& callback) override; 79 const VideoCore::DiskResourceLoadCallback& callback) override;
80 void SetupDirtyFlags() override;
79 81
80 /// Returns true when there are commands queued to the OpenGL server. 82 /// Returns true when there are commands queued to the OpenGL server.
81 bool AnyCommandQueued() const { 83 bool AnyCommandQueued() const {
@@ -86,8 +88,7 @@ private:
86 /// Configures the color and depth framebuffer states. 88 /// Configures the color and depth framebuffer states.
87 void ConfigureFramebuffers(); 89 void ConfigureFramebuffers();
88 90
89 void ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb, 91 void ConfigureClearFramebuffer(bool using_color_fb, bool using_depth_fb, bool using_stencil_fb);
90 bool using_depth_fb, bool using_stencil_fb);
91 92
92 /// Configures the current constbuffers to use for the draw command. 93 /// Configures the current constbuffers to use for the draw command.
93 void SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader); 94 void SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader);
@@ -97,7 +98,7 @@ private:
97 98
98 /// Configures a constant buffer. 99 /// Configures a constant buffer.
99 void SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, 100 void SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
100 const GLShader::ConstBufferEntry& entry); 101 const ConstBufferEntry& entry);
101 102
102 /// Configures the current global memory entries to use for the draw command. 103 /// Configures the current global memory entries to use for the draw command.
103 void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader); 104 void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader);
@@ -106,7 +107,7 @@ private:
106 void SetupComputeGlobalMemory(const Shader& kernel); 107 void SetupComputeGlobalMemory(const Shader& kernel);
107 108
108 /// Configures a constant buffer. 109 /// Configures a constant buffer.
109 void SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr, 110 void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
110 std::size_t size); 111 std::size_t size);
111 112
112 /// Configures the current textures to use for the draw command. 113 /// Configures the current textures to use for the draw command.
@@ -117,7 +118,7 @@ private:
117 118
118 /// Configures a texture. 119 /// Configures a texture.
119 void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, 120 void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
120 const GLShader::SamplerEntry& entry); 121 const SamplerEntry& entry);
121 122
122 /// Configures images in a graphics shader. 123 /// Configures images in a graphics shader.
123 void SetupDrawImages(std::size_t stage_index, const Shader& shader); 124 void SetupDrawImages(std::size_t stage_index, const Shader& shader);
@@ -126,15 +127,16 @@ private:
126 void SetupComputeImages(const Shader& shader); 127 void SetupComputeImages(const Shader& shader);
127 128
128 /// Configures an image. 129 /// Configures an image.
129 void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, 130 void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
130 const GLShader::ImageEntry& entry);
131 131
132 /// Syncs the viewport and depth range to match the guest state 132 /// Syncs the viewport and depth range to match the guest state
133 void SyncViewport(OpenGLState& current_state); 133 void SyncViewport();
134
135 /// Syncs the depth clamp state
136 void SyncDepthClamp();
134 137
135 /// Syncs the clip enabled status to match the guest state 138 /// Syncs the clip enabled status to match the guest state
136 void SyncClipEnabled( 139 void SyncClipEnabled(u32 clip_mask);
137 const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& clip_mask);
138 140
139 /// Syncs the clip coefficients to match the guest state 141 /// Syncs the clip coefficients to match the guest state
140 void SyncClipCoef(); 142 void SyncClipCoef();
@@ -164,16 +166,16 @@ private:
164 void SyncMultiSampleState(); 166 void SyncMultiSampleState();
165 167
166 /// Syncs the scissor test state to match the guest state 168 /// Syncs the scissor test state to match the guest state
167 void SyncScissorTest(OpenGLState& current_state); 169 void SyncScissorTest();
168
169 /// Syncs the transform feedback state to match the guest state
170 void SyncTransformFeedback();
171 170
172 /// Syncs the point state to match the guest state 171 /// Syncs the point state to match the guest state
173 void SyncPointState(); 172 void SyncPointState();
174 173
175 /// Syncs the rasterizer enable state to match the guest state 174 /// Syncs the rasterizer enable state to match the guest state
176 void SyncRasterizeEnable(OpenGLState& current_state); 175 void SyncRasterizeEnable();
176
177 /// Syncs polygon modes to match the guest state
178 void SyncPolygonModes();
177 179
178 /// Syncs Color Mask 180 /// Syncs Color Mask
179 void SyncColorMask(); 181 void SyncColorMask();
@@ -184,6 +186,15 @@ private:
184 /// Syncs the alpha test state to match the guest state 186 /// Syncs the alpha test state to match the guest state
185 void SyncAlphaTest(); 187 void SyncAlphaTest();
186 188
189 /// Syncs the framebuffer sRGB state to match the guest state
190 void SyncFramebufferSRGB();
191
192 /// Begin a transform feedback
193 void BeginTransformFeedback(GLenum primitive_mode);
194
195 /// End a transform feedback
196 void EndTransformFeedback();
197
187 /// Check for extension that are not strictly required but are needed for correct emulation 198 /// Check for extension that are not strictly required but are needed for correct emulation
188 void CheckExtensions(); 199 void CheckExtensions();
189 200
@@ -191,18 +202,17 @@ private:
191 202
192 std::size_t CalculateIndexBufferSize() const; 203 std::size_t CalculateIndexBufferSize() const;
193 204
194 /// Updates and returns a vertex array object representing current vertex format 205 /// Updates the current vertex format
195 GLuint SetupVertexFormat(); 206 void SetupVertexFormat();
196 207
197 void SetupVertexBuffer(GLuint vao); 208 void SetupVertexBuffer();
198 void SetupVertexInstances(GLuint vao); 209 void SetupVertexInstances();
199 210
200 GLintptr SetupIndexBuffer(); 211 GLintptr SetupIndexBuffer();
201 212
202 void SetupShaders(GLenum primitive_mode); 213 void SetupShaders(GLenum primitive_mode);
203 214
204 const Device device; 215 const Device device;
205 OpenGLState state;
206 216
207 TextureCacheOpenGL texture_cache; 217 TextureCacheOpenGL texture_cache;
208 ShaderCacheOpenGL shader_cache; 218 ShaderCacheOpenGL shader_cache;
@@ -212,22 +222,25 @@ private:
212 222
213 Core::System& system; 223 Core::System& system;
214 ScreenInfo& screen_info; 224 ScreenInfo& screen_info;
215 225 GLShader::ProgramManager& program_manager;
216 std::unique_ptr<GLShader::ProgramManager> shader_program_manager; 226 StateTracker& state_tracker;
217 std::map<std::array<Tegra::Engines::Maxwell3D::Regs::VertexAttribute,
218 Tegra::Engines::Maxwell3D::Regs::NumVertexAttributes>,
219 OGLVertexArray>
220 vertex_array_cache;
221 227
222 static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; 228 static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
223 OGLBufferCache buffer_cache; 229 OGLBufferCache buffer_cache;
224 230
225 VertexArrayPushBuffer vertex_array_pushbuffer; 231 VertexArrayPushBuffer vertex_array_pushbuffer{state_tracker};
226 BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; 232 BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
227 BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; 233 BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
228 234
235 std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
236 transform_feedback_buffers;
237 std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
238 enabled_transform_feedback_buffers;
239
229 /// Number of commands queued to the OpenGL driver. Reseted on flush. 240 /// Number of commands queued to the OpenGL driver. Reseted on flush.
230 std::size_t num_queued_commands = 0; 241 std::size_t num_queued_commands = 0;
242
243 u32 last_clip_distance_mask = 0;
231}; 244};
232 245
233} // namespace OpenGL 246} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index f0ddfb276..97803d480 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -8,13 +8,29 @@
8#include "common/microprofile.h" 8#include "common/microprofile.h"
9#include "video_core/renderer_opengl/gl_resource_manager.h" 9#include "video_core/renderer_opengl/gl_resource_manager.h"
10#include "video_core/renderer_opengl/gl_shader_util.h" 10#include "video_core/renderer_opengl/gl_shader_util.h"
11#include "video_core/renderer_opengl/gl_state.h"
12 11
13MICROPROFILE_DEFINE(OpenGL_ResourceCreation, "OpenGL", "Resource Creation", MP_RGB(128, 128, 192)); 12MICROPROFILE_DEFINE(OpenGL_ResourceCreation, "OpenGL", "Resource Creation", MP_RGB(128, 128, 192));
14MICROPROFILE_DEFINE(OpenGL_ResourceDeletion, "OpenGL", "Resource Deletion", MP_RGB(128, 128, 192)); 13MICROPROFILE_DEFINE(OpenGL_ResourceDeletion, "OpenGL", "Resource Deletion", MP_RGB(128, 128, 192));
15 14
16namespace OpenGL { 15namespace OpenGL {
17 16
17void OGLRenderbuffer::Create() {
18 if (handle != 0)
19 return;
20
21 MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
22 glCreateRenderbuffers(1, &handle);
23}
24
25void OGLRenderbuffer::Release() {
26 if (handle == 0)
27 return;
28
29 MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
30 glDeleteRenderbuffers(1, &handle);
31 handle = 0;
32}
33
18void OGLTexture::Create(GLenum target) { 34void OGLTexture::Create(GLenum target) {
19 if (handle != 0) 35 if (handle != 0)
20 return; 36 return;
@@ -29,7 +45,6 @@ void OGLTexture::Release() {
29 45
30 MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); 46 MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
31 glDeleteTextures(1, &handle); 47 glDeleteTextures(1, &handle);
32 OpenGLState::GetCurState().UnbindTexture(handle).Apply();
33 handle = 0; 48 handle = 0;
34} 49}
35 50
@@ -47,7 +62,6 @@ void OGLTextureView::Release() {
47 62
48 MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); 63 MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
49 glDeleteTextures(1, &handle); 64 glDeleteTextures(1, &handle);
50 OpenGLState::GetCurState().UnbindTexture(handle).Apply();
51 handle = 0; 65 handle = 0;
52} 66}
53 67
@@ -65,7 +79,6 @@ void OGLSampler::Release() {
65 79
66 MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); 80 MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
67 glDeleteSamplers(1, &handle); 81 glDeleteSamplers(1, &handle);
68 OpenGLState::GetCurState().ResetSampler(handle).Apply();
69 handle = 0; 82 handle = 0;
70} 83}
71 84
@@ -109,7 +122,6 @@ void OGLProgram::Release() {
109 122
110 MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); 123 MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
111 glDeleteProgram(handle); 124 glDeleteProgram(handle);
112 OpenGLState::GetCurState().ResetProgram(handle).Apply();
113 handle = 0; 125 handle = 0;
114} 126}
115 127
@@ -127,7 +139,6 @@ void OGLPipeline::Release() {
127 139
128 MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); 140 MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
129 glDeleteProgramPipelines(1, &handle); 141 glDeleteProgramPipelines(1, &handle);
130 OpenGLState::GetCurState().ResetPipeline(handle).Apply();
131 handle = 0; 142 handle = 0;
132} 143}
133 144
@@ -171,24 +182,6 @@ void OGLSync::Release() {
171 handle = 0; 182 handle = 0;
172} 183}
173 184
174void OGLVertexArray::Create() {
175 if (handle != 0)
176 return;
177
178 MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
179 glCreateVertexArrays(1, &handle);
180}
181
182void OGLVertexArray::Release() {
183 if (handle == 0)
184 return;
185
186 MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
187 glDeleteVertexArrays(1, &handle);
188 OpenGLState::GetCurState().ResetVertexArray(handle).Apply();
189 handle = 0;
190}
191
192void OGLFramebuffer::Create() { 185void OGLFramebuffer::Create() {
193 if (handle != 0) 186 if (handle != 0)
194 return; 187 return;
@@ -203,7 +196,6 @@ void OGLFramebuffer::Release() {
203 196
204 MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); 197 MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
205 glDeleteFramebuffers(1, &handle); 198 glDeleteFramebuffers(1, &handle);
206 OpenGLState::GetCurState().ResetFramebuffer(handle).Apply();
207 handle = 0; 199 handle = 0;
208} 200}
209 201
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index 514d1d165..de93f4212 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -11,6 +11,31 @@
11 11
12namespace OpenGL { 12namespace OpenGL {
13 13
14class OGLRenderbuffer : private NonCopyable {
15public:
16 OGLRenderbuffer() = default;
17
18 OGLRenderbuffer(OGLRenderbuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
19
20 ~OGLRenderbuffer() {
21 Release();
22 }
23
24 OGLRenderbuffer& operator=(OGLRenderbuffer&& o) noexcept {
25 Release();
26 handle = std::exchange(o.handle, 0);
27 return *this;
28 }
29
30 /// Creates a new internal OpenGL resource and stores the handle
31 void Create();
32
33 /// Deletes the internal OpenGL resource
34 void Release();
35
36 GLuint handle = 0;
37};
38
14class OGLTexture : private NonCopyable { 39class OGLTexture : private NonCopyable {
15public: 40public:
16 OGLTexture() = default; 41 OGLTexture() = default;
@@ -216,31 +241,6 @@ public:
216 GLsync handle = 0; 241 GLsync handle = 0;
217}; 242};
218 243
219class OGLVertexArray : private NonCopyable {
220public:
221 OGLVertexArray() = default;
222
223 OGLVertexArray(OGLVertexArray&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
224
225 ~OGLVertexArray() {
226 Release();
227 }
228
229 OGLVertexArray& operator=(OGLVertexArray&& o) noexcept {
230 Release();
231 handle = std::exchange(o.handle, 0);
232 return *this;
233 }
234
235 /// Creates a new internal OpenGL resource and stores the handle
236 void Create();
237
238 /// Deletes the internal OpenGL resource
239 void Release();
240
241 GLuint handle = 0;
242};
243
244class OGLFramebuffer : private NonCopyable { 244class OGLFramebuffer : private NonCopyable {
245public: 245public:
246 OGLFramebuffer() = default; 246 OGLFramebuffer() = default;
diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.cpp b/src/video_core/renderer_opengl/gl_sampler_cache.cpp
index 3ded5ecea..5c174879a 100644
--- a/src/video_core/renderer_opengl/gl_sampler_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_sampler_cache.cpp
@@ -38,7 +38,7 @@ OGLSampler SamplerCacheOpenGL::CreateSampler(const Tegra::Texture::TSCEntry& tsc
38 glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, tsc.GetMaxAnisotropy()); 38 glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, tsc.GetMaxAnisotropy());
39 } else if (GLAD_GL_EXT_texture_filter_anisotropic) { 39 } else if (GLAD_GL_EXT_texture_filter_anisotropic) {
40 glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, tsc.GetMaxAnisotropy()); 40 glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, tsc.GetMaxAnisotropy());
41 } else if (tsc.GetMaxAnisotropy() != 1) { 41 } else {
42 LOG_WARNING(Render_OpenGL, "Anisotropy not supported by host GPU driver"); 42 LOG_WARNING(Render_OpenGL, "Anisotropy not supported by host GPU driver");
43 } 43 }
44 44
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 489eb143c..e3d31c3eb 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -2,12 +2,16 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <atomic>
6#include <functional>
5#include <mutex> 7#include <mutex>
6#include <optional> 8#include <optional>
7#include <string> 9#include <string>
8#include <thread> 10#include <thread>
9#include <unordered_set> 11#include <unordered_set>
12
10#include <boost/functional/hash.hpp> 13#include <boost/functional/hash.hpp>
14
11#include "common/alignment.h" 15#include "common/alignment.h"
12#include "common/assert.h" 16#include "common/assert.h"
13#include "common/logging/log.h" 17#include "common/logging/log.h"
@@ -22,14 +26,16 @@
22#include "video_core/renderer_opengl/gl_shader_cache.h" 26#include "video_core/renderer_opengl/gl_shader_cache.h"
23#include "video_core/renderer_opengl/gl_shader_decompiler.h" 27#include "video_core/renderer_opengl/gl_shader_decompiler.h"
24#include "video_core/renderer_opengl/gl_shader_disk_cache.h" 28#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
29#include "video_core/renderer_opengl/gl_state_tracker.h"
25#include "video_core/renderer_opengl/utils.h" 30#include "video_core/renderer_opengl/utils.h"
31#include "video_core/shader/registry.h"
26#include "video_core/shader/shader_ir.h" 32#include "video_core/shader/shader_ir.h"
27 33
28namespace OpenGL { 34namespace OpenGL {
29 35
30using Tegra::Engines::ShaderType; 36using Tegra::Engines::ShaderType;
31using VideoCommon::Shader::ConstBufferLocker;
32using VideoCommon::Shader::ProgramCode; 37using VideoCommon::Shader::ProgramCode;
38using VideoCommon::Shader::Registry;
33using VideoCommon::Shader::ShaderIR; 39using VideoCommon::Shader::ShaderIR;
34 40
35namespace { 41namespace {
@@ -55,7 +61,7 @@ constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
55} 61}
56 62
57/// Calculates the size of a program stream 63/// Calculates the size of a program stream
58std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { 64std::size_t CalculateProgramSize(const ProgramCode& program) {
59 constexpr std::size_t start_offset = 10; 65 constexpr std::size_t start_offset = 10;
60 // This is the encoded version of BRA that jumps to itself. All Nvidia 66 // This is the encoded version of BRA that jumps to itself. All Nvidia
61 // shaders end with one. 67 // shaders end with one.
@@ -108,32 +114,9 @@ constexpr GLenum GetGLShaderType(ShaderType shader_type) {
108 } 114 }
109} 115}
110 116
111/// Describes primitive behavior on geometry shaders
112constexpr std::pair<const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) {
113 switch (primitive_mode) {
114 case GL_POINTS:
115 return {"points", 1};
116 case GL_LINES:
117 case GL_LINE_STRIP:
118 return {"lines", 2};
119 case GL_LINES_ADJACENCY:
120 case GL_LINE_STRIP_ADJACENCY:
121 return {"lines_adjacency", 4};
122 case GL_TRIANGLES:
123 case GL_TRIANGLE_STRIP:
124 case GL_TRIANGLE_FAN:
125 return {"triangles", 3};
126 case GL_TRIANGLES_ADJACENCY:
127 case GL_TRIANGLE_STRIP_ADJACENCY:
128 return {"triangles_adjacency", 6};
129 default:
130 return {"points", 1};
131 }
132}
133
134/// Hashes one (or two) program streams 117/// Hashes one (or two) program streams
135u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& code, 118u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& code,
136 const ProgramCode& code_b) { 119 const ProgramCode& code_b = {}) {
137 u64 unique_identifier = boost::hash_value(code); 120 u64 unique_identifier = boost::hash_value(code);
138 if (is_a) { 121 if (is_a) {
139 // VertexA programs include two programs 122 // VertexA programs include two programs
@@ -142,24 +125,6 @@ u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& co
142 return unique_identifier; 125 return unique_identifier;
143} 126}
144 127
145/// Creates an unspecialized program from code streams
146std::string GenerateGLSL(const Device& device, ShaderType shader_type, const ShaderIR& ir,
147 const std::optional<ShaderIR>& ir_b) {
148 switch (shader_type) {
149 case ShaderType::Vertex:
150 return GLShader::GenerateVertexShader(device, ir, ir_b ? &*ir_b : nullptr);
151 case ShaderType::Geometry:
152 return GLShader::GenerateGeometryShader(device, ir);
153 case ShaderType::Fragment:
154 return GLShader::GenerateFragmentShader(device, ir);
155 case ShaderType::Compute:
156 return GLShader::GenerateComputeShader(device, ir);
157 default:
158 UNIMPLEMENTED_MSG("Unimplemented shader_type={}", static_cast<u32>(shader_type));
159 return {};
160 }
161}
162
163constexpr const char* GetShaderTypeName(ShaderType shader_type) { 128constexpr const char* GetShaderTypeName(ShaderType shader_type) {
164 switch (shader_type) { 129 switch (shader_type) {
165 case ShaderType::Vertex: 130 case ShaderType::Vertex:
@@ -195,102 +160,38 @@ constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) {
195 return {}; 160 return {};
196} 161}
197 162
198std::string GetShaderId(u64 unique_identifier, ShaderType shader_type) { 163std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) {
199 return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier); 164 return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier);
200} 165}
201 166
202Tegra::Engines::ConstBufferEngineInterface& GetConstBufferEngineInterface(Core::System& system, 167std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
203 ShaderType shader_type) { 168 const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size};
204 if (shader_type == ShaderType::Compute) { 169 const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer,
205 return system.GPU().KeplerCompute(); 170 entry.graphics_info, entry.compute_info};
206 } else { 171 const auto registry = std::make_shared<Registry>(entry.type, info);
207 return system.GPU().Maxwell3D(); 172 for (const auto& [address, value] : entry.keys) {
208 } 173 const auto [buffer, offset] = address;
209} 174 registry->InsertKey(buffer, offset, value);
210
211std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ShaderType shader_type) {
212 return std::make_unique<ConstBufferLocker>(shader_type,
213 GetConstBufferEngineInterface(system, shader_type));
214}
215
216void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) {
217 locker.SetBoundBuffer(usage.bound_buffer);
218 for (const auto& key : usage.keys) {
219 const auto [buffer, offset] = key.first;
220 locker.InsertKey(buffer, offset, key.second);
221 } 175 }
222 for (const auto& [offset, sampler] : usage.bound_samplers) { 176 for (const auto& [offset, sampler] : entry.bound_samplers) {
223 locker.InsertBoundSampler(offset, sampler); 177 registry->InsertBoundSampler(offset, sampler);
224 } 178 }
225 for (const auto& [key, sampler] : usage.bindless_samplers) { 179 for (const auto& [key, sampler] : entry.bindless_samplers) {
226 const auto [buffer, offset] = key; 180 const auto [buffer, offset] = key;
227 locker.InsertBindlessSampler(buffer, offset, sampler); 181 registry->InsertBindlessSampler(buffer, offset, sampler);
228 } 182 }
183 return registry;
229} 184}
230 185
231CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderType shader_type, 186std::shared_ptr<OGLProgram> BuildShader(const Device& device, ShaderType shader_type,
232 const ProgramCode& code, const ProgramCode& code_b, 187 u64 unique_identifier, const ShaderIR& ir,
233 ConstBufferLocker& locker, const ProgramVariant& variant, 188 const Registry& registry, bool hint_retrievable = false) {
234 bool hint_retrievable = false) { 189 const std::string shader_id = MakeShaderID(unique_identifier, shader_type);
235 LOG_INFO(Render_OpenGL, "called. {}", GetShaderId(unique_identifier, shader_type)); 190 LOG_INFO(Render_OpenGL, "{}", shader_id);
236
237 const bool is_compute = shader_type == ShaderType::Compute;
238 const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
239 const ShaderIR ir(code, main_offset, COMPILER_SETTINGS, locker);
240 std::optional<ShaderIR> ir_b;
241 if (!code_b.empty()) {
242 ir_b.emplace(code_b, main_offset, COMPILER_SETTINGS, locker);
243 }
244
245 std::string source = fmt::format(R"(// {}
246#version 430 core
247#extension GL_ARB_separate_shader_objects : enable
248)",
249 GetShaderId(unique_identifier, shader_type));
250 if (device.HasShaderBallot()) {
251 source += "#extension GL_ARB_shader_ballot : require\n";
252 }
253 if (device.HasVertexViewportLayer()) {
254 source += "#extension GL_ARB_shader_viewport_layer_array : require\n";
255 }
256 if (device.HasImageLoadFormatted()) {
257 source += "#extension GL_EXT_shader_image_load_formatted : require\n";
258 }
259 if (device.HasWarpIntrinsics()) {
260 source += "#extension GL_NV_gpu_shader5 : require\n"
261 "#extension GL_NV_shader_thread_group : require\n"
262 "#extension GL_NV_shader_thread_shuffle : require\n";
263 }
264 // This pragma stops Nvidia's driver from over optimizing math (probably using fp16 operations)
265 // on places where we don't want to.
266 // Thanks to Ryujinx for finding this workaround.
267 source += "#pragma optionNV(fastmath off)\n";
268
269 if (shader_type == ShaderType::Geometry) {
270 const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(variant.primitive_mode);
271 source += fmt::format("#define MAX_VERTEX_INPUT {}\n", max_vertices);
272 source += fmt::format("layout ({}) in;\n", glsl_topology);
273 }
274 if (shader_type == ShaderType::Compute) {
275 if (variant.local_memory_size > 0) {
276 source += fmt::format("#define LOCAL_MEMORY_SIZE {}\n",
277 Common::AlignUp(variant.local_memory_size, 4) / 4);
278 }
279 source +=
280 fmt::format("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;\n",
281 variant.block_x, variant.block_y, variant.block_z);
282
283 if (variant.shared_memory_size > 0) {
284 // shared_memory_size is described in number of words
285 source += fmt::format("shared uint smem[{}];\n", variant.shared_memory_size);
286 }
287 }
288
289 source += '\n';
290 source += GenerateGLSL(device, shader_type, ir, ir_b);
291 191
192 const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id);
292 OGLShader shader; 193 OGLShader shader;
293 shader.Create(source.c_str(), GetGLShaderType(shader_type)); 194 shader.Create(glsl.c_str(), GetGLShaderType(shader_type));
294 195
295 auto program = std::make_shared<OGLProgram>(); 196 auto program = std::make_shared<OGLProgram>();
296 program->Create(true, hint_retrievable, shader.handle); 197 program->Create(true, hint_retrievable, shader.handle);
@@ -298,7 +199,7 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderTyp
298} 199}
299 200
300std::unordered_set<GLenum> GetSupportedFormats() { 201std::unordered_set<GLenum> GetSupportedFormats() {
301 GLint num_formats{}; 202 GLint num_formats;
302 glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); 203 glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
303 204
304 std::vector<GLint> formats(num_formats); 205 std::vector<GLint> formats(num_formats);
@@ -313,115 +214,82 @@ std::unordered_set<GLenum> GetSupportedFormats() {
313 214
314} // Anonymous namespace 215} // Anonymous namespace
315 216
316CachedShader::CachedShader(const ShaderParameters& params, ShaderType shader_type, 217CachedShader::CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes,
317 GLShader::ShaderEntries entries, ProgramCode code, ProgramCode code_b) 218 std::shared_ptr<VideoCommon::Shader::Registry> registry,
318 : RasterizerCacheObject{params.host_ptr}, system{params.system}, 219 ShaderEntries entries, std::shared_ptr<OGLProgram> program)
319 disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr}, 220 : RasterizerCacheObject{host_ptr}, registry{std::move(registry)}, entries{std::move(entries)},
320 unique_identifier{params.unique_identifier}, shader_type{shader_type}, 221 cpu_addr{cpu_addr}, size_in_bytes{size_in_bytes}, program{std::move(program)} {}
321 entries{std::move(entries)}, code{std::move(code)}, code_b{std::move(code_b)} { 222
322 if (!params.precompiled_variants) { 223CachedShader::~CachedShader() = default;
323 return; 224
324 } 225GLuint CachedShader::GetHandle() const {
325 for (const auto& pair : *params.precompiled_variants) { 226 DEBUG_ASSERT(registry->IsConsistent());
326 auto locker = MakeLocker(system, shader_type); 227 return program->handle;
327 const auto& usage = pair->first;
328 FillLocker(*locker, usage);
329
330 std::unique_ptr<LockerVariant>* locker_variant = nullptr;
331 const auto it =
332 std::find_if(locker_variants.begin(), locker_variants.end(), [&](const auto& variant) {
333 return variant->locker->HasEqualKeys(*locker);
334 });
335 if (it == locker_variants.end()) {
336 locker_variant = &locker_variants.emplace_back();
337 *locker_variant = std::make_unique<LockerVariant>();
338 locker_variant->get()->locker = std::move(locker);
339 } else {
340 locker_variant = &*it;
341 }
342 locker_variant->get()->programs.emplace(usage.variant, pair->second);
343 }
344} 228}
345 229
346Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, 230Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
347 Maxwell::ShaderProgram program_type, ProgramCode code, 231 Maxwell::ShaderProgram program_type, ProgramCode code,
348 ProgramCode code_b) { 232 ProgramCode code_b) {
349 const auto shader_type = GetShaderType(program_type); 233 const auto shader_type = GetShaderType(program_type);
350 params.disk_cache.SaveRaw( 234 const std::size_t size_in_bytes = code.size() * sizeof(u64);
351 ShaderDiskCacheRaw(params.unique_identifier, shader_type, code, code_b));
352 235
353 ConstBufferLocker locker(shader_type, params.system.GPU().Maxwell3D()); 236 auto registry = std::make_shared<Registry>(shader_type, params.system.GPU().Maxwell3D());
354 const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, locker); 237 const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
355 // TODO(Rodrigo): Handle VertexA shaders 238 // TODO(Rodrigo): Handle VertexA shaders
356 // std::optional<ShaderIR> ir_b; 239 // std::optional<ShaderIR> ir_b;
357 // if (!code_b.empty()) { 240 // if (!code_b.empty()) {
358 // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); 241 // ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
359 // } 242 // }
360 return std::shared_ptr<CachedShader>(new CachedShader( 243 auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry);
361 params, shader_type, GLShader::GetEntries(ir), std::move(code), std::move(code_b))); 244
245 ShaderDiskCacheEntry entry;
246 entry.type = shader_type;
247 entry.code = std::move(code);
248 entry.code_b = std::move(code_b);
249 entry.unique_identifier = params.unique_identifier;
250 entry.bound_buffer = registry->GetBoundBuffer();
251 entry.graphics_info = registry->GetGraphicsInfo();
252 entry.keys = registry->GetKeys();
253 entry.bound_samplers = registry->GetBoundSamplers();
254 entry.bindless_samplers = registry->GetBindlessSamplers();
255 params.disk_cache.SaveEntry(std::move(entry));
256
257 return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr,
258 size_in_bytes, std::move(registry),
259 MakeEntries(ir), std::move(program)));
362} 260}
363 261
364Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { 262Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
365 params.disk_cache.SaveRaw( 263 const std::size_t size_in_bytes = code.size() * sizeof(u64);
366 ShaderDiskCacheRaw(params.unique_identifier, ShaderType::Compute, code)); 264
367 265 auto& engine = params.system.GPU().KeplerCompute();
368 ConstBufferLocker locker(Tegra::Engines::ShaderType::Compute, 266 auto registry = std::make_shared<Registry>(ShaderType::Compute, engine);
369 params.system.GPU().KeplerCompute()); 267 const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
370 const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, locker); 268 const u64 uid = params.unique_identifier;
371 return std::shared_ptr<CachedShader>(new CachedShader( 269 auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry);
372 params, ShaderType::Compute, GLShader::GetEntries(ir), std::move(code), {})); 270
271 ShaderDiskCacheEntry entry;
272 entry.type = ShaderType::Compute;
273 entry.code = std::move(code);
274 entry.unique_identifier = uid;
275 entry.bound_buffer = registry->GetBoundBuffer();
276 entry.compute_info = registry->GetComputeInfo();
277 entry.keys = registry->GetKeys();
278 entry.bound_samplers = registry->GetBoundSamplers();
279 entry.bindless_samplers = registry->GetBindlessSamplers();
280 params.disk_cache.SaveEntry(std::move(entry));
281
282 return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr,
283 size_in_bytes, std::move(registry),
284 MakeEntries(ir), std::move(program)));
373} 285}
374 286
375Shader CachedShader::CreateFromCache(const ShaderParameters& params, 287Shader CachedShader::CreateFromCache(const ShaderParameters& params,
376 const UnspecializedShader& unspecialized) { 288 const PrecompiledShader& precompiled_shader,
377 return std::shared_ptr<CachedShader>(new CachedShader(params, unspecialized.type, 289 std::size_t size_in_bytes) {
378 unspecialized.entries, unspecialized.code, 290 return std::shared_ptr<CachedShader>(new CachedShader(
379 unspecialized.code_b)); 291 params.host_ptr, params.cpu_addr, size_in_bytes, precompiled_shader.registry,
380} 292 precompiled_shader.entries, precompiled_shader.program));
381
382GLuint CachedShader::GetHandle(const ProgramVariant& variant) {
383 EnsureValidLockerVariant();
384
385 const auto [entry, is_cache_miss] = curr_locker_variant->programs.try_emplace(variant);
386 auto& program = entry->second;
387 if (!is_cache_miss) {
388 return program->handle;
389 }
390
391 program = BuildShader(device, unique_identifier, shader_type, code, code_b,
392 *curr_locker_variant->locker, variant);
393 disk_cache.SaveUsage(GetUsage(variant, *curr_locker_variant->locker));
394
395 LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
396 return program->handle;
397}
398
399bool CachedShader::EnsureValidLockerVariant() {
400 const auto previous_variant = curr_locker_variant;
401 if (curr_locker_variant && !curr_locker_variant->locker->IsConsistent()) {
402 curr_locker_variant = nullptr;
403 }
404 if (!curr_locker_variant) {
405 for (auto& variant : locker_variants) {
406 if (variant->locker->IsConsistent()) {
407 curr_locker_variant = variant.get();
408 }
409 }
410 }
411 if (!curr_locker_variant) {
412 auto& new_variant = locker_variants.emplace_back();
413 new_variant = std::make_unique<LockerVariant>();
414 new_variant->locker = MakeLocker(system, shader_type);
415 curr_locker_variant = new_variant.get();
416 }
417 return previous_variant == curr_locker_variant;
418}
419
420ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant,
421 const ConstBufferLocker& locker) const {
422 return ShaderDiskCacheUsage{unique_identifier, variant,
423 locker.GetBoundBuffer(), locker.GetKeys(),
424 locker.GetBoundSamplers(), locker.GetBindlessSamplers()};
425} 293}
426 294
427ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, 295ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
@@ -431,16 +299,12 @@ ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System&
431 299
432void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, 300void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
433 const VideoCore::DiskResourceLoadCallback& callback) { 301 const VideoCore::DiskResourceLoadCallback& callback) {
434 const auto transferable = disk_cache.LoadTransferable(); 302 const std::optional transferable = disk_cache.LoadTransferable();
435 if (!transferable) { 303 if (!transferable) {
436 return; 304 return;
437 } 305 }
438 const auto [raws, shader_usages] = *transferable;
439 if (!GenerateUnspecializedShaders(stop_loading, callback, raws) || stop_loading) {
440 return;
441 }
442 306
443 const auto dumps = disk_cache.LoadPrecompiled(); 307 const std::vector gl_cache = disk_cache.LoadPrecompiled();
444 const auto supported_formats = GetSupportedFormats(); 308 const auto supported_formats = GetSupportedFormats();
445 309
446 // Track if precompiled cache was altered during loading to know if we have to 310 // Track if precompiled cache was altered during loading to know if we have to
@@ -449,77 +313,82 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
449 313
450 // Inform the frontend about shader build initialization 314 // Inform the frontend about shader build initialization
451 if (callback) { 315 if (callback) {
452 callback(VideoCore::LoadCallbackStage::Build, 0, shader_usages.size()); 316 callback(VideoCore::LoadCallbackStage::Build, 0, transferable->size());
453 } 317 }
454 318
455 std::mutex mutex; 319 std::mutex mutex;
456 std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex 320 std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex
457 std::atomic_bool compilation_failed = false; 321 std::atomic_bool gl_cache_failed = false;
458 322
459 const auto Worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin, 323 const auto find_precompiled = [&gl_cache](u64 id) {
460 std::size_t end, const std::vector<ShaderDiskCacheUsage>& shader_usages, 324 return std::find_if(gl_cache.begin(), gl_cache.end(),
461 const ShaderDumpsMap& dumps) { 325 [id](const auto& entry) { return entry.unique_identifier == id; });
326 };
327
328 const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin,
329 std::size_t end) {
462 context->MakeCurrent(); 330 context->MakeCurrent();
463 SCOPE_EXIT({ return context->DoneCurrent(); }); 331 SCOPE_EXIT({ return context->DoneCurrent(); });
464 332
465 for (std::size_t i = begin; i < end; ++i) { 333 for (std::size_t i = begin; i < end; ++i) {
466 if (stop_loading || compilation_failed) { 334 if (stop_loading) {
467 return; 335 return;
468 } 336 }
469 const auto& usage{shader_usages[i]}; 337 const auto& entry = (*transferable)[i];
470 const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)}; 338 const u64 uid = entry.unique_identifier;
471 const auto dump{dumps.find(usage)}; 339 const auto it = find_precompiled(uid);
472 340 const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr;
473 CachedProgram shader; 341
474 if (dump != dumps.end()) { 342 const bool is_compute = entry.type == ShaderType::Compute;
475 // If the shader is dumped, attempt to load it with 343 const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
476 shader = GeneratePrecompiledProgram(dump->second, supported_formats); 344 auto registry = MakeRegistry(entry);
477 if (!shader) { 345 const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry);
478 compilation_failed = true; 346
479 return; 347 std::shared_ptr<OGLProgram> program;
348 if (precompiled_entry) {
349 // If the shader is precompiled, attempt to load it with
350 program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats);
351 if (!program) {
352 gl_cache_failed = true;
480 } 353 }
481 } 354 }
482 if (!shader) { 355 if (!program) {
483 auto locker{MakeLocker(system, unspecialized.type)}; 356 // Otherwise compile it from GLSL
484 FillLocker(*locker, usage); 357 program = BuildShader(device, entry.type, uid, ir, *registry, true);
485
486 shader = BuildShader(device, usage.unique_identifier, unspecialized.type,
487 unspecialized.code, unspecialized.code_b, *locker,
488 usage.variant, true);
489 } 358 }
490 359
360 PrecompiledShader shader;
361 shader.program = std::move(program);
362 shader.registry = std::move(registry);
363 shader.entries = MakeEntries(ir);
364
491 std::scoped_lock lock{mutex}; 365 std::scoped_lock lock{mutex};
492 if (callback) { 366 if (callback) {
493 callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, 367 callback(VideoCore::LoadCallbackStage::Build, ++built_shaders,
494 shader_usages.size()); 368 transferable->size());
495 } 369 }
496 370 runtime_cache.emplace(entry.unique_identifier, std::move(shader));
497 precompiled_programs.emplace(usage, std::move(shader));
498
499 // TODO(Rodrigo): Is there a better way to do this?
500 precompiled_variants[usage.unique_identifier].push_back(
501 precompiled_programs.find(usage));
502 } 371 }
503 }; 372 };
504 373
505 const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1ULL)}; 374 const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1ULL)};
506 const std::size_t bucket_size{shader_usages.size() / num_workers}; 375 const std::size_t bucket_size{transferable->size() / num_workers};
507 std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers); 376 std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers);
508 std::vector<std::thread> threads(num_workers); 377 std::vector<std::thread> threads(num_workers);
509 for (std::size_t i = 0; i < num_workers; ++i) { 378 for (std::size_t i = 0; i < num_workers; ++i) {
510 const bool is_last_worker = i + 1 == num_workers; 379 const bool is_last_worker = i + 1 == num_workers;
511 const std::size_t start{bucket_size * i}; 380 const std::size_t start{bucket_size * i};
512 const std::size_t end{is_last_worker ? shader_usages.size() : start + bucket_size}; 381 const std::size_t end{is_last_worker ? transferable->size() : start + bucket_size};
513 382
514 // On some platforms the shared context has to be created from the GUI thread 383 // On some platforms the shared context has to be created from the GUI thread
515 contexts[i] = emu_window.CreateSharedContext(); 384 contexts[i] = emu_window.CreateSharedContext();
516 threads[i] = std::thread(Worker, contexts[i].get(), start, end, shader_usages, dumps); 385 threads[i] = std::thread(worker, contexts[i].get(), start, end);
517 } 386 }
518 for (auto& thread : threads) { 387 for (auto& thread : threads) {
519 thread.join(); 388 thread.join();
520 } 389 }
521 390
522 if (compilation_failed) { 391 if (gl_cache_failed) {
523 // Invalidate the precompiled cache if a shader dumped shader was rejected 392 // Invalidate the precompiled cache if a shader dumped shader was rejected
524 disk_cache.InvalidatePrecompiled(); 393 disk_cache.InvalidatePrecompiled();
525 precompiled_cache_altered = true; 394 precompiled_cache_altered = true;
@@ -532,11 +401,12 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
532 // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw 401 // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw
533 // before precompiling them 402 // before precompiling them
534 403
535 for (std::size_t i = 0; i < shader_usages.size(); ++i) { 404 for (std::size_t i = 0; i < transferable->size(); ++i) {
536 const auto& usage{shader_usages[i]}; 405 const u64 id = (*transferable)[i].unique_identifier;
537 if (dumps.find(usage) == dumps.end()) { 406 const auto it = find_precompiled(id);
538 const auto& program{precompiled_programs.at(usage)}; 407 if (it == gl_cache.end()) {
539 disk_cache.SaveDump(usage, program->handle); 408 const GLuint program = runtime_cache.at(id).program->handle;
409 disk_cache.SavePrecompiled(id, program);
540 precompiled_cache_altered = true; 410 precompiled_cache_altered = true;
541 } 411 }
542 } 412 }
@@ -546,84 +416,33 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
546 } 416 }
547} 417}
548 418
549const PrecompiledVariants* ShaderCacheOpenGL::GetPrecompiledVariants(u64 unique_identifier) const { 419std::shared_ptr<OGLProgram> ShaderCacheOpenGL::GeneratePrecompiledProgram(
550 const auto it = precompiled_variants.find(unique_identifier); 420 const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
551 return it == precompiled_variants.end() ? nullptr : &it->second; 421 const std::unordered_set<GLenum>& supported_formats) {
552} 422 if (supported_formats.find(precompiled_entry.binary_format) == supported_formats.end()) {
553 423 LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing");
554CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram(
555 const ShaderDiskCacheDump& dump, const std::unordered_set<GLenum>& supported_formats) {
556 if (supported_formats.find(dump.binary_format) == supported_formats.end()) {
557 LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing");
558 return {}; 424 return {};
559 } 425 }
560 426
561 CachedProgram shader = std::make_shared<OGLProgram>(); 427 auto program = std::make_shared<OGLProgram>();
562 shader->handle = glCreateProgram(); 428 program->handle = glCreateProgram();
563 glProgramParameteri(shader->handle, GL_PROGRAM_SEPARABLE, GL_TRUE); 429 glProgramParameteri(program->handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
564 glProgramBinary(shader->handle, dump.binary_format, dump.binary.data(), 430 glProgramBinary(program->handle, precompiled_entry.binary_format,
565 static_cast<GLsizei>(dump.binary.size())); 431 precompiled_entry.binary.data(),
566 432 static_cast<GLsizei>(precompiled_entry.binary.size()));
567 GLint link_status{}; 433
568 glGetProgramiv(shader->handle, GL_LINK_STATUS, &link_status); 434 GLint link_status;
435 glGetProgramiv(program->handle, GL_LINK_STATUS, &link_status);
569 if (link_status == GL_FALSE) { 436 if (link_status == GL_FALSE) {
570 LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver - removing"); 437 LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing");
571 return {}; 438 return {};
572 } 439 }
573 440
574 return shader; 441 return program;
575}
576
577bool ShaderCacheOpenGL::GenerateUnspecializedShaders(
578 const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback,
579 const std::vector<ShaderDiskCacheRaw>& raws) {
580 if (callback) {
581 callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size());
582 }
583
584 for (std::size_t i = 0; i < raws.size(); ++i) {
585 if (stop_loading) {
586 return false;
587 }
588 const auto& raw{raws[i]};
589 const u64 unique_identifier{raw.GetUniqueIdentifier()};
590 const u64 calculated_hash{
591 GetUniqueIdentifier(raw.GetType(), raw.HasProgramA(), raw.GetCode(), raw.GetCodeB())};
592 if (unique_identifier != calculated_hash) {
593 LOG_ERROR(Render_OpenGL,
594 "Invalid hash in entry={:016x} (obtained hash={:016x}) - "
595 "removing shader cache",
596 raw.GetUniqueIdentifier(), calculated_hash);
597 disk_cache.InvalidateTransferable();
598 return false;
599 }
600
601 const u32 main_offset =
602 raw.GetType() == ShaderType::Compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
603 ConstBufferLocker locker(raw.GetType());
604 const ShaderIR ir(raw.GetCode(), main_offset, COMPILER_SETTINGS, locker);
605 // TODO(Rodrigo): Handle VertexA shaders
606 // std::optional<ShaderIR> ir_b;
607 // if (raw.HasProgramA()) {
608 // ir_b.emplace(raw.GetProgramCodeB(), main_offset);
609 // }
610
611 UnspecializedShader unspecialized;
612 unspecialized.entries = GLShader::GetEntries(ir);
613 unspecialized.type = raw.GetType();
614 unspecialized.code = raw.GetCode();
615 unspecialized.code_b = raw.GetCodeB();
616 unspecialized_shaders.emplace(raw.GetUniqueIdentifier(), unspecialized);
617
618 if (callback) {
619 callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size());
620 }
621 }
622 return true;
623} 442}
624 443
625Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { 444Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
626 if (!system.GPU().Maxwell3D().dirty.shaders) { 445 if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) {
627 return last_shaders[static_cast<std::size_t>(program)]; 446 return last_shaders[static_cast<std::size_t>(program)];
628 } 447 }
629 448
@@ -647,17 +466,17 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
647 466
648 const auto unique_identifier = GetUniqueIdentifier( 467 const auto unique_identifier = GetUniqueIdentifier(
649 GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); 468 GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
650 const auto precompiled_variants = GetPrecompiledVariants(unique_identifier);
651 const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)}; 469 const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)};
652 const ShaderParameters params{system, disk_cache, precompiled_variants, device, 470 const ShaderParameters params{system, disk_cache, device,
653 cpu_addr, host_ptr, unique_identifier}; 471 cpu_addr, host_ptr, unique_identifier};
654 472
655 const auto found = unspecialized_shaders.find(unique_identifier); 473 const auto found = runtime_cache.find(unique_identifier);
656 if (found == unspecialized_shaders.end()) { 474 if (found == runtime_cache.end()) {
657 shader = CachedShader::CreateStageFromMemory(params, program, std::move(code), 475 shader = CachedShader::CreateStageFromMemory(params, program, std::move(code),
658 std::move(code_b)); 476 std::move(code_b));
659 } else { 477 } else {
660 shader = CachedShader::CreateFromCache(params, found->second); 478 const std::size_t size_in_bytes = code.size() * sizeof(u64);
479 shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
661 } 480 }
662 Register(shader); 481 Register(shader);
663 482
@@ -672,19 +491,19 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
672 return kernel; 491 return kernel;
673 } 492 }
674 493
675 // No kernel found - create a new one 494 // No kernel found, create a new one
676 auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; 495 auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
677 const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code, {})}; 496 const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
678 const auto precompiled_variants = GetPrecompiledVariants(unique_identifier);
679 const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; 497 const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)};
680 const ShaderParameters params{system, disk_cache, precompiled_variants, device, 498 const ShaderParameters params{system, disk_cache, device,
681 cpu_addr, host_ptr, unique_identifier}; 499 cpu_addr, host_ptr, unique_identifier};
682 500
683 const auto found = unspecialized_shaders.find(unique_identifier); 501 const auto found = runtime_cache.find(unique_identifier);
684 if (found == unspecialized_shaders.end()) { 502 if (found == runtime_cache.end()) {
685 kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); 503 kernel = CachedShader::CreateKernelFromMemory(params, std::move(code));
686 } else { 504 } else {
687 kernel = CachedShader::CreateFromCache(params, found->second); 505 const std::size_t size_in_bytes = code.size() * sizeof(u64);
506 kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
688 } 507 }
689 508
690 Register(kernel); 509 Register(kernel);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 7b1470db3..4935019fc 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -22,7 +22,7 @@
22#include "video_core/renderer_opengl/gl_resource_manager.h" 22#include "video_core/renderer_opengl/gl_resource_manager.h"
23#include "video_core/renderer_opengl/gl_shader_decompiler.h" 23#include "video_core/renderer_opengl/gl_shader_decompiler.h"
24#include "video_core/renderer_opengl/gl_shader_disk_cache.h" 24#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
25#include "video_core/shader/const_buffer_locker.h" 25#include "video_core/shader/registry.h"
26#include "video_core/shader/shader_ir.h" 26#include "video_core/shader/shader_ir.h"
27 27
28namespace Core { 28namespace Core {
@@ -41,22 +41,17 @@ class RasterizerOpenGL;
41struct UnspecializedShader; 41struct UnspecializedShader;
42 42
43using Shader = std::shared_ptr<CachedShader>; 43using Shader = std::shared_ptr<CachedShader>;
44using CachedProgram = std::shared_ptr<OGLProgram>;
45using Maxwell = Tegra::Engines::Maxwell3D::Regs; 44using Maxwell = Tegra::Engines::Maxwell3D::Regs;
46using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>; 45
47using PrecompiledVariants = std::vector<PrecompiledPrograms::iterator>; 46struct PrecompiledShader {
48 47 std::shared_ptr<OGLProgram> program;
49struct UnspecializedShader { 48 std::shared_ptr<VideoCommon::Shader::Registry> registry;
50 GLShader::ShaderEntries entries; 49 ShaderEntries entries;
51 Tegra::Engines::ShaderType type;
52 ProgramCode code;
53 ProgramCode code_b;
54}; 50};
55 51
56struct ShaderParameters { 52struct ShaderParameters {
57 Core::System& system; 53 Core::System& system;
58 ShaderDiskCacheOpenGL& disk_cache; 54 ShaderDiskCacheOpenGL& disk_cache;
59 const PrecompiledVariants* precompiled_variants;
60 const Device& device; 55 const Device& device;
61 VAddr cpu_addr; 56 VAddr cpu_addr;
62 u8* host_ptr; 57 u8* host_ptr;
@@ -65,61 +60,45 @@ struct ShaderParameters {
65 60
66class CachedShader final : public RasterizerCacheObject { 61class CachedShader final : public RasterizerCacheObject {
67public: 62public:
68 static Shader CreateStageFromMemory(const ShaderParameters& params, 63 ~CachedShader();
69 Maxwell::ShaderProgram program_type,
70 ProgramCode program_code, ProgramCode program_code_b);
71 static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code);
72 64
73 static Shader CreateFromCache(const ShaderParameters& params, 65 /// Gets the GL program handle for the shader
74 const UnspecializedShader& unspecialized); 66 GLuint GetHandle() const;
75 67
68 /// Returns the guest CPU address of the shader
76 VAddr GetCpuAddr() const override { 69 VAddr GetCpuAddr() const override {
77 return cpu_addr; 70 return cpu_addr;
78 } 71 }
79 72
73 /// Returns the size in bytes of the shader
80 std::size_t GetSizeInBytes() const override { 74 std::size_t GetSizeInBytes() const override {
81 return code.size() * sizeof(u64); 75 return size_in_bytes;
82 } 76 }
83 77
84 /// Gets the shader entries for the shader 78 /// Gets the shader entries for the shader
85 const GLShader::ShaderEntries& GetShaderEntries() const { 79 const ShaderEntries& GetEntries() const {
86 return entries; 80 return entries;
87 } 81 }
88 82
89 /// Gets the GL program handle for the shader 83 static Shader CreateStageFromMemory(const ShaderParameters& params,
90 GLuint GetHandle(const ProgramVariant& variant); 84 Maxwell::ShaderProgram program_type,
91 85 ProgramCode program_code, ProgramCode program_code_b);
92private: 86 static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code);
93 struct LockerVariant {
94 std::unique_ptr<VideoCommon::Shader::ConstBufferLocker> locker;
95 std::unordered_map<ProgramVariant, CachedProgram> programs;
96 };
97
98 explicit CachedShader(const ShaderParameters& params, Tegra::Engines::ShaderType shader_type,
99 GLShader::ShaderEntries entries, ProgramCode program_code,
100 ProgramCode program_code_b);
101
102 bool EnsureValidLockerVariant();
103
104 ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant,
105 const VideoCommon::Shader::ConstBufferLocker& locker) const;
106
107 Core::System& system;
108 ShaderDiskCacheOpenGL& disk_cache;
109 const Device& device;
110
111 VAddr cpu_addr{};
112
113 u64 unique_identifier{};
114 Tegra::Engines::ShaderType shader_type{};
115
116 GLShader::ShaderEntries entries;
117 87
118 ProgramCode code; 88 static Shader CreateFromCache(const ShaderParameters& params,
119 ProgramCode code_b; 89 const PrecompiledShader& precompiled_shader,
90 std::size_t size_in_bytes);
120 91
121 LockerVariant* curr_locker_variant = nullptr; 92private:
122 std::vector<std::unique_ptr<LockerVariant>> locker_variants; 93 explicit CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes,
94 std::shared_ptr<VideoCommon::Shader::Registry> registry,
95 ShaderEntries entries, std::shared_ptr<OGLProgram> program);
96
97 std::shared_ptr<VideoCommon::Shader::Registry> registry;
98 ShaderEntries entries;
99 VAddr cpu_addr = 0;
100 std::size_t size_in_bytes = 0;
101 std::shared_ptr<OGLProgram> program;
123}; 102};
124 103
125class ShaderCacheOpenGL final : public RasterizerCache<Shader> { 104class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
@@ -142,25 +121,15 @@ protected:
142 void FlushObjectInner(const Shader& object) override {} 121 void FlushObjectInner(const Shader& object) override {}
143 122
144private: 123private:
145 bool GenerateUnspecializedShaders(const std::atomic_bool& stop_loading, 124 std::shared_ptr<OGLProgram> GeneratePrecompiledProgram(
146 const VideoCore::DiskResourceLoadCallback& callback, 125 const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
147 const std::vector<ShaderDiskCacheRaw>& raws); 126 const std::unordered_set<GLenum>& supported_formats);
148
149 CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump,
150 const std::unordered_set<GLenum>& supported_formats);
151
152 const PrecompiledVariants* GetPrecompiledVariants(u64 unique_identifier) const;
153 127
154 Core::System& system; 128 Core::System& system;
155 Core::Frontend::EmuWindow& emu_window; 129 Core::Frontend::EmuWindow& emu_window;
156 const Device& device; 130 const Device& device;
157
158 ShaderDiskCacheOpenGL disk_cache; 131 ShaderDiskCacheOpenGL disk_cache;
159 132 std::unordered_map<u64, PrecompiledShader> runtime_cache;
160 PrecompiledPrograms precompiled_programs;
161 std::unordered_map<u64, PrecompiledVariants> precompiled_variants;
162
163 std::unordered_map<u64, UnspecializedShader> unspecialized_shaders;
164 133
165 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; 134 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
166}; 135};
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 4735000b5..8aa4a7ac9 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -23,8 +23,9 @@
23#include "video_core/shader/ast.h" 23#include "video_core/shader/ast.h"
24#include "video_core/shader/node.h" 24#include "video_core/shader/node.h"
25#include "video_core/shader/shader_ir.h" 25#include "video_core/shader/shader_ir.h"
26#include "video_core/shader/transform_feedback.h"
26 27
27namespace OpenGL::GLShader { 28namespace OpenGL {
28 29
29namespace { 30namespace {
30 31
@@ -36,6 +37,8 @@ using Tegra::Shader::IpaInterpMode;
36using Tegra::Shader::IpaMode; 37using Tegra::Shader::IpaMode;
37using Tegra::Shader::IpaSampleMode; 38using Tegra::Shader::IpaSampleMode;
38using Tegra::Shader::Register; 39using Tegra::Shader::Register;
40using VideoCommon::Shader::BuildTransformFeedback;
41using VideoCommon::Shader::Registry;
39 42
40using namespace std::string_literals; 43using namespace std::string_literals;
41using namespace VideoCommon::Shader; 44using namespace VideoCommon::Shader;
@@ -48,6 +51,11 @@ class ExprDecompiler;
48 51
49enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; 52enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
50 53
54constexpr std::array FLOAT_TYPES{"float", "vec2", "vec3", "vec4"};
55
56constexpr std::string_view INPUT_ATTRIBUTE_NAME = "in_attr";
57constexpr std::string_view OUTPUT_ATTRIBUTE_NAME = "out_attr";
58
51struct TextureOffset {}; 59struct TextureOffset {};
52struct TextureDerivates {}; 60struct TextureDerivates {};
53using TextureArgument = std::pair<Type, Node>; 61using TextureArgument = std::pair<Type, Node>;
@@ -56,6 +64,25 @@ using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>
56constexpr u32 MAX_CONSTBUFFER_ELEMENTS = 64constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
57 static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float)); 65 static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float));
58 66
67constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt
68#define ftou floatBitsToUint
69#define itof intBitsToFloat
70#define utof uintBitsToFloat
71
72bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{
73 bvec2 is_nan1 = isnan(pair1);
74 bvec2 is_nan2 = isnan(pair2);
75 return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y);
76}}
77
78const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
79const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
80
81layout (std140, binding = {}) uniform vs_config {{
82 float y_direction;
83}};
84)";
85
59class ShaderWriter final { 86class ShaderWriter final {
60public: 87public:
61 void AddExpression(std::string_view text) { 88 void AddExpression(std::string_view text) {
@@ -269,12 +296,41 @@ const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) {
269 } 296 }
270} 297}
271 298
299/// Describes primitive behavior on geometry shaders
300std::pair<const char*, u32> GetPrimitiveDescription(Maxwell::PrimitiveTopology topology) {
301 switch (topology) {
302 case Maxwell::PrimitiveTopology::Points:
303 return {"points", 1};
304 case Maxwell::PrimitiveTopology::Lines:
305 case Maxwell::PrimitiveTopology::LineStrip:
306 return {"lines", 2};
307 case Maxwell::PrimitiveTopology::LinesAdjacency:
308 case Maxwell::PrimitiveTopology::LineStripAdjacency:
309 return {"lines_adjacency", 4};
310 case Maxwell::PrimitiveTopology::Triangles:
311 case Maxwell::PrimitiveTopology::TriangleStrip:
312 case Maxwell::PrimitiveTopology::TriangleFan:
313 return {"triangles", 3};
314 case Maxwell::PrimitiveTopology::TrianglesAdjacency:
315 case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
316 return {"triangles_adjacency", 6};
317 default:
318 UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology));
319 return {"points", 1};
320 }
321}
322
272/// Generates code to use for a swizzle operation. 323/// Generates code to use for a swizzle operation.
273constexpr const char* GetSwizzle(u32 element) { 324constexpr const char* GetSwizzle(std::size_t element) {
274 constexpr std::array swizzle = {".x", ".y", ".z", ".w"}; 325 constexpr std::array swizzle = {".x", ".y", ".z", ".w"};
275 return swizzle.at(element); 326 return swizzle.at(element);
276} 327}
277 328
329constexpr const char* GetColorSwizzle(std::size_t element) {
330 constexpr std::array swizzle = {".r", ".g", ".b", ".a"};
331 return swizzle.at(element);
332}
333
278/// Translate topology 334/// Translate topology
279std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { 335std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
280 switch (topology) { 336 switch (topology) {
@@ -310,10 +366,19 @@ constexpr bool IsGenericAttribute(Attribute::Index index) {
310 return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; 366 return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31;
311} 367}
312 368
369constexpr bool IsLegacyTexCoord(Attribute::Index index) {
370 return static_cast<int>(index) >= static_cast<int>(Attribute::Index::TexCoord_0) &&
371 static_cast<int>(index) <= static_cast<int>(Attribute::Index::TexCoord_7);
372}
373
313constexpr Attribute::Index ToGenericAttribute(u64 value) { 374constexpr Attribute::Index ToGenericAttribute(u64 value) {
314 return static_cast<Attribute::Index>(value + static_cast<u64>(Attribute::Index::Attribute_0)); 375 return static_cast<Attribute::Index>(value + static_cast<u64>(Attribute::Index::Attribute_0));
315} 376}
316 377
378constexpr int GetLegacyTexCoordIndex(Attribute::Index index) {
379 return static_cast<int>(index) - static_cast<int>(Attribute::Index::TexCoord_0);
380}
381
317u32 GetGenericAttributeIndex(Attribute::Index index) { 382u32 GetGenericAttributeIndex(Attribute::Index index) {
318 ASSERT(IsGenericAttribute(index)); 383 ASSERT(IsGenericAttribute(index));
319 return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); 384 return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
@@ -337,15 +402,66 @@ std::string FlowStackTopName(MetaStackClass stack) {
337 return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); 402 return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
338} 403}
339 404
340[[deprecated]] constexpr bool IsVertexShader(ShaderType stage) { 405struct GenericVaryingDescription {
341 return stage == ShaderType::Vertex; 406 std::string name;
342} 407 u8 first_element = 0;
408 bool is_scalar = false;
409};
343 410
344class GLSLDecompiler final { 411class GLSLDecompiler final {
345public: 412public:
346 explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderType stage, 413 explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
347 std::string suffix) 414 ShaderType stage, std::string_view identifier, std::string_view suffix)
348 : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} 415 : device{device}, ir{ir}, registry{registry}, stage{stage},
416 identifier{identifier}, suffix{suffix}, header{ir.GetHeader()} {
417 if (stage != ShaderType::Compute) {
418 transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
419 }
420 }
421
422 void Decompile() {
423 DeclareHeader();
424 DeclareVertex();
425 DeclareGeometry();
426 DeclareFragment();
427 DeclareCompute();
428 DeclareInputAttributes();
429 DeclareOutputAttributes();
430 DeclareImages();
431 DeclareSamplers();
432 DeclareGlobalMemory();
433 DeclareConstantBuffers();
434 DeclareLocalMemory();
435 DeclareRegisters();
436 DeclarePredicates();
437 DeclareInternalFlags();
438 DeclareCustomVariables();
439 DeclarePhysicalAttributeReader();
440
441 code.AddLine("void main() {{");
442 ++code.scope;
443
444 if (stage == ShaderType::Vertex) {
445 code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);");
446 }
447
448 if (ir.IsDecompiled()) {
449 DecompileAST();
450 } else {
451 DecompileBranchMode();
452 }
453
454 --code.scope;
455 code.AddLine("}}");
456 }
457
458 std::string GetResult() {
459 return code.GetResult();
460 }
461
462private:
463 friend class ASTDecompiler;
464 friend class ExprDecompiler;
349 465
350 void DecompileBranchMode() { 466 void DecompileBranchMode() {
351 // VM's program counter 467 // VM's program counter
@@ -387,46 +503,40 @@ public:
387 503
388 void DecompileAST(); 504 void DecompileAST();
389 505
390 void Decompile() { 506 void DeclareHeader() {
391 DeclareVertex(); 507 if (!identifier.empty()) {
392 DeclareGeometry(); 508 code.AddLine("// {}", identifier);
393 DeclareRegisters(); 509 }
394 DeclareCustomVariables(); 510 code.AddLine("#version 440 {}", ir.UsesLegacyVaryings() ? "compatibility" : "core");
395 DeclarePredicates(); 511 code.AddLine("#extension GL_ARB_separate_shader_objects : enable");
396 DeclareLocalMemory(); 512 if (device.HasShaderBallot()) {
397 DeclareInternalFlags(); 513 code.AddLine("#extension GL_ARB_shader_ballot : require");
398 DeclareInputAttributes(); 514 }
399 DeclareOutputAttributes(); 515 if (device.HasVertexViewportLayer()) {
400 DeclareConstantBuffers(); 516 code.AddLine("#extension GL_ARB_shader_viewport_layer_array : require");
401 DeclareGlobalMemory(); 517 }
402 DeclareSamplers(); 518 if (device.HasImageLoadFormatted()) {
403 DeclareImages(); 519 code.AddLine("#extension GL_EXT_shader_image_load_formatted : require");
404 DeclarePhysicalAttributeReader();
405
406 code.AddLine("void execute_{}() {{", suffix);
407 ++code.scope;
408
409 if (ir.IsDecompiled()) {
410 DecompileAST();
411 } else {
412 DecompileBranchMode();
413 } 520 }
521 if (device.HasWarpIntrinsics()) {
522 code.AddLine("#extension GL_NV_gpu_shader5 : require");
523 code.AddLine("#extension GL_NV_shader_thread_group : require");
524 code.AddLine("#extension GL_NV_shader_thread_shuffle : require");
525 }
526 // This pragma stops Nvidia's driver from over optimizing math (probably using fp16
527 // operations) on places where we don't want to.
528 // Thanks to Ryujinx for finding this workaround.
529 code.AddLine("#pragma optionNV(fastmath off)");
414 530
415 --code.scope; 531 code.AddNewLine();
416 code.AddLine("}}");
417 }
418 532
419 std::string GetResult() { 533 code.AddLine(CommonDeclarations, EmulationUniformBlockBinding);
420 return code.GetResult();
421 } 534 }
422 535
423private:
424 friend class ASTDecompiler;
425 friend class ExprDecompiler;
426
427 void DeclareVertex() { 536 void DeclareVertex() {
428 if (!IsVertexShader(stage)) 537 if (stage != ShaderType::Vertex) {
429 return; 538 return;
539 }
430 540
431 DeclareVertexRedeclarations(); 541 DeclareVertexRedeclarations();
432 } 542 }
@@ -436,9 +546,15 @@ private:
436 return; 546 return;
437 } 547 }
438 548
549 const auto& info = registry.GetGraphicsInfo();
550 const auto input_topology = info.primitive_topology;
551 const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(input_topology);
552 max_input_vertices = max_vertices;
553 code.AddLine("layout ({}) in;", glsl_topology);
554
439 const auto topology = GetTopologyName(header.common3.output_topology); 555 const auto topology = GetTopologyName(header.common3.output_topology);
440 const auto max_vertices = header.common4.max_output_vertices.Value(); 556 const auto max_output_vertices = header.common4.max_output_vertices.Value();
441 code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_vertices); 557 code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_output_vertices);
442 code.AddNewLine(); 558 code.AddNewLine();
443 559
444 code.AddLine("in gl_PerVertex {{"); 560 code.AddLine("in gl_PerVertex {{");
@@ -450,11 +566,50 @@ private:
450 DeclareVertexRedeclarations(); 566 DeclareVertexRedeclarations();
451 } 567 }
452 568
569 void DeclareFragment() {
570 if (stage != ShaderType::Fragment) {
571 return;
572 }
573 if (ir.UsesLegacyVaryings()) {
574 code.AddLine("in gl_PerFragment {{");
575 ++code.scope;
576 code.AddLine("vec4 gl_TexCoord[8];");
577 code.AddLine("vec4 gl_Color;");
578 code.AddLine("vec4 gl_SecondaryColor;");
579 --code.scope;
580 code.AddLine("}};");
581 }
582
583 for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
584 code.AddLine("layout (location = {}) out vec4 frag_color{};", rt, rt);
585 }
586 }
587
588 void DeclareCompute() {
589 if (stage != ShaderType::Compute) {
590 return;
591 }
592 const auto& info = registry.GetComputeInfo();
593 if (const u32 size = info.shared_memory_size_in_words; size > 0) {
594 code.AddLine("shared uint smem[{}];", size);
595 code.AddNewLine();
596 }
597 code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;",
598 info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]);
599 code.AddNewLine();
600 }
601
453 void DeclareVertexRedeclarations() { 602 void DeclareVertexRedeclarations() {
454 code.AddLine("out gl_PerVertex {{"); 603 code.AddLine("out gl_PerVertex {{");
455 ++code.scope; 604 ++code.scope;
456 605
457 code.AddLine("vec4 gl_Position;"); 606 auto pos_xfb = GetTransformFeedbackDecoration(Attribute::Index::Position);
607 if (!pos_xfb.empty()) {
608 pos_xfb = fmt::format("layout ({}) ", pos_xfb);
609 }
610 const char* pos_type =
611 FLOAT_TYPES.at(GetNumComponents(Attribute::Index::Position).value_or(4) - 1);
612 code.AddLine("{}{} gl_Position;", pos_xfb, pos_type);
458 613
459 for (const auto attribute : ir.GetOutputAttributes()) { 614 for (const auto attribute : ir.GetOutputAttributes()) {
460 if (attribute == Attribute::Index::ClipDistances0123 || 615 if (attribute == Attribute::Index::ClipDistances0123 ||
@@ -463,14 +618,14 @@ private:
463 break; 618 break;
464 } 619 }
465 } 620 }
466 if (!IsVertexShader(stage) || device.HasVertexViewportLayer()) { 621 if (stage != ShaderType::Vertex || device.HasVertexViewportLayer()) {
467 if (ir.UsesLayer()) { 622 if (ir.UsesLayer()) {
468 code.AddLine("int gl_Layer;"); 623 code.AddLine("int gl_Layer;");
469 } 624 }
470 if (ir.UsesViewportIndex()) { 625 if (ir.UsesViewportIndex()) {
471 code.AddLine("int gl_ViewportIndex;"); 626 code.AddLine("int gl_ViewportIndex;");
472 } 627 }
473 } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && IsVertexShader(stage) && 628 } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderType::Vertex &&
474 !device.HasVertexViewportLayer()) { 629 !device.HasVertexViewportLayer()) {
475 LOG_ERROR( 630 LOG_ERROR(
476 Render_OpenGL, 631 Render_OpenGL,
@@ -481,12 +636,12 @@ private:
481 code.AddLine("float gl_PointSize;"); 636 code.AddLine("float gl_PointSize;");
482 } 637 }
483 638
484 if (ir.UsesInstanceId()) { 639 if (ir.UsesLegacyVaryings()) {
485 code.AddLine("int gl_InstanceID;"); 640 code.AddLine("vec4 gl_TexCoord[8];");
486 } 641 code.AddLine("vec4 gl_FrontColor;");
487 642 code.AddLine("vec4 gl_FrontSecondaryColor;");
488 if (ir.UsesVertexId()) { 643 code.AddLine("vec4 gl_BackColor;");
489 code.AddLine("int gl_VertexID;"); 644 code.AddLine("vec4 gl_BackSecondaryColor;");
490 } 645 }
491 646
492 --code.scope; 647 --code.scope;
@@ -525,18 +680,16 @@ private:
525 } 680 }
526 681
527 void DeclareLocalMemory() { 682 void DeclareLocalMemory() {
683 u64 local_memory_size = 0;
528 if (stage == ShaderType::Compute) { 684 if (stage == ShaderType::Compute) {
529 code.AddLine("#ifdef LOCAL_MEMORY_SIZE"); 685 local_memory_size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL;
530 code.AddLine("uint {}[LOCAL_MEMORY_SIZE];", GetLocalMemory()); 686 } else {
531 code.AddLine("#endif"); 687 local_memory_size = header.GetLocalMemorySize();
532 return;
533 } 688 }
534
535 const u64 local_memory_size = header.GetLocalMemorySize();
536 if (local_memory_size == 0) { 689 if (local_memory_size == 0) {
537 return; 690 return;
538 } 691 }
539 const auto element_count = Common::AlignUp(local_memory_size, 4) / 4; 692 const u64 element_count = Common::AlignUp(local_memory_size, 4) / 4;
540 code.AddLine("uint {}[{}];", GetLocalMemory(), element_count); 693 code.AddLine("uint {}[{}];", GetLocalMemory(), element_count);
541 code.AddNewLine(); 694 code.AddNewLine();
542 } 695 }
@@ -589,7 +742,7 @@ private:
589 void DeclareInputAttribute(Attribute::Index index, bool skip_unused) { 742 void DeclareInputAttribute(Attribute::Index index, bool skip_unused) {
590 const u32 location{GetGenericAttributeIndex(index)}; 743 const u32 location{GetGenericAttributeIndex(index)};
591 744
592 std::string name{GetInputAttribute(index)}; 745 std::string name{GetGenericInputAttribute(index)};
593 if (stage == ShaderType::Geometry) { 746 if (stage == ShaderType::Geometry) {
594 name = "gs_" + name + "[]"; 747 name = "gs_" + name + "[]";
595 } 748 }
@@ -626,9 +779,59 @@ private:
626 } 779 }
627 } 780 }
628 781
782 std::optional<std::size_t> GetNumComponents(Attribute::Index index, u8 element = 0) const {
783 const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
784 const auto it = transform_feedback.find(location);
785 if (it == transform_feedback.end()) {
786 return {};
787 }
788 return it->second.components;
789 }
790
791 std::string GetTransformFeedbackDecoration(Attribute::Index index, u8 element = 0) const {
792 const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
793 const auto it = transform_feedback.find(location);
794 if (it == transform_feedback.end()) {
795 return {};
796 }
797
798 const VaryingTFB& tfb = it->second;
799 return fmt::format("xfb_buffer = {}, xfb_offset = {}, xfb_stride = {}", tfb.buffer,
800 tfb.offset, tfb.stride);
801 }
802
629 void DeclareOutputAttribute(Attribute::Index index) { 803 void DeclareOutputAttribute(Attribute::Index index) {
630 const u32 location{GetGenericAttributeIndex(index)}; 804 static constexpr std::string_view swizzle = "xyzw";
631 code.AddLine("layout (location = {}) out vec4 {};", location, GetOutputAttribute(index)); 805 u8 element = 0;
806 while (element < 4) {
807 auto xfb = GetTransformFeedbackDecoration(index, element);
808 if (!xfb.empty()) {
809 xfb = fmt::format(", {}", xfb);
810 }
811 const std::size_t remainder = 4 - element;
812 const std::size_t num_components = GetNumComponents(index, element).value_or(remainder);
813 const char* const type = FLOAT_TYPES.at(num_components - 1);
814
815 const u32 location = GetGenericAttributeIndex(index);
816
817 GenericVaryingDescription description;
818 description.first_element = static_cast<u8>(element);
819 description.is_scalar = num_components == 1;
820 description.name = AppendSuffix(location, OUTPUT_ATTRIBUTE_NAME);
821 if (element != 0 || num_components != 4) {
822 const std::string_view name_swizzle = swizzle.substr(element, num_components);
823 description.name = fmt::format("{}_{}", description.name, name_swizzle);
824 }
825 for (std::size_t i = 0; i < num_components; ++i) {
826 const u8 offset = static_cast<u8>(location * 4 + element + i);
827 varying_description.insert({offset, description});
828 }
829
830 code.AddLine("layout (location = {}, component = {}{}) out {} {};", location, element,
831 xfb, type, description.name);
832
833 element = static_cast<u8>(static_cast<std::size_t>(element) + num_components);
834 }
632 } 835 }
633 836
634 void DeclareConstantBuffers() { 837 void DeclareConstantBuffers() {
@@ -925,7 +1128,8 @@ private:
925 // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games 1128 // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
926 // set an 0x80000000 index for those and the shader fails to build. Find out why 1129 // set an 0x80000000 index for those and the shader fails to build. Find out why
927 // this happens and what's its intent. 1130 // this happens and what's its intent.
928 return fmt::format("gs_{}[{} % MAX_VERTEX_INPUT]", name, Visit(buffer).AsUint()); 1131 return fmt::format("gs_{}[{} % {}]", name, Visit(buffer).AsUint(),
1132 max_input_vertices.value());
929 } 1133 }
930 return std::string(name); 1134 return std::string(name);
931 }; 1135 };
@@ -943,6 +1147,10 @@ private:
943 default: 1147 default:
944 UNREACHABLE(); 1148 UNREACHABLE();
945 } 1149 }
1150 case Attribute::Index::FrontColor:
1151 return {"gl_Color"s + GetSwizzle(element), Type::Float};
1152 case Attribute::Index::FrontSecondaryColor:
1153 return {"gl_SecondaryColor"s + GetSwizzle(element), Type::Float};
946 case Attribute::Index::PointCoord: 1154 case Attribute::Index::PointCoord:
947 switch (element) { 1155 switch (element) {
948 case 0: 1156 case 0:
@@ -959,7 +1167,7 @@ private:
959 // TODO(Subv): Find out what the values are for the first two elements when inside a 1167 // TODO(Subv): Find out what the values are for the first two elements when inside a
960 // vertex shader, and what's the value of the fourth element when inside a Tess Eval 1168 // vertex shader, and what's the value of the fourth element when inside a Tess Eval
961 // shader. 1169 // shader.
962 ASSERT(IsVertexShader(stage)); 1170 ASSERT(stage == ShaderType::Vertex);
963 switch (element) { 1171 switch (element) {
964 case 2: 1172 case 2:
965 // Config pack's first value is instance_id. 1173 // Config pack's first value is instance_id.
@@ -980,7 +1188,13 @@ private:
980 return {"0", Type::Int}; 1188 return {"0", Type::Int};
981 default: 1189 default:
982 if (IsGenericAttribute(attribute)) { 1190 if (IsGenericAttribute(attribute)) {
983 return {GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element), 1191 return {GeometryPass(GetGenericInputAttribute(attribute)) + GetSwizzle(element),
1192 Type::Float};
1193 }
1194 if (IsLegacyTexCoord(attribute)) {
1195 UNIMPLEMENTED_IF(stage == ShaderType::Geometry);
1196 return {fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute),
1197 GetSwizzle(element)),
984 Type::Float}; 1198 Type::Float};
985 } 1199 }
986 break; 1200 break;
@@ -1021,21 +1235,22 @@ private:
1021 } 1235 }
1022 1236
1023 std::optional<Expression> GetOutputAttribute(const AbufNode* abuf) { 1237 std::optional<Expression> GetOutputAttribute(const AbufNode* abuf) {
1238 const u32 element = abuf->GetElement();
1024 switch (const auto attribute = abuf->GetIndex()) { 1239 switch (const auto attribute = abuf->GetIndex()) {
1025 case Attribute::Index::Position: 1240 case Attribute::Index::Position:
1026 return {{"gl_Position"s + GetSwizzle(abuf->GetElement()), Type::Float}}; 1241 return {{"gl_Position"s + GetSwizzle(element), Type::Float}};
1027 case Attribute::Index::LayerViewportPointSize: 1242 case Attribute::Index::LayerViewportPointSize:
1028 switch (abuf->GetElement()) { 1243 switch (element) {
1029 case 0: 1244 case 0:
1030 UNIMPLEMENTED(); 1245 UNIMPLEMENTED();
1031 return {}; 1246 return {};
1032 case 1: 1247 case 1:
1033 if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) { 1248 if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) {
1034 return {}; 1249 return {};
1035 } 1250 }
1036 return {{"gl_Layer", Type::Int}}; 1251 return {{"gl_Layer", Type::Int}};
1037 case 2: 1252 case 2:
1038 if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) { 1253 if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) {
1039 return {}; 1254 return {};
1040 } 1255 }
1041 return {{"gl_ViewportIndex", Type::Int}}; 1256 return {{"gl_ViewportIndex", Type::Int}};
@@ -1043,14 +1258,26 @@ private:
1043 return {{"gl_PointSize", Type::Float}}; 1258 return {{"gl_PointSize", Type::Float}};
1044 } 1259 }
1045 return {}; 1260 return {};
1261 case Attribute::Index::FrontColor:
1262 return {{"gl_FrontColor"s + GetSwizzle(element), Type::Float}};
1263 case Attribute::Index::FrontSecondaryColor:
1264 return {{"gl_FrontSecondaryColor"s + GetSwizzle(element), Type::Float}};
1265 case Attribute::Index::BackColor:
1266 return {{"gl_BackColor"s + GetSwizzle(element), Type::Float}};
1267 case Attribute::Index::BackSecondaryColor:
1268 return {{"gl_BackSecondaryColor"s + GetSwizzle(element), Type::Float}};
1046 case Attribute::Index::ClipDistances0123: 1269 case Attribute::Index::ClipDistances0123:
1047 return {{fmt::format("gl_ClipDistance[{}]", abuf->GetElement()), Type::Float}}; 1270 return {{fmt::format("gl_ClipDistance[{}]", element), Type::Float}};
1048 case Attribute::Index::ClipDistances4567: 1271 case Attribute::Index::ClipDistances4567:
1049 return {{fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), Type::Float}}; 1272 return {{fmt::format("gl_ClipDistance[{}]", element + 4), Type::Float}};
1050 default: 1273 default:
1051 if (IsGenericAttribute(attribute)) { 1274 if (IsGenericAttribute(attribute)) {
1052 return { 1275 return {{GetGenericOutputAttribute(attribute, element), Type::Float}};
1053 {GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), Type::Float}}; 1276 }
1277 if (IsLegacyTexCoord(attribute)) {
1278 return {{fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute),
1279 GetSwizzle(element)),
1280 Type::Float}};
1054 } 1281 }
1055 UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute)); 1282 UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute));
1056 return {}; 1283 return {};
@@ -1822,16 +2049,19 @@ private:
1822 expr += GetSampler(meta->sampler); 2049 expr += GetSampler(meta->sampler);
1823 expr += ", "; 2050 expr += ", ";
1824 2051
1825 expr += constructors.at(operation.GetOperandsCount() - 1); 2052 expr += constructors.at(operation.GetOperandsCount() + (meta->array ? 1 : 0) - 1);
1826 expr += '('; 2053 expr += '(';
1827 for (std::size_t i = 0; i < count; ++i) { 2054 for (std::size_t i = 0; i < count; ++i) {
1828 expr += VisitOperand(operation, i).AsInt(); 2055 if (i > 0) {
1829 const std::size_t next = i + 1;
1830 if (next == count)
1831 expr += ')';
1832 else if (next < count)
1833 expr += ", "; 2056 expr += ", ";
2057 }
2058 expr += VisitOperand(operation, i).AsInt();
2059 }
2060 if (meta->array) {
2061 expr += ", ";
2062 expr += Visit(meta->array).AsInt();
1834 } 2063 }
2064 expr += ')';
1835 2065
1836 if (meta->lod && !meta->sampler.IsBuffer()) { 2066 if (meta->lod && !meta->sampler.IsBuffer()) {
1837 expr += ", "; 2067 expr += ", ";
@@ -1945,7 +2175,7 @@ private:
1945 // TODO(Subv): Figure out how dual-source blending is configured in the Switch. 2175 // TODO(Subv): Figure out how dual-source blending is configured in the Switch.
1946 for (u32 component = 0; component < 4; ++component) { 2176 for (u32 component = 0; component < 4; ++component) {
1947 if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { 2177 if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
1948 code.AddLine("FragColor{}[{}] = {};", render_target, component, 2178 code.AddLine("frag_color{}{} = {};", render_target, GetColorSwizzle(component),
1949 SafeGetRegister(current_reg).AsFloat()); 2179 SafeGetRegister(current_reg).AsFloat());
1950 ++current_reg; 2180 ++current_reg;
1951 } 2181 }
@@ -2261,27 +2491,34 @@ private:
2261 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); 2491 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
2262 2492
2263 std::string GetRegister(u32 index) const { 2493 std::string GetRegister(u32 index) const {
2264 return GetDeclarationWithSuffix(index, "gpr"); 2494 return AppendSuffix(index, "gpr");
2265 } 2495 }
2266 2496
2267 std::string GetCustomVariable(u32 index) const { 2497 std::string GetCustomVariable(u32 index) const {
2268 return GetDeclarationWithSuffix(index, "custom_var"); 2498 return AppendSuffix(index, "custom_var");
2269 } 2499 }
2270 2500
2271 std::string GetPredicate(Tegra::Shader::Pred pred) const { 2501 std::string GetPredicate(Tegra::Shader::Pred pred) const {
2272 return GetDeclarationWithSuffix(static_cast<u32>(pred), "pred"); 2502 return AppendSuffix(static_cast<u32>(pred), "pred");
2273 } 2503 }
2274 2504
2275 std::string GetInputAttribute(Attribute::Index attribute) const { 2505 std::string GetGenericInputAttribute(Attribute::Index attribute) const {
2276 return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "input_attr"); 2506 return AppendSuffix(GetGenericAttributeIndex(attribute), INPUT_ATTRIBUTE_NAME);
2277 } 2507 }
2278 2508
2279 std::string GetOutputAttribute(Attribute::Index attribute) const { 2509 std::unordered_map<u8, GenericVaryingDescription> varying_description;
2280 return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "output_attr"); 2510
2511 std::string GetGenericOutputAttribute(Attribute::Index attribute, std::size_t element) const {
2512 const u8 offset = static_cast<u8>(GetGenericAttributeIndex(attribute) * 4 + element);
2513 const auto& description = varying_description.at(offset);
2514 if (description.is_scalar) {
2515 return description.name;
2516 }
2517 return fmt::format("{}[{}]", description.name, element - description.first_element);
2281 } 2518 }
2282 2519
2283 std::string GetConstBuffer(u32 index) const { 2520 std::string GetConstBuffer(u32 index) const {
2284 return GetDeclarationWithSuffix(index, "cbuf"); 2521 return AppendSuffix(index, "cbuf");
2285 } 2522 }
2286 2523
2287 std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const { 2524 std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const {
@@ -2294,11 +2531,15 @@ private:
2294 } 2531 }
2295 2532
2296 std::string GetConstBufferBlock(u32 index) const { 2533 std::string GetConstBufferBlock(u32 index) const {
2297 return GetDeclarationWithSuffix(index, "cbuf_block"); 2534 return AppendSuffix(index, "cbuf_block");
2298 } 2535 }
2299 2536
2300 std::string GetLocalMemory() const { 2537 std::string GetLocalMemory() const {
2301 return "lmem_" + suffix; 2538 if (suffix.empty()) {
2539 return "lmem";
2540 } else {
2541 return "lmem_" + std::string{suffix};
2542 }
2302 } 2543 }
2303 2544
2304 std::string GetInternalFlag(InternalFlag flag) const { 2545 std::string GetInternalFlag(InternalFlag flag) const {
@@ -2307,23 +2548,31 @@ private:
2307 const auto index = static_cast<u32>(flag); 2548 const auto index = static_cast<u32>(flag);
2308 ASSERT(index < static_cast<u32>(InternalFlag::Amount)); 2549 ASSERT(index < static_cast<u32>(InternalFlag::Amount));
2309 2550
2310 return fmt::format("{}_{}", InternalFlagNames[index], suffix); 2551 if (suffix.empty()) {
2552 return InternalFlagNames[index];
2553 } else {
2554 return fmt::format("{}_{}", InternalFlagNames[index], suffix);
2555 }
2311 } 2556 }
2312 2557
2313 std::string GetSampler(const Sampler& sampler) const { 2558 std::string GetSampler(const Sampler& sampler) const {
2314 return GetDeclarationWithSuffix(static_cast<u32>(sampler.GetIndex()), "sampler"); 2559 return AppendSuffix(static_cast<u32>(sampler.GetIndex()), "sampler");
2315 } 2560 }
2316 2561
2317 std::string GetImage(const Image& image) const { 2562 std::string GetImage(const Image& image) const {
2318 return GetDeclarationWithSuffix(static_cast<u32>(image.GetIndex()), "image"); 2563 return AppendSuffix(static_cast<u32>(image.GetIndex()), "image");
2319 } 2564 }
2320 2565
2321 std::string GetDeclarationWithSuffix(u32 index, std::string_view name) const { 2566 std::string AppendSuffix(u32 index, std::string_view name) const {
2322 return fmt::format("{}_{}_{}", name, index, suffix); 2567 if (suffix.empty()) {
2568 return fmt::format("{}{}", name, index);
2569 } else {
2570 return fmt::format("{}{}_{}", name, index, suffix);
2571 }
2323 } 2572 }
2324 2573
2325 u32 GetNumPhysicalInputAttributes() const { 2574 u32 GetNumPhysicalInputAttributes() const {
2326 return IsVertexShader(stage) ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); 2575 return stage == ShaderType::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings();
2327 } 2576 }
2328 2577
2329 u32 GetNumPhysicalAttributes() const { 2578 u32 GetNumPhysicalAttributes() const {
@@ -2334,17 +2583,31 @@ private:
2334 return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings); 2583 return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings);
2335 } 2584 }
2336 2585
2586 bool IsRenderTargetEnabled(u32 render_target) const {
2587 for (u32 component = 0; component < 4; ++component) {
2588 if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
2589 return true;
2590 }
2591 }
2592 return false;
2593 }
2594
2337 const Device& device; 2595 const Device& device;
2338 const ShaderIR& ir; 2596 const ShaderIR& ir;
2597 const Registry& registry;
2339 const ShaderType stage; 2598 const ShaderType stage;
2340 const std::string suffix; 2599 const std::string_view identifier;
2600 const std::string_view suffix;
2341 const Header header; 2601 const Header header;
2602 std::unordered_map<u8, VaryingTFB> transform_feedback;
2342 2603
2343 ShaderWriter code; 2604 ShaderWriter code;
2605
2606 std::optional<u32> max_input_vertices;
2344}; 2607};
2345 2608
2346std::string GetFlowVariable(u32 i) { 2609std::string GetFlowVariable(u32 index) {
2347 return fmt::format("flow_var_{}", i); 2610 return fmt::format("flow_var{}", index);
2348} 2611}
2349 2612
2350class ExprDecompiler { 2613class ExprDecompiler {
@@ -2531,7 +2794,7 @@ void GLSLDecompiler::DecompileAST() {
2531 2794
2532} // Anonymous namespace 2795} // Anonymous namespace
2533 2796
2534ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) { 2797ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) {
2535 ShaderEntries entries; 2798 ShaderEntries entries;
2536 for (const auto& cbuf : ir.GetConstantBuffers()) { 2799 for (const auto& cbuf : ir.GetConstantBuffers()) {
2537 entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), 2800 entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
@@ -2547,33 +2810,20 @@ ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) {
2547 for (const auto& image : ir.GetImages()) { 2810 for (const auto& image : ir.GetImages()) {
2548 entries.images.emplace_back(image); 2811 entries.images.emplace_back(image);
2549 } 2812 }
2550 entries.clip_distances = ir.GetClipDistances(); 2813 const auto clip_distances = ir.GetClipDistances();
2814 for (std::size_t i = 0; i < std::size(clip_distances); ++i) {
2815 entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i;
2816 }
2551 entries.shader_length = ir.GetLength(); 2817 entries.shader_length = ir.GetLength();
2552 return entries; 2818 return entries;
2553} 2819}
2554 2820
2555std::string GetCommonDeclarations() { 2821std::string DecompileShader(const Device& device, const ShaderIR& ir, const Registry& registry,
2556 return R"(#define ftoi floatBitsToInt 2822 ShaderType stage, std::string_view identifier,
2557#define ftou floatBitsToUint 2823 std::string_view suffix) {
2558#define itof intBitsToFloat 2824 GLSLDecompiler decompiler(device, ir, registry, stage, identifier, suffix);
2559#define utof uintBitsToFloat
2560
2561bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {
2562 bvec2 is_nan1 = isnan(pair1);
2563 bvec2 is_nan2 = isnan(pair2);
2564 return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y);
2565}
2566
2567const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
2568const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
2569)";
2570}
2571
2572std::string Decompile(const Device& device, const ShaderIR& ir, ShaderType stage,
2573 const std::string& suffix) {
2574 GLSLDecompiler decompiler(device, ir, stage, suffix);
2575 decompiler.Decompile(); 2825 decompiler.Decompile();
2576 return decompiler.GetResult(); 2826 return decompiler.GetResult();
2577} 2827}
2578 2828
2579} // namespace OpenGL::GLShader 2829} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index 7876f48d6..e7dbd810c 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -6,22 +6,18 @@
6 6
7#include <array> 7#include <array>
8#include <string> 8#include <string>
9#include <string_view>
9#include <utility> 10#include <utility>
10#include <vector> 11#include <vector>
11#include "common/common_types.h" 12#include "common/common_types.h"
12#include "video_core/engines/maxwell_3d.h" 13#include "video_core/engines/maxwell_3d.h"
13#include "video_core/engines/shader_type.h" 14#include "video_core/engines/shader_type.h"
15#include "video_core/shader/registry.h"
14#include "video_core/shader/shader_ir.h" 16#include "video_core/shader/shader_ir.h"
15 17
16namespace VideoCommon::Shader {
17class ShaderIR;
18}
19
20namespace OpenGL { 18namespace OpenGL {
21class Device;
22}
23 19
24namespace OpenGL::GLShader { 20class Device;
25 21
26using Maxwell = Tegra::Engines::Maxwell3D::Regs; 22using Maxwell = Tegra::Engines::Maxwell3D::Regs;
27using SamplerEntry = VideoCommon::Shader::Sampler; 23using SamplerEntry = VideoCommon::Shader::Sampler;
@@ -74,15 +70,15 @@ struct ShaderEntries {
74 std::vector<GlobalMemoryEntry> global_memory_entries; 70 std::vector<GlobalMemoryEntry> global_memory_entries;
75 std::vector<SamplerEntry> samplers; 71 std::vector<SamplerEntry> samplers;
76 std::vector<ImageEntry> images; 72 std::vector<ImageEntry> images;
77 std::array<bool, Maxwell::NumClipDistances> clip_distances{}; 73 u32 clip_distances{};
78 std::size_t shader_length{}; 74 std::size_t shader_length{};
79}; 75};
80 76
81ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir); 77ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir);
82
83std::string GetCommonDeclarations();
84 78
85std::string Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, 79std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
86 Tegra::Engines::ShaderType stage, const std::string& suffix); 80 const VideoCommon::Shader::Registry& registry,
81 Tegra::Engines::ShaderType stage, std::string_view identifier,
82 std::string_view suffix = {});
87 83
88} // namespace OpenGL::GLShader 84} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 1fc204f6f..9e95a122b 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -31,32 +31,24 @@ namespace {
31 31
32using ShaderCacheVersionHash = std::array<u8, 64>; 32using ShaderCacheVersionHash = std::array<u8, 64>;
33 33
34enum class TransferableEntryKind : u32 {
35 Raw,
36 Usage,
37};
38
39struct ConstBufferKey { 34struct ConstBufferKey {
40 u32 cbuf{}; 35 u32 cbuf = 0;
41 u32 offset{}; 36 u32 offset = 0;
42 u32 value{}; 37 u32 value = 0;
43}; 38};
44 39
45struct BoundSamplerKey { 40struct BoundSamplerKey {
46 u32 offset{}; 41 u32 offset = 0;
47 Tegra::Engines::SamplerDescriptor sampler{}; 42 Tegra::Engines::SamplerDescriptor sampler;
48}; 43};
49 44
50struct BindlessSamplerKey { 45struct BindlessSamplerKey {
51 u32 cbuf{}; 46 u32 cbuf = 0;
52 u32 offset{}; 47 u32 offset = 0;
53 Tegra::Engines::SamplerDescriptor sampler{}; 48 Tegra::Engines::SamplerDescriptor sampler;
54}; 49};
55 50
56constexpr u32 NativeVersion = 12; 51constexpr u32 NativeVersion = 20;
57
58// Making sure sizes doesn't change by accident
59static_assert(sizeof(ProgramVariant) == 20);
60 52
61ShaderCacheVersionHash GetShaderCacheVersionHash() { 53ShaderCacheVersionHash GetShaderCacheVersionHash() {
62 ShaderCacheVersionHash hash{}; 54 ShaderCacheVersionHash hash{};
@@ -67,61 +59,124 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() {
67 59
68} // Anonymous namespace 60} // Anonymous namespace
69 61
70ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ShaderType type, ProgramCode code, 62ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default;
71 ProgramCode code_b)
72 : unique_identifier{unique_identifier}, type{type}, code{std::move(code)}, code_b{std::move(
73 code_b)} {}
74 63
75ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default; 64ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default;
76 65
77ShaderDiskCacheRaw::~ShaderDiskCacheRaw() = default; 66bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
78 67 if (file.ReadBytes(&type, sizeof(u32)) != sizeof(u32)) {
79bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) {
80 if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64) ||
81 file.ReadBytes(&type, sizeof(u32)) != sizeof(u32)) {
82 return false; 68 return false;
83 } 69 }
84 u32 code_size{}; 70 u32 code_size;
85 u32 code_size_b{}; 71 u32 code_size_b;
86 if (file.ReadBytes(&code_size, sizeof(u32)) != sizeof(u32) || 72 if (file.ReadBytes(&code_size, sizeof(u32)) != sizeof(u32) ||
87 file.ReadBytes(&code_size_b, sizeof(u32)) != sizeof(u32)) { 73 file.ReadBytes(&code_size_b, sizeof(u32)) != sizeof(u32)) {
88 return false; 74 return false;
89 } 75 }
90
91 code.resize(code_size); 76 code.resize(code_size);
92 code_b.resize(code_size_b); 77 code_b.resize(code_size_b);
93 78
94 if (file.ReadArray(code.data(), code_size) != code_size) 79 if (file.ReadArray(code.data(), code_size) != code_size) {
95 return false; 80 return false;
96 81 }
97 if (HasProgramA() && file.ReadArray(code_b.data(), code_size_b) != code_size_b) { 82 if (HasProgramA() && file.ReadArray(code_b.data(), code_size_b) != code_size_b) {
98 return false; 83 return false;
99 } 84 }
85
86 u8 is_texture_handler_size_known;
87 u32 texture_handler_size_value;
88 u32 num_keys;
89 u32 num_bound_samplers;
90 u32 num_bindless_samplers;
91 if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 ||
92 file.ReadArray(&is_texture_handler_size_known, 1) != 1 ||
93 file.ReadArray(&texture_handler_size_value, 1) != 1 ||
94 file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 ||
95 file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 ||
96 file.ReadArray(&num_bindless_samplers, 1) != 1) {
97 return false;
98 }
99 if (is_texture_handler_size_known) {
100 texture_handler_size = texture_handler_size_value;
101 }
102
103 std::vector<ConstBufferKey> flat_keys(num_keys);
104 std::vector<BoundSamplerKey> flat_bound_samplers(num_bound_samplers);
105 std::vector<BindlessSamplerKey> flat_bindless_samplers(num_bindless_samplers);
106 if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() ||
107 file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) !=
108 flat_bound_samplers.size() ||
109 file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) !=
110 flat_bindless_samplers.size()) {
111 return false;
112 }
113 for (const auto& key : flat_keys) {
114 keys.insert({{key.cbuf, key.offset}, key.value});
115 }
116 for (const auto& key : flat_bound_samplers) {
117 bound_samplers.emplace(key.offset, key.sampler);
118 }
119 for (const auto& key : flat_bindless_samplers) {
120 bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
121 }
122
100 return true; 123 return true;
101} 124}
102 125
103bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const { 126bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
104 if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(static_cast<u32>(type)) != 1 || 127 if (file.WriteObject(static_cast<u32>(type)) != 1 ||
105 file.WriteObject(static_cast<u32>(code.size())) != 1 || 128 file.WriteObject(static_cast<u32>(code.size())) != 1 ||
106 file.WriteObject(static_cast<u32>(code_b.size())) != 1) { 129 file.WriteObject(static_cast<u32>(code_b.size())) != 1) {
107 return false; 130 return false;
108 } 131 }
109 132 if (file.WriteArray(code.data(), code.size()) != code.size()) {
110 if (file.WriteArray(code.data(), code.size()) != code.size())
111 return false; 133 return false;
112 134 }
113 if (HasProgramA() && file.WriteArray(code_b.data(), code_b.size()) != code_b.size()) { 135 if (HasProgramA() && file.WriteArray(code_b.data(), code_b.size()) != code_b.size()) {
114 return false; 136 return false;
115 } 137 }
116 return true; 138
139 if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(bound_buffer) != 1 ||
140 file.WriteObject(static_cast<u8>(texture_handler_size.has_value())) != 1 ||
141 file.WriteObject(texture_handler_size.value_or(0)) != 1 ||
142 file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 ||
143 file.WriteObject(static_cast<u32>(keys.size())) != 1 ||
144 file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 ||
145 file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) {
146 return false;
147 }
148
149 std::vector<ConstBufferKey> flat_keys;
150 flat_keys.reserve(keys.size());
151 for (const auto& [address, value] : keys) {
152 flat_keys.push_back(ConstBufferKey{address.first, address.second, value});
153 }
154
155 std::vector<BoundSamplerKey> flat_bound_samplers;
156 flat_bound_samplers.reserve(bound_samplers.size());
157 for (const auto& [address, sampler] : bound_samplers) {
158 flat_bound_samplers.push_back(BoundSamplerKey{address, sampler});
159 }
160
161 std::vector<BindlessSamplerKey> flat_bindless_samplers;
162 flat_bindless_samplers.reserve(bindless_samplers.size());
163 for (const auto& [address, sampler] : bindless_samplers) {
164 flat_bindless_samplers.push_back(
165 BindlessSamplerKey{address.first, address.second, sampler});
166 }
167
168 return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() &&
169 file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) ==
170 flat_bound_samplers.size() &&
171 file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) ==
172 flat_bindless_samplers.size();
117} 173}
118 174
119ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {} 175ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {}
120 176
121ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default; 177ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default;
122 178
123std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>> 179std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() {
124ShaderDiskCacheOpenGL::LoadTransferable() {
125 // Skip games without title id 180 // Skip games without title id
126 const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0; 181 const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0;
127 if (!Settings::values.use_disk_shader_cache || !has_title_id) { 182 if (!Settings::values.use_disk_shader_cache || !has_title_id) {
@@ -130,17 +185,14 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
130 185
131 FileUtil::IOFile file(GetTransferablePath(), "rb"); 186 FileUtil::IOFile file(GetTransferablePath(), "rb");
132 if (!file.IsOpen()) { 187 if (!file.IsOpen()) {
133 LOG_INFO(Render_OpenGL, "No transferable shader cache found for game with title id={}", 188 LOG_INFO(Render_OpenGL, "No transferable shader cache found");
134 GetTitleID());
135 is_usable = true; 189 is_usable = true;
136 return {}; 190 return {};
137 } 191 }
138 192
139 u32 version{}; 193 u32 version{};
140 if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) { 194 if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) {
141 LOG_ERROR(Render_OpenGL, 195 LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it");
142 "Failed to get transferable cache version for title id={}, skipping",
143 GetTitleID());
144 return {}; 196 return {};
145 } 197 }
146 198
@@ -158,105 +210,42 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
158 } 210 }
159 211
160 // Version is valid, load the shaders 212 // Version is valid, load the shaders
161 constexpr const char error_loading[] = "Failed to load transferable raw entry, skipping"; 213 std::vector<ShaderDiskCacheEntry> entries;
162 std::vector<ShaderDiskCacheRaw> raws;
163 std::vector<ShaderDiskCacheUsage> usages;
164 while (file.Tell() < file.GetSize()) { 214 while (file.Tell() < file.GetSize()) {
165 TransferableEntryKind kind{}; 215 ShaderDiskCacheEntry& entry = entries.emplace_back();
166 if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) { 216 if (!entry.Load(file)) {
167 LOG_ERROR(Render_OpenGL, "Failed to read transferable file, skipping"); 217 LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping");
168 return {};
169 }
170
171 switch (kind) {
172 case TransferableEntryKind::Raw: {
173 ShaderDiskCacheRaw entry;
174 if (!entry.Load(file)) {
175 LOG_ERROR(Render_OpenGL, error_loading);
176 return {};
177 }
178 transferable.insert({entry.GetUniqueIdentifier(), {}});
179 raws.push_back(std::move(entry));
180 break;
181 }
182 case TransferableEntryKind::Usage: {
183 ShaderDiskCacheUsage usage;
184
185 u32 num_keys{};
186 u32 num_bound_samplers{};
187 u32 num_bindless_samplers{};
188 if (file.ReadArray(&usage.unique_identifier, 1) != 1 ||
189 file.ReadArray(&usage.variant, 1) != 1 ||
190 file.ReadArray(&usage.bound_buffer, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 ||
191 file.ReadArray(&num_bound_samplers, 1) != 1 ||
192 file.ReadArray(&num_bindless_samplers, 1) != 1) {
193 LOG_ERROR(Render_OpenGL, error_loading);
194 return {};
195 }
196
197 std::vector<ConstBufferKey> keys(num_keys);
198 std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers);
199 std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers);
200 if (file.ReadArray(keys.data(), keys.size()) != keys.size() ||
201 file.ReadArray(bound_samplers.data(), bound_samplers.size()) !=
202 bound_samplers.size() ||
203 file.ReadArray(bindless_samplers.data(), bindless_samplers.size()) !=
204 bindless_samplers.size()) {
205 LOG_ERROR(Render_OpenGL, error_loading);
206 return {};
207 }
208 for (const auto& key : keys) {
209 usage.keys.insert({{key.cbuf, key.offset}, key.value});
210 }
211 for (const auto& key : bound_samplers) {
212 usage.bound_samplers.emplace(key.offset, key.sampler);
213 }
214 for (const auto& key : bindless_samplers) {
215 usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
216 }
217
218 usages.push_back(std::move(usage));
219 break;
220 }
221 default:
222 LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={}, skipping",
223 static_cast<u32>(kind));
224 return {}; 218 return {};
225 } 219 }
226 } 220 }
227 221
228 is_usable = true; 222 is_usable = true;
229 return {{std::move(raws), std::move(usages)}}; 223 return {std::move(entries)};
230} 224}
231 225
232std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> 226std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled() {
233ShaderDiskCacheOpenGL::LoadPrecompiled() {
234 if (!is_usable) { 227 if (!is_usable) {
235 return {}; 228 return {};
236 } 229 }
237 230
238 std::string path = GetPrecompiledPath(); 231 FileUtil::IOFile file(GetPrecompiledPath(), "rb");
239 FileUtil::IOFile file(path, "rb");
240 if (!file.IsOpen()) { 232 if (!file.IsOpen()) {
241 LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}", 233 LOG_INFO(Render_OpenGL, "No precompiled shader cache found");
242 GetTitleID());
243 return {}; 234 return {};
244 } 235 }
245 236
246 const auto result = LoadPrecompiledFile(file); 237 if (const auto result = LoadPrecompiledFile(file)) {
247 if (!result) { 238 return *result;
248 LOG_INFO(Render_OpenGL,
249 "Failed to load precompiled cache for game with title id={}, removing",
250 GetTitleID());
251 file.Close();
252 InvalidatePrecompiled();
253 return {};
254 } 239 }
255 return *result; 240
241 LOG_INFO(Render_OpenGL, "Failed to load precompiled cache");
242 file.Close();
243 InvalidatePrecompiled();
244 return {};
256} 245}
257 246
258std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>> 247std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile(
259ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { 248 FileUtil::IOFile& file) {
260 // Read compressed file from disk and decompress to virtual precompiled cache file 249 // Read compressed file from disk and decompress to virtual precompiled cache file
261 std::vector<u8> compressed(file.GetSize()); 250 std::vector<u8> compressed(file.GetSize());
262 file.ReadBytes(compressed.data(), compressed.size()); 251 file.ReadBytes(compressed.data(), compressed.size());
@@ -275,58 +264,22 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
275 return {}; 264 return {};
276 } 265 }
277 266
278 ShaderDumpsMap dumps; 267 std::vector<ShaderDiskCachePrecompiled> entries;
279 while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { 268 while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) {
280 u32 num_keys{}; 269 u32 binary_size;
281 u32 num_bound_samplers{}; 270 auto& entry = entries.emplace_back();
282 u32 num_bindless_samplers{}; 271 if (!LoadObjectFromPrecompiled(entry.unique_identifier) ||
283 ShaderDiskCacheUsage usage; 272 !LoadObjectFromPrecompiled(entry.binary_format) ||
284 if (!LoadObjectFromPrecompiled(usage.unique_identifier) || 273 !LoadObjectFromPrecompiled(binary_size)) {
285 !LoadObjectFromPrecompiled(usage.variant) ||
286 !LoadObjectFromPrecompiled(usage.bound_buffer) ||
287 !LoadObjectFromPrecompiled(num_keys) ||
288 !LoadObjectFromPrecompiled(num_bound_samplers) ||
289 !LoadObjectFromPrecompiled(num_bindless_samplers)) {
290 return {};
291 }
292 std::vector<ConstBufferKey> keys(num_keys);
293 std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers);
294 std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers);
295 if (!LoadArrayFromPrecompiled(keys.data(), keys.size()) ||
296 !LoadArrayFromPrecompiled(bound_samplers.data(), bound_samplers.size()) !=
297 bound_samplers.size() ||
298 !LoadArrayFromPrecompiled(bindless_samplers.data(), bindless_samplers.size()) !=
299 bindless_samplers.size()) {
300 return {};
301 }
302 for (const auto& key : keys) {
303 usage.keys.insert({{key.cbuf, key.offset}, key.value});
304 }
305 for (const auto& key : bound_samplers) {
306 usage.bound_samplers.emplace(key.offset, key.sampler);
307 }
308 for (const auto& key : bindless_samplers) {
309 usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
310 }
311
312 ShaderDiskCacheDump dump;
313 if (!LoadObjectFromPrecompiled(dump.binary_format)) {
314 return {};
315 }
316
317 u32 binary_length{};
318 if (!LoadObjectFromPrecompiled(binary_length)) {
319 return {}; 274 return {};
320 } 275 }
321 276
322 dump.binary.resize(binary_length); 277 entry.binary.resize(binary_size);
323 if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) { 278 if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) {
324 return {}; 279 return {};
325 } 280 }
326
327 dumps.emplace(std::move(usage), dump);
328 } 281 }
329 return dumps; 282 return entries;
330} 283}
331 284
332void ShaderDiskCacheOpenGL::InvalidateTransferable() { 285void ShaderDiskCacheOpenGL::InvalidateTransferable() {
@@ -346,13 +299,13 @@ void ShaderDiskCacheOpenGL::InvalidatePrecompiled() {
346 } 299 }
347} 300}
348 301
349void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) { 302void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) {
350 if (!is_usable) { 303 if (!is_usable) {
351 return; 304 return;
352 } 305 }
353 306
354 const u64 id = entry.GetUniqueIdentifier(); 307 const u64 id = entry.unique_identifier;
355 if (transferable.find(id) != transferable.end()) { 308 if (stored_transferable.find(id) != stored_transferable.end()) {
356 // The shader already exists 309 // The shader already exists
357 return; 310 return;
358 } 311 }
@@ -361,71 +314,17 @@ void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) {
361 if (!file.IsOpen()) { 314 if (!file.IsOpen()) {
362 return; 315 return;
363 } 316 }
364 if (file.WriteObject(TransferableEntryKind::Raw) != 1 || !entry.Save(file)) { 317 if (!entry.Save(file)) {
365 LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing"); 318 LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing");
366 file.Close(); 319 file.Close();
367 InvalidateTransferable(); 320 InvalidateTransferable();
368 return; 321 return;
369 } 322 }
370 transferable.insert({id, {}});
371}
372 323
373void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) { 324 stored_transferable.insert(id);
374 if (!is_usable) {
375 return;
376 }
377
378 const auto it = transferable.find(usage.unique_identifier);
379 ASSERT_MSG(it != transferable.end(), "Saving shader usage without storing raw previously");
380
381 auto& usages{it->second};
382 if (usages.find(usage) != usages.end()) {
383 // Skip this variant since the shader is already stored.
384 return;
385 }
386 usages.insert(usage);
387
388 FileUtil::IOFile file = AppendTransferableFile();
389 if (!file.IsOpen())
390 return;
391 const auto Close = [&] {
392 LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry, removing");
393 file.Close();
394 InvalidateTransferable();
395 };
396
397 if (file.WriteObject(TransferableEntryKind::Usage) != 1 ||
398 file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 ||
399 file.WriteObject(usage.bound_buffer) != 1 ||
400 file.WriteObject(static_cast<u32>(usage.keys.size())) != 1 ||
401 file.WriteObject(static_cast<u32>(usage.bound_samplers.size())) != 1 ||
402 file.WriteObject(static_cast<u32>(usage.bindless_samplers.size())) != 1) {
403 Close();
404 return;
405 }
406 for (const auto& [pair, value] : usage.keys) {
407 const auto [cbuf, offset] = pair;
408 if (file.WriteObject(ConstBufferKey{cbuf, offset, value}) != 1) {
409 Close();
410 return;
411 }
412 }
413 for (const auto& [offset, sampler] : usage.bound_samplers) {
414 if (file.WriteObject(BoundSamplerKey{offset, sampler}) != 1) {
415 Close();
416 return;
417 }
418 }
419 for (const auto& [pair, sampler] : usage.bindless_samplers) {
420 const auto [cbuf, offset] = pair;
421 if (file.WriteObject(BindlessSamplerKey{cbuf, offset, sampler}) != 1) {
422 Close();
423 return;
424 }
425 }
426} 325}
427 326
428void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint program) { 327void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint program) {
429 if (!is_usable) { 328 if (!is_usable) {
430 return; 329 return;
431 } 330 }
@@ -437,51 +336,19 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p
437 SavePrecompiledHeaderToVirtualPrecompiledCache(); 336 SavePrecompiledHeaderToVirtualPrecompiledCache();
438 } 337 }
439 338
440 GLint binary_length{}; 339 GLint binary_length;
441 glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); 340 glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length);
442 341
443 GLenum binary_format{}; 342 GLenum binary_format;
444 std::vector<u8> binary(binary_length); 343 std::vector<u8> binary(binary_length);
445 glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); 344 glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
446 345
447 const auto Close = [&] { 346 if (!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(binary_format) ||
347 !SaveObjectToPrecompiled(static_cast<u32>(binary.size())) ||
348 !SaveArrayToPrecompiled(binary.data(), binary.size())) {
448 LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing", 349 LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing",
449 usage.unique_identifier); 350 unique_identifier);
450 InvalidatePrecompiled(); 351 InvalidatePrecompiled();
451 };
452
453 if (!SaveObjectToPrecompiled(usage.unique_identifier) ||
454 !SaveObjectToPrecompiled(usage.variant) || !SaveObjectToPrecompiled(usage.bound_buffer) ||
455 !SaveObjectToPrecompiled(static_cast<u32>(usage.keys.size())) ||
456 !SaveObjectToPrecompiled(static_cast<u32>(usage.bound_samplers.size())) ||
457 !SaveObjectToPrecompiled(static_cast<u32>(usage.bindless_samplers.size()))) {
458 Close();
459 return;
460 }
461 for (const auto& [pair, value] : usage.keys) {
462 const auto [cbuf, offset] = pair;
463 if (SaveObjectToPrecompiled(ConstBufferKey{cbuf, offset, value}) != 1) {
464 Close();
465 return;
466 }
467 }
468 for (const auto& [offset, sampler] : usage.bound_samplers) {
469 if (SaveObjectToPrecompiled(BoundSamplerKey{offset, sampler}) != 1) {
470 Close();
471 return;
472 }
473 }
474 for (const auto& [pair, sampler] : usage.bindless_samplers) {
475 const auto [cbuf, offset] = pair;
476 if (SaveObjectToPrecompiled(BindlessSamplerKey{cbuf, offset, sampler}) != 1) {
477 Close();
478 return;
479 }
480 }
481 if (!SaveObjectToPrecompiled(static_cast<u32>(binary_format)) ||
482 !SaveObjectToPrecompiled(static_cast<u32>(binary_length)) ||
483 !SaveArrayToPrecompiled(binary.data(), binary.size())) {
484 Close();
485 } 352 }
486} 353}
487 354
@@ -534,7 +401,6 @@ void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
534 if (file.WriteBytes(compressed.data(), compressed.size()) != compressed.size()) { 401 if (file.WriteBytes(compressed.data(), compressed.size()) != compressed.size()) {
535 LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}", 402 LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}",
536 precompiled_path); 403 precompiled_path);
537 return;
538 } 404 }
539} 405}
540 406
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index ef2371f6d..d5be52e40 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -19,8 +19,7 @@
19#include "common/common_types.h" 19#include "common/common_types.h"
20#include "core/file_sys/vfs_vector.h" 20#include "core/file_sys/vfs_vector.h"
21#include "video_core/engines/shader_type.h" 21#include "video_core/engines/shader_type.h"
22#include "video_core/renderer_opengl/gl_shader_gen.h" 22#include "video_core/shader/registry.h"
23#include "video_core/shader/const_buffer_locker.h"
24 23
25namespace Core { 24namespace Core {
26class System; 25class System;
@@ -32,139 +31,39 @@ class IOFile;
32 31
33namespace OpenGL { 32namespace OpenGL {
34 33
35struct ShaderDiskCacheUsage;
36struct ShaderDiskCacheDump;
37
38using ProgramCode = std::vector<u64>; 34using ProgramCode = std::vector<u64>;
39using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>;
40
41/// Describes the different variants a program can be compiled with.
42struct ProgramVariant final {
43 ProgramVariant() = default;
44
45 /// Graphics constructor.
46 explicit constexpr ProgramVariant(GLenum primitive_mode) noexcept
47 : primitive_mode{primitive_mode} {}
48
49 /// Compute constructor.
50 explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, u32 shared_memory_size,
51 u32 local_memory_size) noexcept
52 : block_x{block_x}, block_y{static_cast<u16>(block_y)}, block_z{static_cast<u16>(block_z)},
53 shared_memory_size{shared_memory_size}, local_memory_size{local_memory_size} {}
54
55 // Graphics specific parameters.
56 GLenum primitive_mode{};
57
58 // Compute specific parameters.
59 u32 block_x{};
60 u16 block_y{};
61 u16 block_z{};
62 u32 shared_memory_size{};
63 u32 local_memory_size{};
64
65 bool operator==(const ProgramVariant& rhs) const noexcept {
66 return std::tie(primitive_mode, block_x, block_y, block_z, shared_memory_size,
67 local_memory_size) == std::tie(rhs.primitive_mode, rhs.block_x, rhs.block_y,
68 rhs.block_z, rhs.shared_memory_size,
69 rhs.local_memory_size);
70 }
71
72 bool operator!=(const ProgramVariant& rhs) const noexcept {
73 return !operator==(rhs);
74 }
75};
76static_assert(std::is_trivially_copyable_v<ProgramVariant>);
77
78/// Describes how a shader is used.
79struct ShaderDiskCacheUsage {
80 u64 unique_identifier{};
81 ProgramVariant variant;
82 u32 bound_buffer{};
83 VideoCommon::Shader::KeyMap keys;
84 VideoCommon::Shader::BoundSamplerMap bound_samplers;
85 VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
86
87 bool operator==(const ShaderDiskCacheUsage& rhs) const {
88 return std::tie(unique_identifier, variant, keys, bound_samplers, bindless_samplers) ==
89 std::tie(rhs.unique_identifier, rhs.variant, rhs.keys, rhs.bound_samplers,
90 rhs.bindless_samplers);
91 }
92
93 bool operator!=(const ShaderDiskCacheUsage& rhs) const {
94 return !operator==(rhs);
95 }
96};
97
98} // namespace OpenGL
99
100namespace std {
101
102template <>
103struct hash<OpenGL::ProgramVariant> {
104 std::size_t operator()(const OpenGL::ProgramVariant& variant) const noexcept {
105 return (static_cast<std::size_t>(variant.primitive_mode) << 6) ^
106 static_cast<std::size_t>(variant.block_x) ^
107 (static_cast<std::size_t>(variant.block_y) << 32) ^
108 (static_cast<std::size_t>(variant.block_z) << 48) ^
109 (static_cast<std::size_t>(variant.shared_memory_size) << 16) ^
110 (static_cast<std::size_t>(variant.local_memory_size) << 36);
111 }
112};
113
114template <>
115struct hash<OpenGL::ShaderDiskCacheUsage> {
116 std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept {
117 return static_cast<std::size_t>(usage.unique_identifier) ^
118 std::hash<OpenGL::ProgramVariant>{}(usage.variant);
119 }
120};
121
122} // namespace std
123
124namespace OpenGL {
125 35
126/// Describes a shader how it's used by the guest GPU 36/// Describes a shader and how it's used by the guest GPU
127class ShaderDiskCacheRaw { 37struct ShaderDiskCacheEntry {
128public: 38 ShaderDiskCacheEntry();
129 explicit ShaderDiskCacheRaw(u64 unique_identifier, Tegra::Engines::ShaderType type, 39 ~ShaderDiskCacheEntry();
130 ProgramCode code, ProgramCode code_b = {});
131 ShaderDiskCacheRaw();
132 ~ShaderDiskCacheRaw();
133 40
134 bool Load(FileUtil::IOFile& file); 41 bool Load(FileUtil::IOFile& file);
135 42
136 bool Save(FileUtil::IOFile& file) const; 43 bool Save(FileUtil::IOFile& file) const;
137 44
138 u64 GetUniqueIdentifier() const {
139 return unique_identifier;
140 }
141
142 bool HasProgramA() const { 45 bool HasProgramA() const {
143 return !code.empty() && !code_b.empty(); 46 return !code.empty() && !code_b.empty();
144 } 47 }
145 48
146 Tegra::Engines::ShaderType GetType() const {
147 return type;
148 }
149
150 const ProgramCode& GetCode() const {
151 return code;
152 }
153
154 const ProgramCode& GetCodeB() const {
155 return code_b;
156 }
157
158private:
159 u64 unique_identifier{};
160 Tegra::Engines::ShaderType type{}; 49 Tegra::Engines::ShaderType type{};
161 ProgramCode code; 50 ProgramCode code;
162 ProgramCode code_b; 51 ProgramCode code_b;
52
53 u64 unique_identifier = 0;
54 std::optional<u32> texture_handler_size;
55 u32 bound_buffer = 0;
56 VideoCommon::Shader::GraphicsInfo graphics_info;
57 VideoCommon::Shader::ComputeInfo compute_info;
58 VideoCommon::Shader::KeyMap keys;
59 VideoCommon::Shader::BoundSamplerMap bound_samplers;
60 VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
163}; 61};
164 62
165/// Contains an OpenGL dumped binary program 63/// Contains an OpenGL dumped binary program
166struct ShaderDiskCacheDump { 64struct ShaderDiskCachePrecompiled {
167 GLenum binary_format{}; 65 u64 unique_identifier = 0;
66 GLenum binary_format = 0;
168 std::vector<u8> binary; 67 std::vector<u8> binary;
169}; 68};
170 69
@@ -174,11 +73,10 @@ public:
174 ~ShaderDiskCacheOpenGL(); 73 ~ShaderDiskCacheOpenGL();
175 74
176 /// Loads transferable cache. If file has a old version or on failure, it deletes the file. 75 /// Loads transferable cache. If file has a old version or on failure, it deletes the file.
177 std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>> 76 std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable();
178 LoadTransferable();
179 77
180 /// Loads current game's precompiled cache. Invalidates on failure. 78 /// Loads current game's precompiled cache. Invalidates on failure.
181 std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> LoadPrecompiled(); 79 std::vector<ShaderDiskCachePrecompiled> LoadPrecompiled();
182 80
183 /// Removes the transferable (and precompiled) cache file. 81 /// Removes the transferable (and precompiled) cache file.
184 void InvalidateTransferable(); 82 void InvalidateTransferable();
@@ -187,21 +85,18 @@ public:
187 void InvalidatePrecompiled(); 85 void InvalidatePrecompiled();
188 86
189 /// Saves a raw dump to the transferable file. Checks for collisions. 87 /// Saves a raw dump to the transferable file. Checks for collisions.
190 void SaveRaw(const ShaderDiskCacheRaw& entry); 88 void SaveEntry(const ShaderDiskCacheEntry& entry);
191
192 /// Saves shader usage to the transferable file. Does not check for collisions.
193 void SaveUsage(const ShaderDiskCacheUsage& usage);
194 89
195 /// Saves a dump entry to the precompiled file. Does not check for collisions. 90 /// Saves a dump entry to the precompiled file. Does not check for collisions.
196 void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program); 91 void SavePrecompiled(u64 unique_identifier, GLuint program);
197 92
198 /// Serializes virtual precompiled shader cache file to real file 93 /// Serializes virtual precompiled shader cache file to real file
199 void SaveVirtualPrecompiledFile(); 94 void SaveVirtualPrecompiledFile();
200 95
201private: 96private:
202 /// Loads the transferable cache. Returns empty on failure. 97 /// Loads the transferable cache. Returns empty on failure.
203 std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>> 98 std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile(
204 LoadPrecompiledFile(FileUtil::IOFile& file); 99 FileUtil::IOFile& file);
205 100
206 /// Opens current game's transferable file and write it's header if it doesn't exist 101 /// Opens current game's transferable file and write it's header if it doesn't exist
207 FileUtil::IOFile AppendTransferableFile() const; 102 FileUtil::IOFile AppendTransferableFile() const;
@@ -270,7 +165,7 @@ private:
270 std::size_t precompiled_cache_virtual_file_offset = 0; 165 std::size_t precompiled_cache_virtual_file_offset = 0;
271 166
272 // Stored transferable shaders 167 // Stored transferable shaders
273 std::unordered_map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable; 168 std::unordered_set<u64> stored_transferable;
274 169
275 // The cache has been loaded at boot 170 // The cache has been loaded at boot
276 bool is_usable{}; 171 bool is_usable{};
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
deleted file mode 100644
index 34946fb47..000000000
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ /dev/null
@@ -1,109 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string>
6
7#include <fmt/format.h>
8
9#include "video_core/engines/maxwell_3d.h"
10#include "video_core/engines/shader_type.h"
11#include "video_core/renderer_opengl/gl_device.h"
12#include "video_core/renderer_opengl/gl_shader_decompiler.h"
13#include "video_core/renderer_opengl/gl_shader_gen.h"
14#include "video_core/shader/shader_ir.h"
15
16namespace OpenGL::GLShader {
17
18using Tegra::Engines::Maxwell3D;
19using Tegra::Engines::ShaderType;
20using VideoCommon::Shader::CompileDepth;
21using VideoCommon::Shader::CompilerSettings;
22using VideoCommon::Shader::ProgramCode;
23using VideoCommon::Shader::ShaderIR;
24
25std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b) {
26 std::string out = GetCommonDeclarations();
27 out += fmt::format(R"(
28layout (std140, binding = {}) uniform vs_config {{
29 float y_direction;
30}};
31
32)",
33 EmulationUniformBlockBinding);
34 out += Decompile(device, ir, ShaderType::Vertex, "vertex");
35 if (ir_b) {
36 out += Decompile(device, *ir_b, ShaderType::Vertex, "vertex_b");
37 }
38
39 out += R"(
40void main() {
41 gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);
42 execute_vertex();
43)";
44 if (ir_b) {
45 out += " execute_vertex_b();";
46 }
47 out += "}\n";
48 return out;
49}
50
51std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) {
52 std::string out = GetCommonDeclarations();
53 out += fmt::format(R"(
54layout (std140, binding = {}) uniform gs_config {{
55 float y_direction;
56}};
57
58)",
59 EmulationUniformBlockBinding);
60 out += Decompile(device, ir, ShaderType::Geometry, "geometry");
61
62 out += R"(
63void main() {
64 execute_geometry();
65}
66)";
67 return out;
68}
69
70std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir) {
71 std::string out = GetCommonDeclarations();
72 out += fmt::format(R"(
73layout (location = 0) out vec4 FragColor0;
74layout (location = 1) out vec4 FragColor1;
75layout (location = 2) out vec4 FragColor2;
76layout (location = 3) out vec4 FragColor3;
77layout (location = 4) out vec4 FragColor4;
78layout (location = 5) out vec4 FragColor5;
79layout (location = 6) out vec4 FragColor6;
80layout (location = 7) out vec4 FragColor7;
81
82layout (std140, binding = {}) uniform fs_config {{
83 float y_direction;
84}};
85
86)",
87 EmulationUniformBlockBinding);
88 out += Decompile(device, ir, ShaderType::Fragment, "fragment");
89
90 out += R"(
91void main() {
92 execute_fragment();
93}
94)";
95 return out;
96}
97
98std::string GenerateComputeShader(const Device& device, const ShaderIR& ir) {
99 std::string out = GetCommonDeclarations();
100 out += Decompile(device, ir, ShaderType::Compute, "compute");
101 out += R"(
102void main() {
103 execute_compute();
104}
105)";
106 return out;
107}
108
109} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
deleted file mode 100644
index cba2be9f9..000000000
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ /dev/null
@@ -1,34 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <vector>
8
9#include "common/common_types.h"
10#include "video_core/renderer_opengl/gl_shader_decompiler.h"
11#include "video_core/shader/shader_ir.h"
12
13namespace OpenGL {
14class Device;
15}
16
17namespace OpenGL::GLShader {
18
19using VideoCommon::Shader::ProgramCode;
20using VideoCommon::Shader::ShaderIR;
21
22/// Generates the GLSL vertex shader program source code for the given VS program
23std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b);
24
25/// Generates the GLSL geometry shader program source code for the given GS program
26std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir);
27
28/// Generates the GLSL fragment shader program source code for the given FS program
29std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir);
30
31/// Generates the GLSL compute shader program source code for the given CS program
32std::string GenerateComputeShader(const Device& device, const ShaderIR& ir);
33
34} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 75d3fac04..9c7b0adbd 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -2,45 +2,52 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <glad/glad.h>
6
5#include "common/common_types.h" 7#include "common/common_types.h"
6#include "video_core/engines/maxwell_3d.h" 8#include "video_core/engines/maxwell_3d.h"
7#include "video_core/renderer_opengl/gl_shader_manager.h" 9#include "video_core/renderer_opengl/gl_shader_manager.h"
8 10
9namespace OpenGL::GLShader { 11namespace OpenGL::GLShader {
10 12
11using Tegra::Engines::Maxwell3D; 13ProgramManager::ProgramManager() = default;
12
13ProgramManager::ProgramManager() {
14 pipeline.Create();
15}
16 14
17ProgramManager::~ProgramManager() = default; 15ProgramManager::~ProgramManager() = default;
18 16
19void ProgramManager::ApplyTo(OpenGLState& state) { 17void ProgramManager::Create() {
20 UpdatePipeline(); 18 graphics_pipeline.Create();
21 state.draw.shader_program = 0; 19 glBindProgramPipeline(graphics_pipeline.handle);
22 state.draw.program_pipeline = pipeline.handle;
23} 20}
24 21
25void ProgramManager::UpdatePipeline() { 22void ProgramManager::BindGraphicsPipeline() {
23 if (!is_graphics_bound) {
24 is_graphics_bound = true;
25 glUseProgram(0);
26 }
27
26 // Avoid updating the pipeline when values have not changed 28 // Avoid updating the pipeline when values have not changed
27 if (old_state == current_state) { 29 if (old_state == current_state) {
28 return; 30 return;
29 } 31 }
30 32
31 // Workaround for AMD bug 33 // Workaround for AMD bug
32 constexpr GLenum all_used_stages{GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | 34 static constexpr GLenum all_used_stages{GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT |
33 GL_FRAGMENT_SHADER_BIT}; 35 GL_FRAGMENT_SHADER_BIT};
34 glUseProgramStages(pipeline.handle, all_used_stages, 0); 36 const GLuint handle = graphics_pipeline.handle;
35 37 glUseProgramStages(handle, all_used_stages, 0);
36 glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current_state.vertex_shader); 38 glUseProgramStages(handle, GL_VERTEX_SHADER_BIT, current_state.vertex_shader);
37 glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, current_state.geometry_shader); 39 glUseProgramStages(handle, GL_GEOMETRY_SHADER_BIT, current_state.geometry_shader);
38 glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current_state.fragment_shader); 40 glUseProgramStages(handle, GL_FRAGMENT_SHADER_BIT, current_state.fragment_shader);
39 41
40 old_state = current_state; 42 old_state = current_state;
41} 43}
42 44
43void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell) { 45void ProgramManager::BindComputeShader(GLuint program) {
46 is_graphics_bound = false;
47 glUseProgram(program);
48}
49
50void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) {
44 const auto& regs = maxwell.regs; 51 const auto& regs = maxwell.regs;
45 52
46 // Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value. 53 // Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value.
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 478c165ce..d2e47f2a9 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -9,7 +9,6 @@
9#include <glad/glad.h> 9#include <glad/glad.h>
10 10
11#include "video_core/renderer_opengl/gl_resource_manager.h" 11#include "video_core/renderer_opengl/gl_resource_manager.h"
12#include "video_core/renderer_opengl/gl_state.h"
13#include "video_core/renderer_opengl/maxwell_to_gl.h" 12#include "video_core/renderer_opengl/maxwell_to_gl.h"
14 13
15namespace OpenGL::GLShader { 14namespace OpenGL::GLShader {
@@ -32,49 +31,47 @@ public:
32 explicit ProgramManager(); 31 explicit ProgramManager();
33 ~ProgramManager(); 32 ~ProgramManager();
34 33
35 void ApplyTo(OpenGLState& state); 34 void Create();
36 35
37 void UseProgrammableVertexShader(GLuint program) { 36 /// Updates the graphics pipeline and binds it.
37 void BindGraphicsPipeline();
38
39 /// Binds a compute shader.
40 void BindComputeShader(GLuint program);
41
42 void UseVertexShader(GLuint program) {
38 current_state.vertex_shader = program; 43 current_state.vertex_shader = program;
39 } 44 }
40 45
41 void UseProgrammableGeometryShader(GLuint program) { 46 void UseGeometryShader(GLuint program) {
42 current_state.geometry_shader = program; 47 current_state.geometry_shader = program;
43 } 48 }
44 49
45 void UseProgrammableFragmentShader(GLuint program) { 50 void UseFragmentShader(GLuint program) {
46 current_state.fragment_shader = program; 51 current_state.fragment_shader = program;
47 } 52 }
48 53
49 void UseTrivialGeometryShader() {
50 current_state.geometry_shader = 0;
51 }
52
53 void UseTrivialFragmentShader() {
54 current_state.fragment_shader = 0;
55 }
56
57private: 54private:
58 struct PipelineState { 55 struct PipelineState {
59 bool operator==(const PipelineState& rhs) const { 56 bool operator==(const PipelineState& rhs) const noexcept {
60 return vertex_shader == rhs.vertex_shader && fragment_shader == rhs.fragment_shader && 57 return vertex_shader == rhs.vertex_shader && fragment_shader == rhs.fragment_shader &&
61 geometry_shader == rhs.geometry_shader; 58 geometry_shader == rhs.geometry_shader;
62 } 59 }
63 60
64 bool operator!=(const PipelineState& rhs) const { 61 bool operator!=(const PipelineState& rhs) const noexcept {
65 return !operator==(rhs); 62 return !operator==(rhs);
66 } 63 }
67 64
68 GLuint vertex_shader{}; 65 GLuint vertex_shader = 0;
69 GLuint fragment_shader{}; 66 GLuint fragment_shader = 0;
70 GLuint geometry_shader{}; 67 GLuint geometry_shader = 0;
71 }; 68 };
72 69
73 void UpdatePipeline(); 70 OGLPipeline graphics_pipeline;
74 71 OGLPipeline compute_pipeline;
75 OGLPipeline pipeline;
76 PipelineState current_state; 72 PipelineState current_state;
77 PipelineState old_state; 73 PipelineState old_state;
74 bool is_graphics_bound = true;
78}; 75};
79 76
80} // namespace OpenGL::GLShader 77} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
deleted file mode 100644
index ab1f7983c..000000000
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ /dev/null
@@ -1,554 +0,0 @@
1// Copyright 2015 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <iterator>
7#include <glad/glad.h>
8#include "common/assert.h"
9#include "common/logging/log.h"
10#include "common/microprofile.h"
11#include "video_core/renderer_opengl/gl_state.h"
12
13MICROPROFILE_DEFINE(OpenGL_State, "OpenGL", "State Change", MP_RGB(192, 128, 128));
14
15namespace OpenGL {
16
17using Maxwell = Tegra::Engines::Maxwell3D::Regs;
18
19OpenGLState OpenGLState::cur_state;
20
21namespace {
22
/// Overwrites current_value with new_value.
/// @return true when the two values compared unequal before the overwrite.
template <typename T>
bool UpdateValue(T& current_value, const T new_value) {
    const bool was_different = (current_value != new_value);
    current_value = new_value;
    return was_different;
}
29
/// Tuple flavour of UpdateValue. current_value is taken by value; the callers in this file pass
/// std::tie(...) results, so the assignment writes through to the tied variables.
/// @return true when the two tuples compared unequal before the assignment.
template <typename T1, typename T2>
bool UpdateTie(T1 current_value, const T2 new_value) {
    const bool was_different = (current_value != new_value);
    current_value = new_value;
    return was_different;
}
36
37template <typename T>
38std::optional<std::pair<GLuint, GLsizei>> UpdateArray(T& current_values, const T& new_values) {
39 std::optional<std::size_t> first;
40 std::size_t last;
41 for (std::size_t i = 0; i < std::size(current_values); ++i) {
42 if (!UpdateValue(current_values[i], new_values[i])) {
43 continue;
44 }
45 if (!first) {
46 first = i;
47 }
48 last = i;
49 }
50 if (!first) {
51 return std::nullopt;
52 }
53 return std::make_pair(static_cast<GLuint>(*first), static_cast<GLsizei>(last - *first + 1));
54}
55
56void Enable(GLenum cap, bool enable) {
57 if (enable) {
58 glEnable(cap);
59 } else {
60 glDisable(cap);
61 }
62}
63
64void Enable(GLenum cap, GLuint index, bool enable) {
65 if (enable) {
66 glEnablei(cap, index);
67 } else {
68 glDisablei(cap, index);
69 }
70}
71
72void Enable(GLenum cap, bool& current_value, bool new_value) {
73 if (UpdateValue(current_value, new_value)) {
74 Enable(cap, new_value);
75 }
76}
77
78void Enable(GLenum cap, GLuint index, bool& current_value, bool new_value) {
79 if (UpdateValue(current_value, new_value)) {
80 Enable(cap, index, new_value);
81 }
82}
83
84} // Anonymous namespace
85
86OpenGLState::OpenGLState() = default;
87
88void OpenGLState::SetDefaultViewports() {
89 viewports.fill(Viewport{});
90
91 depth_clamp.far_plane = false;
92 depth_clamp.near_plane = false;
93}
94
95void OpenGLState::ApplyFramebufferState() {
96 if (UpdateValue(cur_state.draw.read_framebuffer, draw.read_framebuffer)) {
97 glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
98 }
99 if (UpdateValue(cur_state.draw.draw_framebuffer, draw.draw_framebuffer)) {
100 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer);
101 }
102}
103
104void OpenGLState::ApplyVertexArrayState() {
105 if (UpdateValue(cur_state.draw.vertex_array, draw.vertex_array)) {
106 glBindVertexArray(draw.vertex_array);
107 }
108}
109
110void OpenGLState::ApplyShaderProgram() {
111 if (UpdateValue(cur_state.draw.shader_program, draw.shader_program)) {
112 glUseProgram(draw.shader_program);
113 }
114}
115
116void OpenGLState::ApplyProgramPipeline() {
117 if (UpdateValue(cur_state.draw.program_pipeline, draw.program_pipeline)) {
118 glBindProgramPipeline(draw.program_pipeline);
119 }
120}
121
122void OpenGLState::ApplyClipDistances() {
123 for (std::size_t i = 0; i < clip_distance.size(); ++i) {
124 Enable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i), cur_state.clip_distance[i],
125 clip_distance[i]);
126 }
127}
128
129void OpenGLState::ApplyPointSize() {
130 Enable(GL_PROGRAM_POINT_SIZE, cur_state.point.program_control, point.program_control);
131 Enable(GL_POINT_SPRITE, cur_state.point.sprite, point.sprite);
132 if (UpdateValue(cur_state.point.size, point.size)) {
133 glPointSize(point.size);
134 }
135}
136
137void OpenGLState::ApplyFragmentColorClamp() {
138 if (UpdateValue(cur_state.fragment_color_clamp.enabled, fragment_color_clamp.enabled)) {
139 glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB,
140 fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE);
141 }
142}
143
144void OpenGLState::ApplyMultisample() {
145 Enable(GL_SAMPLE_ALPHA_TO_COVERAGE, cur_state.multisample_control.alpha_to_coverage,
146 multisample_control.alpha_to_coverage);
147 Enable(GL_SAMPLE_ALPHA_TO_ONE, cur_state.multisample_control.alpha_to_one,
148 multisample_control.alpha_to_one);
149}
150
151void OpenGLState::ApplyDepthClamp() {
152 if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane &&
153 depth_clamp.near_plane == cur_state.depth_clamp.near_plane) {
154 return;
155 }
156 cur_state.depth_clamp = depth_clamp;
157
158 UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane,
159 "Unimplemented Depth Clamp Separation!");
160
161 Enable(GL_DEPTH_CLAMP, depth_clamp.far_plane || depth_clamp.near_plane);
162}
163
164void OpenGLState::ApplySRgb() {
165 if (cur_state.framebuffer_srgb.enabled == framebuffer_srgb.enabled)
166 return;
167 cur_state.framebuffer_srgb.enabled = framebuffer_srgb.enabled;
168 if (framebuffer_srgb.enabled) {
169 glEnable(GL_FRAMEBUFFER_SRGB);
170 } else {
171 glDisable(GL_FRAMEBUFFER_SRGB);
172 }
173}
174
175void OpenGLState::ApplyCulling() {
176 Enable(GL_CULL_FACE, cur_state.cull.enabled, cull.enabled);
177
178 if (UpdateValue(cur_state.cull.mode, cull.mode)) {
179 glCullFace(cull.mode);
180 }
181
182 if (UpdateValue(cur_state.cull.front_face, cull.front_face)) {
183 glFrontFace(cull.front_face);
184 }
185}
186
187void OpenGLState::ApplyRasterizerDiscard() {
188 Enable(GL_RASTERIZER_DISCARD, cur_state.rasterizer_discard, rasterizer_discard);
189}
190
191void OpenGLState::ApplyColorMask() {
192 if (!dirty.color_mask) {
193 return;
194 }
195 dirty.color_mask = false;
196
197 for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) {
198 const auto& updated = color_mask[i];
199 auto& current = cur_state.color_mask[i];
200 if (updated.red_enabled != current.red_enabled ||
201 updated.green_enabled != current.green_enabled ||
202 updated.blue_enabled != current.blue_enabled ||
203 updated.alpha_enabled != current.alpha_enabled) {
204 current = updated;
205 glColorMaski(static_cast<GLuint>(i), updated.red_enabled, updated.green_enabled,
206 updated.blue_enabled, updated.alpha_enabled);
207 }
208 }
209}
210
211void OpenGLState::ApplyDepth() {
212 Enable(GL_DEPTH_TEST, cur_state.depth.test_enabled, depth.test_enabled);
213
214 if (cur_state.depth.test_func != depth.test_func) {
215 cur_state.depth.test_func = depth.test_func;
216 glDepthFunc(depth.test_func);
217 }
218
219 if (cur_state.depth.write_mask != depth.write_mask) {
220 cur_state.depth.write_mask = depth.write_mask;
221 glDepthMask(depth.write_mask);
222 }
223}
224
225void OpenGLState::ApplyPrimitiveRestart() {
226 Enable(GL_PRIMITIVE_RESTART, cur_state.primitive_restart.enabled, primitive_restart.enabled);
227
228 if (cur_state.primitive_restart.index != primitive_restart.index) {
229 cur_state.primitive_restart.index = primitive_restart.index;
230 glPrimitiveRestartIndex(primitive_restart.index);
231 }
232}
233
234void OpenGLState::ApplyStencilTest() {
235 if (!dirty.stencil_state) {
236 return;
237 }
238 dirty.stencil_state = false;
239
240 Enable(GL_STENCIL_TEST, cur_state.stencil.test_enabled, stencil.test_enabled);
241
242 const auto ConfigStencil = [](GLenum face, const auto& config, auto& current) {
243 if (current.test_func != config.test_func || current.test_ref != config.test_ref ||
244 current.test_mask != config.test_mask) {
245 current.test_func = config.test_func;
246 current.test_ref = config.test_ref;
247 current.test_mask = config.test_mask;
248 glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask);
249 }
250 if (current.action_depth_fail != config.action_depth_fail ||
251 current.action_depth_pass != config.action_depth_pass ||
252 current.action_stencil_fail != config.action_stencil_fail) {
253 current.action_depth_fail = config.action_depth_fail;
254 current.action_depth_pass = config.action_depth_pass;
255 current.action_stencil_fail = config.action_stencil_fail;
256 glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail,
257 config.action_depth_pass);
258 }
259 if (current.write_mask != config.write_mask) {
260 current.write_mask = config.write_mask;
261 glStencilMaskSeparate(face, config.write_mask);
262 }
263 };
264 ConfigStencil(GL_FRONT, stencil.front, cur_state.stencil.front);
265 ConfigStencil(GL_BACK, stencil.back, cur_state.stencil.back);
266}
267
268void OpenGLState::ApplyViewport() {
269 for (GLuint i = 0; i < static_cast<GLuint>(Maxwell::NumViewports); ++i) {
270 const auto& updated = viewports[i];
271 auto& current = cur_state.viewports[i];
272
273 if (current.x != updated.x || current.y != updated.y || current.width != updated.width ||
274 current.height != updated.height) {
275 current.x = updated.x;
276 current.y = updated.y;
277 current.width = updated.width;
278 current.height = updated.height;
279 glViewportIndexedf(i, static_cast<GLfloat>(updated.x), static_cast<GLfloat>(updated.y),
280 static_cast<GLfloat>(updated.width),
281 static_cast<GLfloat>(updated.height));
282 }
283 if (current.depth_range_near != updated.depth_range_near ||
284 current.depth_range_far != updated.depth_range_far) {
285 current.depth_range_near = updated.depth_range_near;
286 current.depth_range_far = updated.depth_range_far;
287 glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far);
288 }
289
290 Enable(GL_SCISSOR_TEST, i, current.scissor.enabled, updated.scissor.enabled);
291
292 if (current.scissor.x != updated.scissor.x || current.scissor.y != updated.scissor.y ||
293 current.scissor.width != updated.scissor.width ||
294 current.scissor.height != updated.scissor.height) {
295 current.scissor.x = updated.scissor.x;
296 current.scissor.y = updated.scissor.y;
297 current.scissor.width = updated.scissor.width;
298 current.scissor.height = updated.scissor.height;
299 glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width,
300 updated.scissor.height);
301 }
302 }
303}
304
305void OpenGLState::ApplyGlobalBlending() {
306 const Blend& updated = blend[0];
307 Blend& current = cur_state.blend[0];
308
309 Enable(GL_BLEND, current.enabled, updated.enabled);
310
311 if (current.src_rgb_func != updated.src_rgb_func ||
312 current.dst_rgb_func != updated.dst_rgb_func || current.src_a_func != updated.src_a_func ||
313 current.dst_a_func != updated.dst_a_func) {
314 current.src_rgb_func = updated.src_rgb_func;
315 current.dst_rgb_func = updated.dst_rgb_func;
316 current.src_a_func = updated.src_a_func;
317 current.dst_a_func = updated.dst_a_func;
318 glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
319 updated.dst_a_func);
320 }
321
322 if (current.rgb_equation != updated.rgb_equation || current.a_equation != updated.a_equation) {
323 current.rgb_equation = updated.rgb_equation;
324 current.a_equation = updated.a_equation;
325 glBlendEquationSeparate(updated.rgb_equation, updated.a_equation);
326 }
327}
328
329void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) {
330 const Blend& updated = blend[target];
331 Blend& current = cur_state.blend[target];
332
333 if (current.enabled != updated.enabled || force) {
334 current.enabled = updated.enabled;
335 Enable(GL_BLEND, static_cast<GLuint>(target), updated.enabled);
336 }
337
338 if (UpdateTie(std::tie(current.src_rgb_func, current.dst_rgb_func, current.src_a_func,
339 current.dst_a_func),
340 std::tie(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
341 updated.dst_a_func))) {
342 glBlendFuncSeparatei(static_cast<GLuint>(target), updated.src_rgb_func,
343 updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func);
344 }
345
346 if (UpdateTie(std::tie(current.rgb_equation, current.a_equation),
347 std::tie(updated.rgb_equation, updated.a_equation))) {
348 glBlendEquationSeparatei(static_cast<GLuint>(target), updated.rgb_equation,
349 updated.a_equation);
350 }
351}
352
353void OpenGLState::ApplyBlending() {
354 if (!dirty.blend_state) {
355 return;
356 }
357 dirty.blend_state = false;
358
359 if (independant_blend.enabled) {
360 const bool force = independant_blend.enabled != cur_state.independant_blend.enabled;
361 for (std::size_t target = 0; target < Maxwell::NumRenderTargets; ++target) {
362 ApplyTargetBlending(target, force);
363 }
364 } else {
365 ApplyGlobalBlending();
366 }
367 cur_state.independant_blend.enabled = independant_blend.enabled;
368
369 if (UpdateTie(
370 std::tie(cur_state.blend_color.red, cur_state.blend_color.green,
371 cur_state.blend_color.blue, cur_state.blend_color.alpha),
372 std::tie(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha))) {
373 glBlendColor(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha);
374 }
375}
376
377void OpenGLState::ApplyLogicOp() {
378 Enable(GL_COLOR_LOGIC_OP, cur_state.logic_op.enabled, logic_op.enabled);
379
380 if (UpdateValue(cur_state.logic_op.operation, logic_op.operation)) {
381 glLogicOp(logic_op.operation);
382 }
383}
384
385void OpenGLState::ApplyPolygonOffset() {
386 if (!dirty.polygon_offset) {
387 return;
388 }
389 dirty.polygon_offset = false;
390
391 Enable(GL_POLYGON_OFFSET_FILL, cur_state.polygon_offset.fill_enable,
392 polygon_offset.fill_enable);
393 Enable(GL_POLYGON_OFFSET_LINE, cur_state.polygon_offset.line_enable,
394 polygon_offset.line_enable);
395 Enable(GL_POLYGON_OFFSET_POINT, cur_state.polygon_offset.point_enable,
396 polygon_offset.point_enable);
397
398 if (UpdateTie(std::tie(cur_state.polygon_offset.factor, cur_state.polygon_offset.units,
399 cur_state.polygon_offset.clamp),
400 std::tie(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp))) {
401 if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) {
402 glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp);
403 } else {
404 UNIMPLEMENTED_IF_MSG(polygon_offset.clamp != 0,
405 "Unimplemented Depth polygon offset clamp.");
406 glPolygonOffset(polygon_offset.factor, polygon_offset.units);
407 }
408 }
409}
410
411void OpenGLState::ApplyAlphaTest() {
412 Enable(GL_ALPHA_TEST, cur_state.alpha_test.enabled, alpha_test.enabled);
413 if (UpdateTie(std::tie(cur_state.alpha_test.func, cur_state.alpha_test.ref),
414 std::tie(alpha_test.func, alpha_test.ref))) {
415 glAlphaFunc(alpha_test.func, alpha_test.ref);
416 }
417}
418
419void OpenGLState::ApplyClipControl() {
420 if (UpdateTie(std::tie(cur_state.clip_control.origin, cur_state.clip_control.depth_mode),
421 std::tie(clip_control.origin, clip_control.depth_mode))) {
422 glClipControl(clip_control.origin, clip_control.depth_mode);
423 }
424}
425
426void OpenGLState::ApplyTextures() {
427 const std::size_t size = std::size(textures);
428 for (std::size_t i = 0; i < size; ++i) {
429 if (UpdateValue(cur_state.textures[i], textures[i])) {
430 // BindTextureUnit doesn't support binding null textures, skip those binds.
431 // TODO(Rodrigo): Stop using null textures
432 if (textures[i] != 0) {
433 glBindTextureUnit(static_cast<GLuint>(i), textures[i]);
434 }
435 }
436 }
437}
438
439void OpenGLState::ApplySamplers() {
440 const std::size_t size = std::size(samplers);
441 for (std::size_t i = 0; i < size; ++i) {
442 if (UpdateValue(cur_state.samplers[i], samplers[i])) {
443 glBindSampler(static_cast<GLuint>(i), samplers[i]);
444 }
445 }
446}
447
448void OpenGLState::ApplyImages() {
449 if (const auto update = UpdateArray(cur_state.images, images)) {
450 glBindImageTextures(update->first, update->second, images.data() + update->first);
451 }
452}
453
454void OpenGLState::Apply() {
455 MICROPROFILE_SCOPE(OpenGL_State);
456 ApplyFramebufferState();
457 ApplyVertexArrayState();
458 ApplyShaderProgram();
459 ApplyProgramPipeline();
460 ApplyClipDistances();
461 ApplyPointSize();
462 ApplyFragmentColorClamp();
463 ApplyMultisample();
464 ApplyRasterizerDiscard();
465 ApplyColorMask();
466 ApplyDepthClamp();
467 ApplyViewport();
468 ApplyStencilTest();
469 ApplySRgb();
470 ApplyCulling();
471 ApplyDepth();
472 ApplyPrimitiveRestart();
473 ApplyBlending();
474 ApplyLogicOp();
475 ApplyTextures();
476 ApplySamplers();
477 ApplyImages();
478 ApplyPolygonOffset();
479 ApplyAlphaTest();
480 ApplyClipControl();
481}
482
483void OpenGLState::EmulateViewportWithScissor() {
484 auto& current = viewports[0];
485 if (current.scissor.enabled) {
486 const GLint left = std::max(current.x, current.scissor.x);
487 const GLint right =
488 std::max(current.x + current.width, current.scissor.x + current.scissor.width);
489 const GLint bottom = std::max(current.y, current.scissor.y);
490 const GLint top =
491 std::max(current.y + current.height, current.scissor.y + current.scissor.height);
492 current.scissor.x = std::max(left, 0);
493 current.scissor.y = std::max(bottom, 0);
494 current.scissor.width = std::max(right - left, 0);
495 current.scissor.height = std::max(top - bottom, 0);
496 } else {
497 current.scissor.enabled = true;
498 current.scissor.x = current.x;
499 current.scissor.y = current.y;
500 current.scissor.width = current.width;
501 current.scissor.height = current.height;
502 }
503}
504
505OpenGLState& OpenGLState::UnbindTexture(GLuint handle) {
506 for (auto& texture : textures) {
507 if (texture == handle) {
508 texture = 0;
509 }
510 }
511 return *this;
512}
513
514OpenGLState& OpenGLState::ResetSampler(GLuint handle) {
515 for (auto& sampler : samplers) {
516 if (sampler == handle) {
517 sampler = 0;
518 }
519 }
520 return *this;
521}
522
523OpenGLState& OpenGLState::ResetProgram(GLuint handle) {
524 if (draw.shader_program == handle) {
525 draw.shader_program = 0;
526 }
527 return *this;
528}
529
530OpenGLState& OpenGLState::ResetPipeline(GLuint handle) {
531 if (draw.program_pipeline == handle) {
532 draw.program_pipeline = 0;
533 }
534 return *this;
535}
536
537OpenGLState& OpenGLState::ResetVertexArray(GLuint handle) {
538 if (draw.vertex_array == handle) {
539 draw.vertex_array = 0;
540 }
541 return *this;
542}
543
544OpenGLState& OpenGLState::ResetFramebuffer(GLuint handle) {
545 if (draw.read_framebuffer == handle) {
546 draw.read_framebuffer = 0;
547 }
548 if (draw.draw_framebuffer == handle) {
549 draw.draw_framebuffer = 0;
550 }
551 return *this;
552}
553
554} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
deleted file mode 100644
index 4953eeda2..000000000
--- a/src/video_core/renderer_opengl/gl_state.h
+++ /dev/null
@@ -1,247 +0,0 @@
1// Copyright 2015 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <type_traits>
9#include <glad/glad.h>
10#include "video_core/engines/maxwell_3d.h"
11
12namespace OpenGL {
13
14class OpenGLState {
15public:
16 struct {
17 bool enabled = false; // GL_FRAMEBUFFER_SRGB
18 } framebuffer_srgb;
19
20 struct {
21 bool alpha_to_coverage = false; // GL_ALPHA_TO_COVERAGE
22 bool alpha_to_one = false; // GL_ALPHA_TO_ONE
23 } multisample_control;
24
25 struct {
26 bool enabled = false; // GL_CLAMP_FRAGMENT_COLOR_ARB
27 } fragment_color_clamp;
28
29 struct {
30 bool far_plane = false;
31 bool near_plane = false;
32 } depth_clamp; // GL_DEPTH_CLAMP
33
34 struct {
35 bool enabled = false; // GL_CULL_FACE
36 GLenum mode = GL_BACK; // GL_CULL_FACE_MODE
37 GLenum front_face = GL_CCW; // GL_FRONT_FACE
38 } cull;
39
40 struct {
41 bool test_enabled = false; // GL_DEPTH_TEST
42 GLboolean write_mask = GL_TRUE; // GL_DEPTH_WRITEMASK
43 GLenum test_func = GL_LESS; // GL_DEPTH_FUNC
44 } depth;
45
46 struct {
47 bool enabled = false;
48 GLuint index = 0;
49 } primitive_restart; // GL_PRIMITIVE_RESTART
50
51 bool rasterizer_discard = false; // GL_RASTERIZER_DISCARD
52
53 struct ColorMask {
54 GLboolean red_enabled = GL_TRUE;
55 GLboolean green_enabled = GL_TRUE;
56 GLboolean blue_enabled = GL_TRUE;
57 GLboolean alpha_enabled = GL_TRUE;
58 };
59 std::array<ColorMask, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
60 color_mask; // GL_COLOR_WRITEMASK
61
62 struct {
63 bool test_enabled = false; // GL_STENCIL_TEST
64 struct {
65 GLenum test_func = GL_ALWAYS; // GL_STENCIL_FUNC
66 GLint test_ref = 0; // GL_STENCIL_REF
67 GLuint test_mask = 0xFFFFFFFF; // GL_STENCIL_VALUE_MASK
68 GLuint write_mask = 0xFFFFFFFF; // GL_STENCIL_WRITEMASK
69 GLenum action_stencil_fail = GL_KEEP; // GL_STENCIL_FAIL
70 GLenum action_depth_fail = GL_KEEP; // GL_STENCIL_PASS_DEPTH_FAIL
71 GLenum action_depth_pass = GL_KEEP; // GL_STENCIL_PASS_DEPTH_PASS
72 } front, back;
73 } stencil;
74
75 struct Blend {
76 bool enabled = false; // GL_BLEND
77 GLenum rgb_equation = GL_FUNC_ADD; // GL_BLEND_EQUATION_RGB
78 GLenum a_equation = GL_FUNC_ADD; // GL_BLEND_EQUATION_ALPHA
79 GLenum src_rgb_func = GL_ONE; // GL_BLEND_SRC_RGB
80 GLenum dst_rgb_func = GL_ZERO; // GL_BLEND_DST_RGB
81 GLenum src_a_func = GL_ONE; // GL_BLEND_SRC_ALPHA
82 GLenum dst_a_func = GL_ZERO; // GL_BLEND_DST_ALPHA
83 };
84 std::array<Blend, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> blend;
85
86 struct {
87 bool enabled = false;
88 } independant_blend;
89
90 struct {
91 GLclampf red = 0.0f;
92 GLclampf green = 0.0f;
93 GLclampf blue = 0.0f;
94 GLclampf alpha = 0.0f;
95 } blend_color; // GL_BLEND_COLOR
96
97 struct {
98 bool enabled = false; // GL_LOGIC_OP_MODE
99 GLenum operation = GL_COPY;
100 } logic_op;
101
102 static constexpr std::size_t NumSamplers = 32 * 5;
103 static constexpr std::size_t NumImages = 8 * 5;
104 std::array<GLuint, NumSamplers> textures = {};
105 std::array<GLuint, NumSamplers> samplers = {};
106 std::array<GLuint, NumImages> images = {};
107
108 struct {
109 GLuint read_framebuffer = 0; // GL_READ_FRAMEBUFFER_BINDING
110 GLuint draw_framebuffer = 0; // GL_DRAW_FRAMEBUFFER_BINDING
111 GLuint vertex_array = 0; // GL_VERTEX_ARRAY_BINDING
112 GLuint shader_program = 0; // GL_CURRENT_PROGRAM
113 GLuint program_pipeline = 0; // GL_PROGRAM_PIPELINE_BINDING
114 } draw;
115
116 struct Viewport {
117 GLint x = 0;
118 GLint y = 0;
119 GLint width = 0;
120 GLint height = 0;
121 GLfloat depth_range_near = 0.0f; // GL_DEPTH_RANGE
122 GLfloat depth_range_far = 1.0f; // GL_DEPTH_RANGE
123 struct {
124 bool enabled = false; // GL_SCISSOR_TEST
125 GLint x = 0;
126 GLint y = 0;
127 GLsizei width = 0;
128 GLsizei height = 0;
129 } scissor;
130 };
131 std::array<Viewport, Tegra::Engines::Maxwell3D::Regs::NumViewports> viewports;
132
133 struct {
134 bool program_control = false; // GL_PROGRAM_POINT_SIZE
135 bool sprite = false; // GL_POINT_SPRITE
136 GLfloat size = 1.0f; // GL_POINT_SIZE
137 } point;
138
139 struct {
140 bool point_enable = false;
141 bool line_enable = false;
142 bool fill_enable = false;
143 GLfloat units = 0.0f;
144 GLfloat factor = 0.0f;
145 GLfloat clamp = 0.0f;
146 } polygon_offset;
147
148 struct {
149 bool enabled = false; // GL_ALPHA_TEST
150 GLenum func = GL_ALWAYS; // GL_ALPHA_TEST_FUNC
151 GLfloat ref = 0.0f; // GL_ALPHA_TEST_REF
152 } alpha_test;
153
154 std::array<bool, 8> clip_distance = {}; // GL_CLIP_DISTANCE
155
156 struct {
157 GLenum origin = GL_LOWER_LEFT;
158 GLenum depth_mode = GL_NEGATIVE_ONE_TO_ONE;
159 } clip_control;
160
161 OpenGLState();
162
163 /// Get the currently active OpenGL state
164 static OpenGLState GetCurState() {
165 return cur_state;
166 }
167
168 void SetDefaultViewports();
169 /// Apply this state as the current OpenGL state
170 void Apply();
171
172 void ApplyFramebufferState();
173 void ApplyVertexArrayState();
174 void ApplyShaderProgram();
175 void ApplyProgramPipeline();
176 void ApplyClipDistances();
177 void ApplyPointSize();
178 void ApplyFragmentColorClamp();
179 void ApplyMultisample();
180 void ApplySRgb();
181 void ApplyCulling();
182 void ApplyRasterizerDiscard();
183 void ApplyColorMask();
184 void ApplyDepth();
185 void ApplyPrimitiveRestart();
186 void ApplyStencilTest();
187 void ApplyViewport();
188 void ApplyTargetBlending(std::size_t target, bool force);
189 void ApplyGlobalBlending();
190 void ApplyBlending();
191 void ApplyLogicOp();
192 void ApplyTextures();
193 void ApplySamplers();
194 void ApplyImages();
195 void ApplyDepthClamp();
196 void ApplyPolygonOffset();
197 void ApplyAlphaTest();
198 void ApplyClipControl();
199
200 /// Resets any references to the given resource
201 OpenGLState& UnbindTexture(GLuint handle);
202 OpenGLState& ResetSampler(GLuint handle);
203 OpenGLState& ResetProgram(GLuint handle);
204 OpenGLState& ResetPipeline(GLuint handle);
205 OpenGLState& ResetVertexArray(GLuint handle);
206 OpenGLState& ResetFramebuffer(GLuint handle);
207
208 /// Viewport does not affects glClearBuffer so emulate viewport using scissor test
209 void EmulateViewportWithScissor();
210
211 void MarkDirtyBlendState() {
212 dirty.blend_state = true;
213 }
214
215 void MarkDirtyStencilState() {
216 dirty.stencil_state = true;
217 }
218
219 void MarkDirtyPolygonOffset() {
220 dirty.polygon_offset = true;
221 }
222
223 void MarkDirtyColorMask() {
224 dirty.color_mask = true;
225 }
226
227 void AllDirty() {
228 dirty.blend_state = true;
229 dirty.stencil_state = true;
230 dirty.polygon_offset = true;
231 dirty.color_mask = true;
232 }
233
234private:
235 static OpenGLState cur_state;
236
237 struct {
238 bool blend_state;
239 bool stencil_state;
240 bool viewport_state;
241 bool polygon_offset;
242 bool color_mask;
243 } dirty{};
244};
245static_assert(std::is_trivially_copyable_v<OpenGLState>);
246
247} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp
new file mode 100644
index 000000000..255ac3147
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp
@@ -0,0 +1,247 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <cstddef>
8
9#include "common/common_types.h"
10#include "core/core.h"
11#include "video_core/engines/maxwell_3d.h"
12#include "video_core/gpu.h"
13#include "video_core/renderer_opengl/gl_state_tracker.h"
14
15#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name)
16#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / sizeof(u32))
17
18namespace OpenGL {
19
20namespace {
21
22using namespace Dirty;
23using namespace VideoCommon::Dirty;
24using Tegra::Engines::Maxwell3D;
25using Regs = Maxwell3D::Regs;
26using Tables = Maxwell3D::DirtyState::Tables;
27using Table = Maxwell3D::DirtyState::Table;
28
29void SetupDirtyColorMasks(Tables& tables) {
30 tables[0][OFF(color_mask_common)] = ColorMaskCommon;
31 for (std::size_t rt = 0; rt < Regs::NumRenderTargets; ++rt) {
32 const std::size_t offset = OFF(color_mask) + rt * NUM(color_mask[0]);
33 FillBlock(tables[0], offset, NUM(color_mask[0]), ColorMask0 + rt);
34 }
35
36 FillBlock(tables[1], OFF(color_mask), NUM(color_mask), ColorMasks);
37}
38
39void SetupDirtyVertexArrays(Tables& tables) {
40 static constexpr std::size_t num_array = 3;
41 static constexpr std::size_t instance_base_offset = 3;
42 for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) {
43 const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]);
44 const std::size_t limit_offset = OFF(vertex_array_limit) + i * NUM(vertex_array_limit[0]);
45
46 FillBlock(tables, array_offset, num_array, VertexBuffer0 + i, VertexBuffers);
47 FillBlock(tables, limit_offset, NUM(vertex_array_limit), VertexBuffer0 + i, VertexBuffers);
48
49 const std::size_t instance_array_offset = array_offset + instance_base_offset;
50 tables[0][instance_array_offset] = static_cast<u8>(VertexInstance0 + i);
51 tables[1][instance_array_offset] = VertexInstances;
52
53 const std::size_t instance_offset = OFF(instanced_arrays) + i;
54 tables[0][instance_offset] = static_cast<u8>(VertexInstance0 + i);
55 tables[1][instance_offset] = VertexInstances;
56 }
57}
58
59void SetupDirtyVertexFormat(Tables& tables) {
60 for (std::size_t i = 0; i < Regs::NumVertexAttributes; ++i) {
61 const std::size_t offset = OFF(vertex_attrib_format) + i * NUM(vertex_attrib_format[0]);
62 FillBlock(tables[0], offset, NUM(vertex_attrib_format[0]), VertexFormat0 + i);
63 }
64
65 FillBlock(tables[1], OFF(vertex_attrib_format), Regs::NumVertexAttributes, VertexFormats);
66}
67
68void SetupDirtyViewports(Tables& tables) {
69 for (std::size_t i = 0; i < Regs::NumViewports; ++i) {
70 const std::size_t transf_offset = OFF(viewport_transform) + i * NUM(viewport_transform[0]);
71 const std::size_t viewport_offset = OFF(viewports) + i * NUM(viewports[0]);
72
73 FillBlock(tables[0], transf_offset, NUM(viewport_transform[0]), Viewport0 + i);
74 FillBlock(tables[0], viewport_offset, NUM(viewports[0]), Viewport0 + i);
75 }
76
77 FillBlock(tables[1], OFF(viewport_transform), NUM(viewport_transform), Viewports);
78 FillBlock(tables[1], OFF(viewports), NUM(viewports), Viewports);
79
80 tables[0][OFF(viewport_transform_enabled)] = ViewportTransform;
81 tables[1][OFF(viewport_transform_enabled)] = Viewports;
82}
83
84void SetupDirtyScissors(Tables& tables) {
85 for (std::size_t i = 0; i < Regs::NumViewports; ++i) {
86 const std::size_t offset = OFF(scissor_test) + i * NUM(scissor_test[0]);
87 FillBlock(tables[0], offset, NUM(scissor_test[0]), Scissor0 + i);
88 }
89 FillBlock(tables[1], OFF(scissor_test), NUM(scissor_test), Scissors);
90}
91
92void SetupDirtyShaders(Tables& tables) {
93 FillBlock(tables[0], OFF(shader_config[0]), NUM(shader_config[0]) * Regs::MaxShaderProgram,
94 Shaders);
95}
96
97void SetupDirtyPolygonModes(Tables& tables) {
98 tables[0][OFF(polygon_mode_front)] = PolygonModeFront;
99 tables[0][OFF(polygon_mode_back)] = PolygonModeBack;
100
101 tables[1][OFF(polygon_mode_front)] = PolygonModes;
102 tables[1][OFF(polygon_mode_back)] = PolygonModes;
103 tables[0][OFF(fill_rectangle)] = PolygonModes;
104}
105
106void SetupDirtyDepthTest(Tables& tables) {
107 auto& table = tables[0];
108 table[OFF(depth_test_enable)] = DepthTest;
109 table[OFF(depth_write_enabled)] = DepthMask;
110 table[OFF(depth_test_func)] = DepthTest;
111}
112
113void SetupDirtyStencilTest(Tables& tables) {
114 static constexpr std::array offsets = {
115 OFF(stencil_enable), OFF(stencil_front_func_func), OFF(stencil_front_func_ref),
116 OFF(stencil_front_func_mask), OFF(stencil_front_op_fail), OFF(stencil_front_op_zfail),
117 OFF(stencil_front_op_zpass), OFF(stencil_front_mask), OFF(stencil_two_side_enable),
118 OFF(stencil_back_func_func), OFF(stencil_back_func_ref), OFF(stencil_back_func_mask),
119 OFF(stencil_back_op_fail), OFF(stencil_back_op_zfail), OFF(stencil_back_op_zpass),
120 OFF(stencil_back_mask)};
121 for (const auto offset : offsets) {
122 tables[0][offset] = StencilTest;
123 }
124}
125
126void SetupDirtyAlphaTest(Tables& tables) {
127 auto& table = tables[0];
128 table[OFF(alpha_test_ref)] = AlphaTest;
129 table[OFF(alpha_test_func)] = AlphaTest;
130 table[OFF(alpha_test_enabled)] = AlphaTest;
131}
132
133void SetupDirtyBlend(Tables& tables) {
134 FillBlock(tables[0], OFF(blend_color), NUM(blend_color), BlendColor);
135
136 tables[0][OFF(independent_blend_enable)] = BlendIndependentEnabled;
137
138 for (std::size_t i = 0; i < Regs::NumRenderTargets; ++i) {
139 const std::size_t offset = OFF(independent_blend) + i * NUM(independent_blend[0]);
140 FillBlock(tables[0], offset, NUM(independent_blend[0]), BlendState0 + i);
141
142 tables[0][OFF(blend.enable) + i] = static_cast<u8>(BlendState0 + i);
143 }
144 FillBlock(tables[1], OFF(independent_blend), NUM(independent_blend), BlendStates);
145 FillBlock(tables[1], OFF(blend), NUM(blend), BlendStates);
146}
147
148void SetupDirtyPrimitiveRestart(Tables& tables) {
149 FillBlock(tables[0], OFF(primitive_restart), NUM(primitive_restart), PrimitiveRestart);
150}
151
152void SetupDirtyPolygonOffset(Tables& tables) {
153 auto& table = tables[0];
154 table[OFF(polygon_offset_fill_enable)] = PolygonOffset;
155 table[OFF(polygon_offset_line_enable)] = PolygonOffset;
156 table[OFF(polygon_offset_point_enable)] = PolygonOffset;
157 table[OFF(polygon_offset_factor)] = PolygonOffset;
158 table[OFF(polygon_offset_units)] = PolygonOffset;
159 table[OFF(polygon_offset_clamp)] = PolygonOffset;
160}
161
162void SetupDirtyMultisampleControl(Tables& tables) {
163 FillBlock(tables[0], OFF(multisample_control), NUM(multisample_control), MultisampleControl);
164}
165
166void SetupDirtyRasterizeEnable(Tables& tables) {
167 tables[0][OFF(rasterize_enable)] = RasterizeEnable;
168}
169
170void SetupDirtyFramebufferSRGB(Tables& tables) {
171 tables[0][OFF(framebuffer_srgb)] = FramebufferSRGB;
172}
173
174void SetupDirtyLogicOp(Tables& tables) {
175 FillBlock(tables[0], OFF(logic_op), NUM(logic_op), LogicOp);
176}
177
178void SetupDirtyFragmentClampColor(Tables& tables) {
179 tables[0][OFF(frag_color_clamp)] = FragmentClampColor;
180}
181
182void SetupDirtyPointSize(Tables& tables) {
183 tables[0][OFF(vp_point_size)] = PointSize;
184 tables[0][OFF(point_size)] = PointSize;
185 tables[0][OFF(point_sprite_enable)] = PointSize;
186}
187
188void SetupDirtyClipControl(Tables& tables) {
189 auto& table = tables[0];
190 table[OFF(screen_y_control)] = ClipControl;
191 table[OFF(depth_mode)] = ClipControl;
192}
193
194void SetupDirtyDepthClampEnabled(Tables& tables) {
195 tables[0][OFF(view_volume_clip_control)] = DepthClampEnabled;
196}
197
198void SetupDirtyMisc(Tables& tables) {
199 auto& table = tables[0];
200
201 table[OFF(clip_distance_enabled)] = ClipDistances;
202
203 table[OFF(front_face)] = FrontFace;
204
205 table[OFF(cull_test_enabled)] = CullTest;
206 table[OFF(cull_face)] = CullTest;
207}
208
209} // Anonymous namespace
210
211StateTracker::StateTracker(Core::System& system) : system{system} {}
212
213void StateTracker::Initialize() {
214 auto& dirty = system.GPU().Maxwell3D().dirty;
215 auto& tables = dirty.tables;
216 SetupDirtyRenderTargets(tables);
217 SetupDirtyColorMasks(tables);
218 SetupDirtyViewports(tables);
219 SetupDirtyScissors(tables);
220 SetupDirtyVertexArrays(tables);
221 SetupDirtyVertexFormat(tables);
222 SetupDirtyShaders(tables);
223 SetupDirtyPolygonModes(tables);
224 SetupDirtyDepthTest(tables);
225 SetupDirtyStencilTest(tables);
226 SetupDirtyAlphaTest(tables);
227 SetupDirtyBlend(tables);
228 SetupDirtyPrimitiveRestart(tables);
229 SetupDirtyPolygonOffset(tables);
230 SetupDirtyMultisampleControl(tables);
231 SetupDirtyRasterizeEnable(tables);
232 SetupDirtyFramebufferSRGB(tables);
233 SetupDirtyLogicOp(tables);
234 SetupDirtyFragmentClampColor(tables);
235 SetupDirtyPointSize(tables);
236 SetupDirtyClipControl(tables);
237 SetupDirtyDepthClampEnabled(tables);
238 SetupDirtyMisc(tables);
239
240 auto& store = dirty.on_write_stores;
241 store[VertexBuffers] = true;
242 for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) {
243 store[VertexBuffer0 + i] = true;
244 }
245}
246
247} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h
new file mode 100644
index 000000000..b882d75c3
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_state_tracker.h
@@ -0,0 +1,215 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <limits>
8
9#include <glad/glad.h>
10
11#include "common/common_types.h"
12#include "core/core.h"
13#include "video_core/dirty_flags.h"
14#include "video_core/engines/maxwell_3d.h"
15
16namespace Core {
17class System;
18}
19
20namespace OpenGL {
21
22namespace Dirty {
23
// Indices of the OpenGL-specific dirty flags. Values are assigned by position, so the order of
// the enumerators must not change. A plural name (e.g. VertexFormats) is the group flag and is
// followed by the per-index flags (VertexFormat0 .. VertexFormatN).
24enum : u8 {
// Continues numbering at the value of VideoCommon::Dirty::LastCommonEntry.
25 First = VideoCommon::Dirty::LastCommonEntry,
26
27 VertexFormats,
28 VertexFormat0,
29 VertexFormat31 = VertexFormat0 + 31,
30
31 VertexBuffers,
32 VertexBuffer0,
33 VertexBuffer31 = VertexBuffer0 + 31,
34
35 VertexInstances,
36 VertexInstance0,
37 VertexInstance31 = VertexInstance0 + 31,
38
39 ViewportTransform,
40 Viewports,
41 Viewport0,
42 Viewport15 = Viewport0 + 15,
43
44 Scissors,
45 Scissor0,
46 Scissor15 = Scissor0 + 15,
47
48 ColorMaskCommon,
49 ColorMasks,
50 ColorMask0,
51 ColorMask7 = ColorMask0 + 7,
52
53 BlendColor,
54 BlendIndependentEnabled,
55 BlendStates,
56 BlendState0,
57 BlendState7 = BlendState0 + 7,
58
59 Shaders,
60 ClipDistances,
61
62 PolygonModes,
63 PolygonModeFront,
64 PolygonModeBack,
65
// NOTE(review): ColorMask is not referenced by the code visible here (ColorMasks/ColorMask0 are
// used instead) - confirm whether it is still needed before removing it.
66 ColorMask,
67 FrontFace,
68 CullTest,
69 DepthMask,
70 DepthTest,
71 StencilTest,
72 AlphaTest,
73 PrimitiveRestart,
74 PolygonOffset,
75 MultisampleControl,
76 RasterizeEnable,
77 FramebufferSRGB,
78 LogicOp,
79 FragmentClampColor,
80 PointSize,
81 ClipControl,
82 DepthClampEnabled,
83
// One past the last flag index.
84 Last
85};
// Guards that the final index still fits in the u8 underlying type.
86static_assert(Last <= std::numeric_limits<u8>::max());
87
88} // namespace Dirty
89
90class StateTracker {
91public:
92 explicit StateTracker(Core::System& system);
93
94 void Initialize();
95
96 void BindIndexBuffer(GLuint new_index_buffer) {
97 if (index_buffer == new_index_buffer) {
98 return;
99 }
100 index_buffer = new_index_buffer;
101 glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, new_index_buffer);
102 }
103
104 void NotifyScreenDrawVertexArray() {
105 auto& flags = system.GPU().Maxwell3D().dirty.flags;
106 flags[OpenGL::Dirty::VertexFormats] = true;
107 flags[OpenGL::Dirty::VertexFormat0 + 0] = true;
108 flags[OpenGL::Dirty::VertexFormat0 + 1] = true;
109
110 flags[OpenGL::Dirty::VertexBuffers] = true;
111 flags[OpenGL::Dirty::VertexBuffer0] = true;
112
113 flags[OpenGL::Dirty::VertexInstances] = true;
114 flags[OpenGL::Dirty::VertexInstance0 + 0] = true;
115 flags[OpenGL::Dirty::VertexInstance0 + 1] = true;
116 }
117
118 void NotifyPolygonModes() {
119 auto& flags = system.GPU().Maxwell3D().dirty.flags;
120 flags[OpenGL::Dirty::PolygonModes] = true;
121 flags[OpenGL::Dirty::PolygonModeFront] = true;
122 flags[OpenGL::Dirty::PolygonModeBack] = true;
123 }
124
125 void NotifyViewport0() {
126 auto& flags = system.GPU().Maxwell3D().dirty.flags;
127 flags[OpenGL::Dirty::Viewports] = true;
128 flags[OpenGL::Dirty::Viewport0] = true;
129 }
130
131 void NotifyScissor0() {
132 auto& flags = system.GPU().Maxwell3D().dirty.flags;
133 flags[OpenGL::Dirty::Scissors] = true;
134 flags[OpenGL::Dirty::Scissor0] = true;
135 }
136
137 void NotifyColorMask0() {
138 auto& flags = system.GPU().Maxwell3D().dirty.flags;
139 flags[OpenGL::Dirty::ColorMasks] = true;
140 flags[OpenGL::Dirty::ColorMask0] = true;
141 }
142
143 void NotifyBlend0() {
144 auto& flags = system.GPU().Maxwell3D().dirty.flags;
145 flags[OpenGL::Dirty::BlendStates] = true;
146 flags[OpenGL::Dirty::BlendState0] = true;
147 }
148
149 void NotifyFramebuffer() {
150 auto& flags = system.GPU().Maxwell3D().dirty.flags;
151 flags[VideoCommon::Dirty::RenderTargets] = true;
152 }
153
154 void NotifyFrontFace() {
155 auto& flags = system.GPU().Maxwell3D().dirty.flags;
156 flags[OpenGL::Dirty::FrontFace] = true;
157 }
158
159 void NotifyCullTest() {
160 auto& flags = system.GPU().Maxwell3D().dirty.flags;
161 flags[OpenGL::Dirty::CullTest] = true;
162 }
163
164 void NotifyDepthMask() {
165 auto& flags = system.GPU().Maxwell3D().dirty.flags;
166 flags[OpenGL::Dirty::DepthMask] = true;
167 }
168
169 void NotifyDepthTest() {
170 auto& flags = system.GPU().Maxwell3D().dirty.flags;
171 flags[OpenGL::Dirty::DepthTest] = true;
172 }
173
174 void NotifyStencilTest() {
175 auto& flags = system.GPU().Maxwell3D().dirty.flags;
176 flags[OpenGL::Dirty::StencilTest] = true;
177 }
178
179 void NotifyPolygonOffset() {
180 auto& flags = system.GPU().Maxwell3D().dirty.flags;
181 flags[OpenGL::Dirty::PolygonOffset] = true;
182 }
183
184 void NotifyRasterizeEnable() {
185 auto& flags = system.GPU().Maxwell3D().dirty.flags;
186 flags[OpenGL::Dirty::RasterizeEnable] = true;
187 }
188
189 void NotifyFramebufferSRGB() {
190 auto& flags = system.GPU().Maxwell3D().dirty.flags;
191 flags[OpenGL::Dirty::FramebufferSRGB] = true;
192 }
193
194 void NotifyLogicOp() {
195 auto& flags = system.GPU().Maxwell3D().dirty.flags;
196 flags[OpenGL::Dirty::LogicOp] = true;
197 }
198
199 void NotifyClipControl() {
200 auto& flags = system.GPU().Maxwell3D().dirty.flags;
201 flags[OpenGL::Dirty::ClipControl] = true;
202 }
203
204 void NotifyAlphaTest() {
205 auto& flags = system.GPU().Maxwell3D().dirty.flags;
206 flags[OpenGL::Dirty::AlphaTest] = true;
207 }
208
209private:
210 Core::System& system;
211
212 GLuint index_buffer = 0;
213};
214
215} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index 35ba334e4..6ec328c53 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -7,7 +7,6 @@
7#include "common/alignment.h" 7#include "common/alignment.h"
8#include "common/assert.h" 8#include "common/assert.h"
9#include "common/microprofile.h" 9#include "common/microprofile.h"
10#include "video_core/renderer_opengl/gl_state.h"
11#include "video_core/renderer_opengl/gl_stream_buffer.h" 10#include "video_core/renderer_opengl/gl_stream_buffer.h"
12 11
13MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", 12MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 5c1ae1418..f424e3000 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -10,7 +10,7 @@
10#include "core/core.h" 10#include "core/core.h"
11#include "video_core/morton.h" 11#include "video_core/morton.h"
12#include "video_core/renderer_opengl/gl_resource_manager.h" 12#include "video_core/renderer_opengl/gl_resource_manager.h"
13#include "video_core/renderer_opengl/gl_state.h" 13#include "video_core/renderer_opengl/gl_state_tracker.h"
14#include "video_core/renderer_opengl/gl_texture_cache.h" 14#include "video_core/renderer_opengl/gl_texture_cache.h"
15#include "video_core/renderer_opengl/utils.h" 15#include "video_core/renderer_opengl/utils.h"
16#include "video_core/texture_cache/surface_base.h" 16#include "video_core/texture_cache/surface_base.h"
@@ -53,6 +53,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format
53 {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, false}, // R8UI 53 {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, false}, // R8UI
54 {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F 54 {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F
55 {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, false}, // RGBA16U 55 {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, false}, // RGBA16U
56 {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT, false}, // RGBA16S
56 {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, false}, // RGBA16UI 57 {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, false}, // RGBA16UI
57 {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F 58 {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F
58 {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, false}, // RGBA32UI 59 {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, false}, // RGBA32UI
@@ -87,6 +88,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format
87 {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false}, // RG32UI 88 {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false}, // RG32UI
88 {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBX16F 89 {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBX16F
89 {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false}, // R32UI 90 {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false}, // R32UI
91 {GL_R32I, GL_RED_INTEGER, GL_INT, false}, // R32I
90 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8 92 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8
91 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5 93 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5
92 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X4 94 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X4
@@ -396,6 +398,7 @@ CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& p
396 const bool is_proxy) 398 const bool is_proxy)
397 : VideoCommon::ViewBase(params), surface{surface}, is_proxy{is_proxy} { 399 : VideoCommon::ViewBase(params), surface{surface}, is_proxy{is_proxy} {
398 target = GetTextureTarget(params.target); 400 target = GetTextureTarget(params.target);
401 format = GetFormatTuple(surface.GetSurfaceParams().pixel_format).internal_format;
399 if (!is_proxy) { 402 if (!is_proxy) {
400 texture_view = CreateTextureView(); 403 texture_view = CreateTextureView();
401 } 404 }
@@ -405,24 +408,36 @@ CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& p
405CachedSurfaceView::~CachedSurfaceView() = default; 408CachedSurfaceView::~CachedSurfaceView() = default;
406 409
407void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const { 410void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const {
408 ASSERT(params.num_layers == 1 && params.num_levels == 1); 411 ASSERT(params.num_levels == 1);
409 412
410 const auto& owner_params = surface.GetSurfaceParams(); 413 const GLuint texture = surface.GetTexture();
414 if (params.num_layers > 1) {
415 // Layered framebuffer attachments
416 UNIMPLEMENTED_IF(params.base_layer != 0);
411 417
412 switch (owner_params.target) { 418 switch (params.target) {
419 case SurfaceTarget::Texture2DArray:
420 glFramebufferTexture(target, attachment, texture, params.base_level);
421 break;
422 default:
423 UNIMPLEMENTED();
424 }
425 return;
426 }
427
428 const GLenum view_target = surface.GetTarget();
429 switch (surface.GetSurfaceParams().target) {
413 case SurfaceTarget::Texture1D: 430 case SurfaceTarget::Texture1D:
414 glFramebufferTexture1D(target, attachment, surface.GetTarget(), surface.GetTexture(), 431 glFramebufferTexture1D(target, attachment, view_target, texture, params.base_level);
415 params.base_level);
416 break; 432 break;
417 case SurfaceTarget::Texture2D: 433 case SurfaceTarget::Texture2D:
418 glFramebufferTexture2D(target, attachment, surface.GetTarget(), surface.GetTexture(), 434 glFramebufferTexture2D(target, attachment, view_target, texture, params.base_level);
419 params.base_level);
420 break; 435 break;
421 case SurfaceTarget::Texture1DArray: 436 case SurfaceTarget::Texture1DArray:
422 case SurfaceTarget::Texture2DArray: 437 case SurfaceTarget::Texture2DArray:
423 case SurfaceTarget::TextureCubemap: 438 case SurfaceTarget::TextureCubemap:
424 case SurfaceTarget::TextureCubeArray: 439 case SurfaceTarget::TextureCubeArray:
425 glFramebufferTextureLayer(target, attachment, surface.GetTexture(), params.base_level, 440 glFramebufferTextureLayer(target, attachment, texture, params.base_level,
426 params.base_layer); 441 params.base_layer);
427 break; 442 break;
428 default: 443 default:
@@ -454,25 +469,20 @@ void CachedSurfaceView::ApplySwizzle(SwizzleSource x_source, SwizzleSource y_sou
454} 469}
455 470
456OGLTextureView CachedSurfaceView::CreateTextureView() const { 471OGLTextureView CachedSurfaceView::CreateTextureView() const {
457 const auto& owner_params = surface.GetSurfaceParams();
458 OGLTextureView texture_view; 472 OGLTextureView texture_view;
459 texture_view.Create(); 473 texture_view.Create();
460 474
461 const GLuint handle{texture_view.handle}; 475 glTextureView(texture_view.handle, target, surface.texture.handle, format, params.base_level,
462 const FormatTuple& tuple{GetFormatTuple(owner_params.pixel_format)};
463
464 glTextureView(handle, target, surface.texture.handle, tuple.internal_format, params.base_level,
465 params.num_levels, params.base_layer, params.num_layers); 476 params.num_levels, params.base_layer, params.num_layers);
466 477 ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle);
467 ApplyTextureDefaults(owner_params, handle);
468 478
469 return texture_view; 479 return texture_view;
470} 480}
471 481
472TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, 482TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system,
473 VideoCore::RasterizerInterface& rasterizer, 483 VideoCore::RasterizerInterface& rasterizer,
474 const Device& device) 484 const Device& device, StateTracker& state_tracker)
475 : TextureCacheBase{system, rasterizer} { 485 : TextureCacheBase{system, rasterizer}, state_tracker{state_tracker} {
476 src_framebuffer.Create(); 486 src_framebuffer.Create();
477 dst_framebuffer.Create(); 487 dst_framebuffer.Create();
478} 488}
@@ -506,25 +516,26 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
506 const Tegra::Engines::Fermi2D::Config& copy_config) { 516 const Tegra::Engines::Fermi2D::Config& copy_config) {
507 const auto& src_params{src_view->GetSurfaceParams()}; 517 const auto& src_params{src_view->GetSurfaceParams()};
508 const auto& dst_params{dst_view->GetSurfaceParams()}; 518 const auto& dst_params{dst_view->GetSurfaceParams()};
519 UNIMPLEMENTED_IF(src_params.target == SurfaceTarget::Texture3D);
520 UNIMPLEMENTED_IF(dst_params.target == SurfaceTarget::Texture3D);
509 521
510 OpenGLState prev_state{OpenGLState::GetCurState()}; 522 state_tracker.NotifyScissor0();
511 SCOPE_EXIT({ 523 state_tracker.NotifyFramebuffer();
512 prev_state.AllDirty(); 524 state_tracker.NotifyRasterizeEnable();
513 prev_state.Apply(); 525 state_tracker.NotifyFramebufferSRGB();
514 });
515
516 OpenGLState state;
517 state.draw.read_framebuffer = src_framebuffer.handle;
518 state.draw.draw_framebuffer = dst_framebuffer.handle;
519 state.framebuffer_srgb.enabled = dst_params.srgb_conversion;
520 state.AllDirty();
521 state.Apply();
522 526
523 u32 buffers{}; 527 if (dst_params.srgb_conversion) {
528 glEnable(GL_FRAMEBUFFER_SRGB);
529 } else {
530 glDisable(GL_FRAMEBUFFER_SRGB);
531 }
532 glDisable(GL_RASTERIZER_DISCARD);
533 glDisablei(GL_SCISSOR_TEST, 0);
524 534
525 UNIMPLEMENTED_IF(src_params.target == SurfaceTarget::Texture3D); 535 glBindFramebuffer(GL_READ_FRAMEBUFFER, src_framebuffer.handle);
526 UNIMPLEMENTED_IF(dst_params.target == SurfaceTarget::Texture3D); 536 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_framebuffer.handle);
527 537
538 GLenum buffers = 0;
528 if (src_params.type == SurfaceType::ColorTexture) { 539 if (src_params.type == SurfaceType::ColorTexture) {
529 src_view->Attach(GL_COLOR_ATTACHMENT0, GL_READ_FRAMEBUFFER); 540 src_view->Attach(GL_COLOR_ATTACHMENT0, GL_READ_FRAMEBUFFER);
530 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 541 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 8e13ab38b..6658c6ffd 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -27,6 +27,7 @@ using VideoCommon::ViewParams;
27class CachedSurfaceView; 27class CachedSurfaceView;
28class CachedSurface; 28class CachedSurface;
29class TextureCacheOpenGL; 29class TextureCacheOpenGL;
30class StateTracker;
30 31
31using Surface = std::shared_ptr<CachedSurface>; 32using Surface = std::shared_ptr<CachedSurface>;
32using View = std::shared_ptr<CachedSurfaceView>; 33using View = std::shared_ptr<CachedSurfaceView>;
@@ -96,6 +97,10 @@ public:
96 return texture_view.handle; 97 return texture_view.handle;
97 } 98 }
98 99
100 GLenum GetFormat() const {
101 return format;
102 }
103
99 const SurfaceParams& GetSurfaceParams() const { 104 const SurfaceParams& GetSurfaceParams() const {
100 return surface.GetSurfaceParams(); 105 return surface.GetSurfaceParams();
101 } 106 }
@@ -113,6 +118,7 @@ private:
113 118
114 CachedSurface& surface; 119 CachedSurface& surface;
115 GLenum target{}; 120 GLenum target{};
121 GLenum format{};
116 122
117 OGLTextureView texture_view; 123 OGLTextureView texture_view;
118 u32 swizzle{}; 124 u32 swizzle{};
@@ -122,7 +128,7 @@ private:
122class TextureCacheOpenGL final : public TextureCacheBase { 128class TextureCacheOpenGL final : public TextureCacheBase {
123public: 129public:
124 explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer, 130 explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
125 const Device& device); 131 const Device& device, StateTracker& state_tracker);
126 ~TextureCacheOpenGL(); 132 ~TextureCacheOpenGL();
127 133
128protected: 134protected:
@@ -139,6 +145,8 @@ protected:
139private: 145private:
140 GLuint FetchPBO(std::size_t buffer_size); 146 GLuint FetchPBO(std::size_t buffer_size);
141 147
148 StateTracker& state_tracker;
149
142 OGLFramebuffer src_framebuffer; 150 OGLFramebuffer src_framebuffer;
143 OGLFramebuffer dst_framebuffer; 151 OGLFramebuffer dst_framebuffer;
144 std::unordered_map<u32, OGLBuffer> copy_pbo_cache; 152 std::unordered_map<u32, OGLBuffer> copy_pbo_cache;
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index 7ed505628..89f0e04ef 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -92,8 +92,32 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
92 } 92 }
93 case Maxwell::VertexAttribute::Type::UnsignedScaled: 93 case Maxwell::VertexAttribute::Type::UnsignedScaled:
94 switch (attrib.size) { 94 switch (attrib.size) {
95 case Maxwell::VertexAttribute::Size::Size_8:
95 case Maxwell::VertexAttribute::Size::Size_8_8: 96 case Maxwell::VertexAttribute::Size::Size_8_8:
97 case Maxwell::VertexAttribute::Size::Size_8_8_8:
98 case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
96 return GL_UNSIGNED_BYTE; 99 return GL_UNSIGNED_BYTE;
100 case Maxwell::VertexAttribute::Size::Size_16:
101 case Maxwell::VertexAttribute::Size::Size_16_16:
102 case Maxwell::VertexAttribute::Size::Size_16_16_16:
103 case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
104 return GL_UNSIGNED_SHORT;
105 default:
106 LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
107 return {};
108 }
109 case Maxwell::VertexAttribute::Type::SignedScaled:
110 switch (attrib.size) {
111 case Maxwell::VertexAttribute::Size::Size_8:
112 case Maxwell::VertexAttribute::Size::Size_8_8:
113 case Maxwell::VertexAttribute::Size::Size_8_8_8:
114 case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
115 return GL_BYTE;
116 case Maxwell::VertexAttribute::Size::Size_16:
117 case Maxwell::VertexAttribute::Size::Size_16_16:
118 case Maxwell::VertexAttribute::Size::Size_16_16_16:
119 case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
120 return GL_SHORT;
97 default: 121 default:
98 LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); 122 LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
99 return {}; 123 return {};
@@ -401,24 +425,24 @@ inline GLenum StencilOp(Maxwell::StencilOp stencil) {
401 return GL_KEEP; 425 return GL_KEEP;
402} 426}
403 427
404inline GLenum FrontFace(Maxwell::Cull::FrontFace front_face) { 428inline GLenum FrontFace(Maxwell::FrontFace front_face) {
405 switch (front_face) { 429 switch (front_face) {
406 case Maxwell::Cull::FrontFace::ClockWise: 430 case Maxwell::FrontFace::ClockWise:
407 return GL_CW; 431 return GL_CW;
408 case Maxwell::Cull::FrontFace::CounterClockWise: 432 case Maxwell::FrontFace::CounterClockWise:
409 return GL_CCW; 433 return GL_CCW;
410 } 434 }
411 LOG_ERROR(Render_OpenGL, "Unimplemented front face cull={}", static_cast<u32>(front_face)); 435 LOG_ERROR(Render_OpenGL, "Unimplemented front face cull={}", static_cast<u32>(front_face));
412 return GL_CCW; 436 return GL_CCW;
413} 437}
414 438
415inline GLenum CullFace(Maxwell::Cull::CullFace cull_face) { 439inline GLenum CullFace(Maxwell::CullFace cull_face) {
416 switch (cull_face) { 440 switch (cull_face) {
417 case Maxwell::Cull::CullFace::Front: 441 case Maxwell::CullFace::Front:
418 return GL_FRONT; 442 return GL_FRONT;
419 case Maxwell::Cull::CullFace::Back: 443 case Maxwell::CullFace::Back:
420 return GL_BACK; 444 return GL_BACK;
421 case Maxwell::Cull::CullFace::FrontAndBack: 445 case Maxwell::CullFace::FrontAndBack:
422 return GL_FRONT_AND_BACK; 446 return GL_FRONT_AND_BACK;
423 } 447 }
424 LOG_ERROR(Render_OpenGL, "Unimplemented cull face={}", static_cast<u32>(cull_face)); 448 LOG_ERROR(Render_OpenGL, "Unimplemented cull face={}", static_cast<u32>(cull_face));
@@ -464,5 +488,18 @@ inline GLenum LogicOp(Maxwell::LogicOperation operation) {
464 return GL_COPY; 488 return GL_COPY;
465} 489}
466 490
491inline GLenum PolygonMode(Maxwell::PolygonMode polygon_mode) {
492 switch (polygon_mode) {
493 case Maxwell::PolygonMode::Point:
494 return GL_POINT;
495 case Maxwell::PolygonMode::Line:
496 return GL_LINE;
497 case Maxwell::PolygonMode::Fill:
498 return GL_FILL;
499 }
500 UNREACHABLE_MSG("Invalid polygon mode={}", static_cast<int>(polygon_mode));
501 return GL_FILL;
502}
503
467} // namespace MaxwellToGL 504} // namespace MaxwellToGL
468} // namespace OpenGL 505} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index bba16afaf..fca5e3ec0 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -5,30 +5,54 @@
5#include <algorithm> 5#include <algorithm>
6#include <cstddef> 6#include <cstddef>
7#include <cstdlib> 7#include <cstdlib>
8#include <cstring>
8#include <memory> 9#include <memory>
10
9#include <glad/glad.h> 11#include <glad/glad.h>
12
10#include "common/assert.h" 13#include "common/assert.h"
11#include "common/logging/log.h" 14#include "common/logging/log.h"
15#include "common/microprofile.h"
12#include "common/telemetry.h" 16#include "common/telemetry.h"
13#include "core/core.h" 17#include "core/core.h"
14#include "core/core_timing.h" 18#include "core/core_timing.h"
15#include "core/frontend/emu_window.h" 19#include "core/frontend/emu_window.h"
16#include "core/frontend/scope_acquire_window_context.h"
17#include "core/memory.h" 20#include "core/memory.h"
18#include "core/perf_stats.h" 21#include "core/perf_stats.h"
19#include "core/settings.h" 22#include "core/settings.h"
20#include "core/telemetry_session.h" 23#include "core/telemetry_session.h"
21#include "video_core/morton.h" 24#include "video_core/morton.h"
22#include "video_core/renderer_opengl/gl_rasterizer.h" 25#include "video_core/renderer_opengl/gl_rasterizer.h"
26#include "video_core/renderer_opengl/gl_shader_manager.h"
23#include "video_core/renderer_opengl/renderer_opengl.h" 27#include "video_core/renderer_opengl/renderer_opengl.h"
24 28
25namespace OpenGL { 29namespace OpenGL {
26 30
27namespace { 31namespace {
28 32
29constexpr char vertex_shader[] = R"( 33// If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have
34// to wait on available presentation frames.
35constexpr std::size_t SWAP_CHAIN_SIZE = 3;
36
37struct Frame {
38 u32 width{}; /// Width of the frame (to detect resize)
39 u32 height{}; /// Height of the frame
40 bool color_reloaded{}; /// Texture attachment was recreated (ie: resized)
41 OpenGL::OGLRenderbuffer color{}; /// Buffer shared between the render/present FBO
42 OpenGL::OGLFramebuffer render{}; /// FBO created on the render thread
43 OpenGL::OGLFramebuffer present{}; /// FBO created on the present thread
44 GLsync render_fence{}; /// Fence created on the render thread
45 GLsync present_fence{}; /// Fence created on the presentation thread
46 bool is_srgb{}; /// Framebuffer is sRGB or RGB
47};
48
49constexpr char VERTEX_SHADER[] = R"(
30#version 430 core 50#version 430 core
31 51
52out gl_PerVertex {
53 vec4 gl_Position;
54};
55
32layout (location = 0) in vec2 vert_position; 56layout (location = 0) in vec2 vert_position;
33layout (location = 1) in vec2 vert_tex_coord; 57layout (location = 1) in vec2 vert_tex_coord;
34layout (location = 0) out vec2 frag_tex_coord; 58layout (location = 0) out vec2 frag_tex_coord;
@@ -49,7 +73,7 @@ void main() {
49} 73}
50)"; 74)";
51 75
52constexpr char fragment_shader[] = R"( 76constexpr char FRAGMENT_SHADER[] = R"(
53#version 430 core 77#version 430 core
54 78
55layout (location = 0) in vec2 frag_tex_coord; 79layout (location = 0) in vec2 frag_tex_coord;
@@ -58,7 +82,7 @@ layout (location = 0) out vec4 color;
58layout (binding = 0) uniform sampler2D color_texture; 82layout (binding = 0) uniform sampler2D color_texture;
59 83
60void main() { 84void main() {
61 color = texture(color_texture, frag_tex_coord); 85 color = vec4(texture(color_texture, frag_tex_coord).rgb, 1.0f);
62} 86}
63)"; 87)";
64 88
@@ -67,13 +91,31 @@ constexpr GLint TexCoordLocation = 1;
67constexpr GLint ModelViewMatrixLocation = 0; 91constexpr GLint ModelViewMatrixLocation = 0;
68 92
69struct ScreenRectVertex { 93struct ScreenRectVertex {
70 constexpr ScreenRectVertex(GLfloat x, GLfloat y, GLfloat u, GLfloat v) 94 constexpr ScreenRectVertex(u32 x, u32 y, GLfloat u, GLfloat v)
71 : position{{x, y}}, tex_coord{{u, v}} {} 95 : position{{static_cast<GLfloat>(x), static_cast<GLfloat>(y)}}, tex_coord{{u, v}} {}
72 96
73 std::array<GLfloat, 2> position; 97 std::array<GLfloat, 2> position;
74 std::array<GLfloat, 2> tex_coord; 98 std::array<GLfloat, 2> tex_coord;
75}; 99};
76 100
101/// Returns true if any debug tool is attached
102bool HasDebugTool() {
103 const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
104 if (nsight) {
105 return true;
106 }
107
108 GLint num_extensions;
109 glGetIntegerv(GL_NUM_EXTENSIONS, &num_extensions);
110 for (GLuint index = 0; index < static_cast<GLuint>(num_extensions); ++index) {
111 const auto name = reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, index));
112 if (!std::strcmp(name, "GL_EXT_debug_tool")) {
113 return true;
114 }
115 }
116 return false;
117}
118
77/** 119/**
78 * Defines a 1:1 pixel ortographic projection matrix with (0,0) on the top-left 120 * Defines a 1:1 pixel ortographic projection matrix with (0,0) on the top-left
79 * corner and (width, height) on the lower-bottom. 121 * corner and (width, height) on the lower-bottom.
@@ -157,22 +199,229 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit
157 199
158} // Anonymous namespace 200} // Anonymous namespace
159 201
202/**
203 * For smooth Vsync rendering, we want to always present the latest frame that the core generates,
204 * but also make sure that rendering happens at the pace that the frontend dictates. This is a
205 * helper class that the renderer uses to sync frames between the render thread and the presentation
206 * thread
207 */
208class FrameMailbox {
209public:
210 std::mutex swap_chain_lock;
211 std::condition_variable present_cv;
212 std::array<Frame, SWAP_CHAIN_SIZE> swap_chain{};
213 std::queue<Frame*> free_queue;
214 std::deque<Frame*> present_queue;
215 Frame* previous_frame{};
216
217 FrameMailbox() : has_debug_tool{HasDebugTool()} {
218 for (auto& frame : swap_chain) {
219 free_queue.push(&frame);
220 }
221 }
222
223 ~FrameMailbox() {
224 // lock the mutex and clear out the present and free_queues and notify any people who are
225 // blocked to prevent deadlock on shutdown
226 std::scoped_lock lock{swap_chain_lock};
227 std::queue<Frame*>().swap(free_queue);
228 present_queue.clear();
229 present_cv.notify_all();
230 }
231
232 void ReloadPresentFrame(Frame* frame, u32 height, u32 width) {
233 frame->present.Release();
234 frame->present.Create();
235 GLint previous_draw_fbo{};
236 glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &previous_draw_fbo);
237 glBindFramebuffer(GL_FRAMEBUFFER, frame->present.handle);
238 glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
239 frame->color.handle);
240 if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
241 LOG_CRITICAL(Render_OpenGL, "Failed to recreate present FBO!");
242 }
243 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, previous_draw_fbo);
244 frame->color_reloaded = false;
245 }
246
247 void ReloadRenderFrame(Frame* frame, u32 width, u32 height) {
248 // Recreate the color texture attachment
249 frame->color.Release();
250 frame->color.Create();
251 const GLenum internal_format = frame->is_srgb ? GL_SRGB8 : GL_RGB8;
252 glNamedRenderbufferStorage(frame->color.handle, internal_format, width, height);
253
254 // Recreate the FBO for the render target
255 frame->render.Release();
256 frame->render.Create();
257 glBindFramebuffer(GL_FRAMEBUFFER, frame->render.handle);
258 glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
259 frame->color.handle);
260 if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
261 LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!");
262 }
263
264 frame->width = width;
265 frame->height = height;
266 frame->color_reloaded = true;
267 }
268
269 Frame* GetRenderFrame() {
270 std::unique_lock lock{swap_chain_lock};
271
272 // If theres no free frames, we will reuse the oldest render frame
273 if (free_queue.empty()) {
274 auto frame = present_queue.back();
275 present_queue.pop_back();
276 return frame;
277 }
278
279 Frame* frame = free_queue.front();
280 free_queue.pop();
281 return frame;
282 }
283
284 void ReleaseRenderFrame(Frame* frame) {
285 std::unique_lock lock{swap_chain_lock};
286 present_queue.push_front(frame);
287 present_cv.notify_one();
288
289 DebugNotifyNextFrame();
290 }
291
292 Frame* TryGetPresentFrame(int timeout_ms) {
293 DebugWaitForNextFrame();
294
295 std::unique_lock lock{swap_chain_lock};
296 // wait for new entries in the present_queue
297 present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms),
298 [&] { return !present_queue.empty(); });
299 if (present_queue.empty()) {
300 // timed out waiting for a frame to draw so return the previous frame
301 return previous_frame;
302 }
303
304 // free the previous frame and add it back to the free queue
305 if (previous_frame) {
306 free_queue.push(previous_frame);
307 }
308
309 // the newest entries are pushed to the front of the queue
310 Frame* frame = present_queue.front();
311 present_queue.pop_front();
312 // remove all old entries from the present queue and move them back to the free_queue
313 for (auto f : present_queue) {
314 free_queue.push(f);
315 }
316 present_queue.clear();
317 previous_frame = frame;
318 return frame;
319 }
320
321private:
322 std::mutex debug_synch_mutex;
323 std::condition_variable debug_synch_condition;
324 std::atomic_int frame_for_debug{};
325 const bool has_debug_tool; // When true, using a GPU debugger, so keep frames in lock-step
326
327 /// Signal that a new frame is available (called from GPU thread)
328 void DebugNotifyNextFrame() {
329 if (!has_debug_tool) {
330 return;
331 }
332 frame_for_debug++;
333 std::lock_guard lock{debug_synch_mutex};
334 debug_synch_condition.notify_one();
335 }
336
337 /// Wait for a new frame to be available (called from presentation thread)
338 void DebugWaitForNextFrame() {
339 if (!has_debug_tool) {
340 return;
341 }
342 const int last_frame = frame_for_debug;
343 std::unique_lock lock{debug_synch_mutex};
344 debug_synch_condition.wait(lock,
345 [this, last_frame] { return frame_for_debug > last_frame; });
346 }
347};
348
160RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system) 349RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system)
161 : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system} {} 350 : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system},
351 frame_mailbox{std::make_unique<FrameMailbox>()} {}
162 352
163RendererOpenGL::~RendererOpenGL() = default; 353RendererOpenGL::~RendererOpenGL() = default;
164 354
355MICROPROFILE_DEFINE(OpenGL_RenderFrame, "OpenGL", "Render Frame", MP_RGB(128, 128, 64));
356MICROPROFILE_DEFINE(OpenGL_WaitPresent, "OpenGL", "Wait For Present", MP_RGB(128, 128, 128));
357
165void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { 358void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
166 // Maintain the rasterizer's state as a priority 359 render_window.PollEvents();
167 OpenGLState prev_state = OpenGLState::GetCurState(); 360
168 state.AllDirty(); 361 if (!framebuffer) {
169 state.Apply(); 362 return;
363 }
364
365 PrepareRendertarget(framebuffer);
366 RenderScreenshot();
367
368 Frame* frame;
369 {
370 MICROPROFILE_SCOPE(OpenGL_WaitPresent);
371
372 frame = frame_mailbox->GetRenderFrame();
373
374 // Clean up sync objects before drawing
375
376 // INTEL driver workaround. We can't delete the previous render sync object until we are
377 // sure that the presentation is done
378 if (frame->present_fence) {
379 glClientWaitSync(frame->present_fence, 0, GL_TIMEOUT_IGNORED);
380 }
381
382 // delete the draw fence if the frame wasn't presented
383 if (frame->render_fence) {
384 glDeleteSync(frame->render_fence);
385 frame->render_fence = 0;
386 }
387
388 // wait for the presentation to be done
389 if (frame->present_fence) {
390 glWaitSync(frame->present_fence, 0, GL_TIMEOUT_IGNORED);
391 glDeleteSync(frame->present_fence);
392 frame->present_fence = 0;
393 }
394 }
170 395
396 {
397 MICROPROFILE_SCOPE(OpenGL_RenderFrame);
398 const auto& layout = render_window.GetFramebufferLayout();
399
400 // Recreate the frame if the size of the window has changed
401 if (layout.width != frame->width || layout.height != frame->height ||
402 screen_info.display_srgb != frame->is_srgb) {
403 LOG_DEBUG(Render_OpenGL, "Reloading render frame");
404 frame->is_srgb = screen_info.display_srgb;
405 frame_mailbox->ReloadRenderFrame(frame, layout.width, layout.height);
406 }
407 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, frame->render.handle);
408 DrawScreen(layout);
409 // Create a fence for the frontend to wait on and swap this frame to OffTex
410 frame->render_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
411 glFlush();
412 frame_mailbox->ReleaseRenderFrame(frame);
413 m_current_frame++;
414 rasterizer->TickFrame();
415 }
416}
417
418void RendererOpenGL::PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer) {
171 if (framebuffer) { 419 if (framebuffer) {
172 // If framebuffer is provided, reload it from memory to a texture 420 // If framebuffer is provided, reload it from memory to a texture
173 if (screen_info.texture.width != static_cast<GLsizei>(framebuffer->width) || 421 if (screen_info.texture.width != static_cast<GLsizei>(framebuffer->width) ||
174 screen_info.texture.height != static_cast<GLsizei>(framebuffer->height) || 422 screen_info.texture.height != static_cast<GLsizei>(framebuffer->height) ||
175 screen_info.texture.pixel_format != framebuffer->pixel_format) { 423 screen_info.texture.pixel_format != framebuffer->pixel_format ||
424 gl_framebuffer_data.empty()) {
176 // Reallocate texture if the framebuffer size has changed. 425 // Reallocate texture if the framebuffer size has changed.
177 // This is expected to not happen very often and hence should not be a 426 // This is expected to not happen very often and hence should not be a
178 // performance problem. 427 // performance problem.
@@ -181,22 +430,7 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
181 430
182 // Load the framebuffer from memory, draw it to the screen, and swap buffers 431 // Load the framebuffer from memory, draw it to the screen, and swap buffers
183 LoadFBToScreenInfo(*framebuffer); 432 LoadFBToScreenInfo(*framebuffer);
184
185 if (renderer_settings.screenshot_requested)
186 CaptureScreenshot();
187
188 DrawScreen(render_window.GetFramebufferLayout());
189
190 rasterizer->TickFrame();
191
192 render_window.SwapBuffers();
193 } 433 }
194
195 render_window.PollEvents();
196
197 // Restore the rasterizer state
198 prev_state.AllDirty();
199 prev_state.Apply();
200} 434}
201 435
202void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) { 436void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) {
@@ -249,31 +483,24 @@ void RendererOpenGL::InitOpenGLObjects() {
249 glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, 483 glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue,
250 0.0f); 484 0.0f);
251 485
252 // Link shaders and get variable locations 486 // Create shader programs
253 shader.CreateFromSource(vertex_shader, nullptr, fragment_shader); 487 OGLShader vertex_shader;
254 state.draw.shader_program = shader.handle; 488 vertex_shader.Create(VERTEX_SHADER, GL_VERTEX_SHADER);
255 state.AllDirty(); 489
256 state.Apply(); 490 OGLShader fragment_shader;
491 fragment_shader.Create(FRAGMENT_SHADER, GL_FRAGMENT_SHADER);
492
493 vertex_program.Create(true, false, vertex_shader.handle);
494 fragment_program.Create(true, false, fragment_shader.handle);
495
496 // Create program pipeline
497 program_manager.Create();
257 498
258 // Generate VBO handle for drawing 499 // Generate VBO handle for drawing
259 vertex_buffer.Create(); 500 vertex_buffer.Create();
260 501
261 // Generate VAO
262 vertex_array.Create();
263 state.draw.vertex_array = vertex_array.handle;
264
265 // Attach vertex data to VAO 502 // Attach vertex data to VAO
266 glNamedBufferData(vertex_buffer.handle, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW); 503 glNamedBufferData(vertex_buffer.handle, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW);
267 glVertexArrayAttribFormat(vertex_array.handle, PositionLocation, 2, GL_FLOAT, GL_FALSE,
268 offsetof(ScreenRectVertex, position));
269 glVertexArrayAttribFormat(vertex_array.handle, TexCoordLocation, 2, GL_FLOAT, GL_FALSE,
270 offsetof(ScreenRectVertex, tex_coord));
271 glVertexArrayAttribBinding(vertex_array.handle, PositionLocation, 0);
272 glVertexArrayAttribBinding(vertex_array.handle, TexCoordLocation, 0);
273 glEnableVertexArrayAttrib(vertex_array.handle, PositionLocation);
274 glEnableVertexArrayAttrib(vertex_array.handle, TexCoordLocation);
275 glVertexArrayVertexBuffer(vertex_array.handle, 0, vertex_buffer.handle, 0,
276 sizeof(ScreenRectVertex));
277 504
278 // Allocate textures for the screen 505 // Allocate textures for the screen
279 screen_info.texture.resource.Create(GL_TEXTURE_2D); 506 screen_info.texture.resource.Create(GL_TEXTURE_2D);
@@ -306,7 +533,8 @@ void RendererOpenGL::CreateRasterizer() {
306 if (rasterizer) { 533 if (rasterizer) {
307 return; 534 return;
308 } 535 }
309 rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info); 536 rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info,
537 program_manager, state_tracker);
310} 538}
311 539
312void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, 540void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
@@ -345,8 +573,19 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
345 glTextureStorage2D(texture.resource.handle, 1, internal_format, texture.width, texture.height); 573 glTextureStorage2D(texture.resource.handle, 1, internal_format, texture.width, texture.height);
346} 574}
347 575
348void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w, 576void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
349 float h) { 577 if (renderer_settings.set_background_color) {
578 // Update background color before drawing
579 glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue,
580 0.0f);
581 }
582
583 // Set projection matrix
584 const std::array ortho_matrix =
585 MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));
586 glProgramUniformMatrix3x2fv(vertex_program.handle, ModelViewMatrixLocation, 1, GL_FALSE,
587 std::data(ortho_matrix));
588
350 const auto& texcoords = screen_info.display_texcoords; 589 const auto& texcoords = screen_info.display_texcoords;
351 auto left = texcoords.left; 590 auto left = texcoords.left;
352 auto right = texcoords.right; 591 auto right = texcoords.right;
@@ -378,60 +617,129 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
378 static_cast<f32>(screen_info.texture.height); 617 static_cast<f32>(screen_info.texture.height);
379 } 618 }
380 619
620 const auto& screen = layout.screen;
381 const std::array vertices = { 621 const std::array vertices = {
382 ScreenRectVertex(x, y, texcoords.top * scale_u, left * scale_v), 622 ScreenRectVertex(screen.left, screen.top, texcoords.top * scale_u, left * scale_v),
383 ScreenRectVertex(x + w, y, texcoords.bottom * scale_u, left * scale_v), 623 ScreenRectVertex(screen.right, screen.top, texcoords.bottom * scale_u, left * scale_v),
384 ScreenRectVertex(x, y + h, texcoords.top * scale_u, right * scale_v), 624 ScreenRectVertex(screen.left, screen.bottom, texcoords.top * scale_u, right * scale_v),
385 ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v), 625 ScreenRectVertex(screen.right, screen.bottom, texcoords.bottom * scale_u, right * scale_v),
386 }; 626 };
387
388 state.textures[0] = screen_info.display_texture;
389 state.framebuffer_srgb.enabled = screen_info.display_srgb;
390 state.AllDirty();
391 state.Apply();
392 glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices)); 627 glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices));
393 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
394 // Restore default state
395 state.framebuffer_srgb.enabled = false;
396 state.textures[0] = 0;
397 state.AllDirty();
398 state.Apply();
399}
400 628
401void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { 629 // TODO: Signal state tracker about these changes
402 if (renderer_settings.set_background_color) { 630 state_tracker.NotifyScreenDrawVertexArray();
403 // Update background color before drawing 631 state_tracker.NotifyPolygonModes();
404 glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, 632 state_tracker.NotifyViewport0();
405 0.0f); 633 state_tracker.NotifyScissor0();
634 state_tracker.NotifyColorMask0();
635 state_tracker.NotifyBlend0();
636 state_tracker.NotifyFramebuffer();
637 state_tracker.NotifyFrontFace();
638 state_tracker.NotifyCullTest();
639 state_tracker.NotifyDepthTest();
640 state_tracker.NotifyStencilTest();
641 state_tracker.NotifyPolygonOffset();
642 state_tracker.NotifyRasterizeEnable();
643 state_tracker.NotifyFramebufferSRGB();
644 state_tracker.NotifyLogicOp();
645 state_tracker.NotifyClipControl();
646 state_tracker.NotifyAlphaTest();
647
648 program_manager.UseVertexShader(vertex_program.handle);
649 program_manager.UseGeometryShader(0);
650 program_manager.UseFragmentShader(fragment_program.handle);
651 program_manager.BindGraphicsPipeline();
652
653 glEnable(GL_CULL_FACE);
654 if (screen_info.display_srgb) {
655 glEnable(GL_FRAMEBUFFER_SRGB);
656 } else {
657 glDisable(GL_FRAMEBUFFER_SRGB);
406 } 658 }
659 glDisable(GL_COLOR_LOGIC_OP);
660 glDisable(GL_DEPTH_TEST);
661 glDisable(GL_STENCIL_TEST);
662 glDisable(GL_POLYGON_OFFSET_FILL);
663 glDisable(GL_RASTERIZER_DISCARD);
664 glDisable(GL_ALPHA_TEST);
665 glDisablei(GL_BLEND, 0);
666 glDisablei(GL_SCISSOR_TEST, 0);
667 glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
668 glCullFace(GL_BACK);
669 glFrontFace(GL_CW);
670 glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
671 glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
672 glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width),
673 static_cast<GLfloat>(layout.height));
674 glDepthRangeIndexed(0, 0.0, 0.0);
675
676 glEnableVertexAttribArray(PositionLocation);
677 glEnableVertexAttribArray(TexCoordLocation);
678 glVertexAttribDivisor(PositionLocation, 0);
679 glVertexAttribDivisor(TexCoordLocation, 0);
680 glVertexAttribFormat(PositionLocation, 2, GL_FLOAT, GL_FALSE,
681 offsetof(ScreenRectVertex, position));
682 glVertexAttribFormat(TexCoordLocation, 2, GL_FLOAT, GL_FALSE,
683 offsetof(ScreenRectVertex, tex_coord));
684 glVertexAttribBinding(PositionLocation, 0);
685 glVertexAttribBinding(TexCoordLocation, 0);
686 glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex));
687
688 glBindTextureUnit(0, screen_info.display_texture);
689 glBindSampler(0, 0);
407 690
408 const auto& screen = layout.screen;
409
410 glViewport(0, 0, layout.width, layout.height);
411 glClear(GL_COLOR_BUFFER_BIT); 691 glClear(GL_COLOR_BUFFER_BIT);
692 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
693}
412 694
413 // Set projection matrix 695void RendererOpenGL::TryPresent(int timeout_ms) {
414 const std::array ortho_matrix = 696 const auto& layout = render_window.GetFramebufferLayout();
415 MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height)); 697 auto frame = frame_mailbox->TryGetPresentFrame(timeout_ms);
416 glUniformMatrix3x2fv(ModelViewMatrixLocation, 1, GL_FALSE, ortho_matrix.data()); 698 if (!frame) {
699 LOG_DEBUG(Render_OpenGL, "TryGetPresentFrame returned no frame to present");
700 return;
701 }
417 702
418 DrawScreenTriangles(screen_info, static_cast<float>(screen.left), 703 // Clearing before a full overwrite of a fbo can signal to drivers that they can avoid a
419 static_cast<float>(screen.top), static_cast<float>(screen.GetWidth()), 704 // readback since we won't be doing any blending
420 static_cast<float>(screen.GetHeight())); 705 glClear(GL_COLOR_BUFFER_BIT);
421 706
422 m_current_frame++; 707 // Recreate the presentation FBO if the color attachment was changed
708 if (frame->color_reloaded) {
709 LOG_DEBUG(Render_OpenGL, "Reloading present frame");
710 frame_mailbox->ReloadPresentFrame(frame, layout.width, layout.height);
711 }
712 glWaitSync(frame->render_fence, 0, GL_TIMEOUT_IGNORED);
713 // INTEL workaround.
714 // Normally we could just delete the draw fence here, but due to driver bugs, we can just delete
715 // it on the emulation thread without too much penalty
716 // glDeleteSync(frame.render_sync);
717 // frame.render_sync = 0;
718
719 glBindFramebuffer(GL_READ_FRAMEBUFFER, frame->present.handle);
720 glBlitFramebuffer(0, 0, frame->width, frame->height, 0, 0, layout.width, layout.height,
721 GL_COLOR_BUFFER_BIT, GL_LINEAR);
722
723 // Insert fence for the main thread to block on
724 frame->present_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
725 glFlush();
726
727 glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
423} 728}
424 729
425void RendererOpenGL::UpdateFramerate() {} 730void RendererOpenGL::RenderScreenshot() {
731 if (!renderer_settings.screenshot_requested) {
732 return;
733 }
734
735 GLint old_read_fb;
736 GLint old_draw_fb;
737 glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb);
738 glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb);
426 739
427void RendererOpenGL::CaptureScreenshot() {
428 // Draw the current frame to the screenshot framebuffer 740 // Draw the current frame to the screenshot framebuffer
429 screenshot_framebuffer.Create(); 741 screenshot_framebuffer.Create();
430 GLuint old_read_fb = state.draw.read_framebuffer; 742 glBindFramebuffer(GL_FRAMEBUFFER, screenshot_framebuffer.handle);
431 GLuint old_draw_fb = state.draw.draw_framebuffer;
432 state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle;
433 state.AllDirty();
434 state.Apply();
435 743
436 Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; 744 Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
437 745
@@ -448,19 +756,16 @@ void RendererOpenGL::CaptureScreenshot() {
448 renderer_settings.screenshot_bits); 756 renderer_settings.screenshot_bits);
449 757
450 screenshot_framebuffer.Release(); 758 screenshot_framebuffer.Release();
451 state.draw.read_framebuffer = old_read_fb;
452 state.draw.draw_framebuffer = old_draw_fb;
453 state.AllDirty();
454 state.Apply();
455 glDeleteRenderbuffers(1, &renderbuffer); 759 glDeleteRenderbuffers(1, &renderbuffer);
456 760
761 glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb);
762 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb);
763
457 renderer_settings.screenshot_complete_callback(); 764 renderer_settings.screenshot_complete_callback();
458 renderer_settings.screenshot_requested = false; 765 renderer_settings.screenshot_requested = false;
459} 766}
460 767
461bool RendererOpenGL::Init() { 768bool RendererOpenGL::Init() {
462 Core::Frontend::ScopeAcquireWindowContext acquire_context{render_window};
463
464 if (GLAD_GL_KHR_debug) { 769 if (GLAD_GL_KHR_debug) {
465 glEnable(GL_DEBUG_OUTPUT); 770 glEnable(GL_DEBUG_OUTPUT);
466 glDebugMessageCallback(DebugHandler, nullptr); 771 glDebugMessageCallback(DebugHandler, nullptr);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index b56328a7f..33073ce5b 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -10,7 +10,8 @@
10#include "common/math_util.h" 10#include "common/math_util.h"
11#include "video_core/renderer_base.h" 11#include "video_core/renderer_base.h"
12#include "video_core/renderer_opengl/gl_resource_manager.h" 12#include "video_core/renderer_opengl/gl_resource_manager.h"
13#include "video_core/renderer_opengl/gl_state.h" 13#include "video_core/renderer_opengl/gl_shader_manager.h"
14#include "video_core/renderer_opengl/gl_state_tracker.h"
14 15
15namespace Core { 16namespace Core {
16class System; 17class System;
@@ -44,19 +45,23 @@ struct ScreenInfo {
44 TextureInfo texture; 45 TextureInfo texture;
45}; 46};
46 47
48struct PresentationTexture {
49 u32 width = 0;
50 u32 height = 0;
51 OGLTexture texture;
52};
53
54class FrameMailbox;
55
47class RendererOpenGL final : public VideoCore::RendererBase { 56class RendererOpenGL final : public VideoCore::RendererBase {
48public: 57public:
49 explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system); 58 explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system);
50 ~RendererOpenGL() override; 59 ~RendererOpenGL() override;
51 60
52 /// Swap buffers (render frame)
53 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
54
55 /// Initialize the renderer
56 bool Init() override; 61 bool Init() override;
57
58 /// Shutdown the renderer
59 void ShutDown() override; 62 void ShutDown() override;
63 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
64 void TryPresent(int timeout_ms) override;
60 65
61private: 66private:
62 /// Initializes the OpenGL state and creates persistent objects. 67 /// Initializes the OpenGL state and creates persistent objects.
@@ -72,12 +77,7 @@ private:
72 /// Draws the emulated screens to the emulator window. 77 /// Draws the emulated screens to the emulator window.
73 void DrawScreen(const Layout::FramebufferLayout& layout); 78 void DrawScreen(const Layout::FramebufferLayout& layout);
74 79
75 void DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w, float h); 80 void RenderScreenshot();
76
77 /// Updates the framerate.
78 void UpdateFramerate();
79
80 void CaptureScreenshot();
81 81
82 /// Loads framebuffer from emulated memory into the active OpenGL texture. 82 /// Loads framebuffer from emulated memory into the active OpenGL texture.
83 void LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer); 83 void LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer);
@@ -87,26 +87,34 @@ private:
87 void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a, 87 void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
88 const TextureInfo& texture); 88 const TextureInfo& texture);
89 89
90 void PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer);
91
90 Core::Frontend::EmuWindow& emu_window; 92 Core::Frontend::EmuWindow& emu_window;
91 Core::System& system; 93 Core::System& system;
92 94
93 OpenGLState state; 95 StateTracker state_tracker{system};
94 96
95 // OpenGL object IDs 97 // OpenGL object IDs
96 OGLVertexArray vertex_array;
97 OGLBuffer vertex_buffer; 98 OGLBuffer vertex_buffer;
98 OGLProgram shader; 99 OGLProgram vertex_program;
100 OGLProgram fragment_program;
99 OGLFramebuffer screenshot_framebuffer; 101 OGLFramebuffer screenshot_framebuffer;
100 102
101 /// Display information for Switch screen 103 /// Display information for Switch screen
102 ScreenInfo screen_info; 104 ScreenInfo screen_info;
103 105
106 /// Global dummy shader pipeline
107 GLShader::ProgramManager program_manager;
108
104 /// OpenGL framebuffer data 109 /// OpenGL framebuffer data
105 std::vector<u8> gl_framebuffer_data; 110 std::vector<u8> gl_framebuffer_data;
106 111
107 /// Used for transforming the framebuffer orientation 112 /// Used for transforming the framebuffer orientation
108 Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags; 113 Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags;
109 Common::Rectangle<int> framebuffer_crop_rect; 114 Common::Rectangle<int> framebuffer_crop_rect;
115
116 /// Frame presentation mailbox
117 std::unique_ptr<FrameMailbox> frame_mailbox;
110}; 118};
111 119
112} // namespace OpenGL 120} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp
index ac99e6385..b751086fa 100644
--- a/src/video_core/renderer_opengl/utils.cpp
+++ b/src/video_core/renderer_opengl/utils.cpp
@@ -9,6 +9,7 @@
9#include <glad/glad.h> 9#include <glad/glad.h>
10 10
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "video_core/renderer_opengl/gl_state_tracker.h"
12#include "video_core/renderer_opengl/utils.h" 13#include "video_core/renderer_opengl/utils.h"
13 14
14namespace OpenGL { 15namespace OpenGL {
@@ -20,12 +21,12 @@ struct VertexArrayPushBuffer::Entry {
20 GLsizei stride{}; 21 GLsizei stride{};
21}; 22};
22 23
23VertexArrayPushBuffer::VertexArrayPushBuffer() = default; 24VertexArrayPushBuffer::VertexArrayPushBuffer(StateTracker& state_tracker)
25 : state_tracker{state_tracker} {}
24 26
25VertexArrayPushBuffer::~VertexArrayPushBuffer() = default; 27VertexArrayPushBuffer::~VertexArrayPushBuffer() = default;
26 28
27void VertexArrayPushBuffer::Setup(GLuint vao_) { 29void VertexArrayPushBuffer::Setup() {
28 vao = vao_;
29 index_buffer = nullptr; 30 index_buffer = nullptr;
30 vertex_buffers.clear(); 31 vertex_buffers.clear();
31} 32}
@@ -41,13 +42,11 @@ void VertexArrayPushBuffer::SetVertexBuffer(GLuint binding_index, const GLuint*
41 42
42void VertexArrayPushBuffer::Bind() { 43void VertexArrayPushBuffer::Bind() {
43 if (index_buffer) { 44 if (index_buffer) {
44 glVertexArrayElementBuffer(vao, *index_buffer); 45 state_tracker.BindIndexBuffer(*index_buffer);
45 } 46 }
46 47
47 // TODO(Rodrigo): Find a way to ARB_multi_bind this
48 for (const auto& entry : vertex_buffers) { 48 for (const auto& entry : vertex_buffers) {
49 glVertexArrayVertexBuffer(vao, entry.binding_index, *entry.buffer, entry.offset, 49 glBindVertexBuffer(entry.binding_index, *entry.buffer, entry.offset, entry.stride);
50 entry.stride);
51 } 50 }
52} 51}
53 52
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h
index 3ad7c02d4..47ee3177b 100644
--- a/src/video_core/renderer_opengl/utils.h
+++ b/src/video_core/renderer_opengl/utils.h
@@ -11,12 +11,14 @@
11 11
12namespace OpenGL { 12namespace OpenGL {
13 13
14class StateTracker;
15
14class VertexArrayPushBuffer final { 16class VertexArrayPushBuffer final {
15public: 17public:
16 explicit VertexArrayPushBuffer(); 18 explicit VertexArrayPushBuffer(StateTracker& state_tracker);
17 ~VertexArrayPushBuffer(); 19 ~VertexArrayPushBuffer();
18 20
19 void Setup(GLuint vao_); 21 void Setup();
20 22
21 void SetIndexBuffer(const GLuint* buffer); 23 void SetIndexBuffer(const GLuint* buffer);
22 24
@@ -28,7 +30,8 @@ public:
28private: 30private:
29 struct Entry; 31 struct Entry;
30 32
31 GLuint vao{}; 33 StateTracker& state_tracker;
34
32 const GLuint* index_buffer{}; 35 const GLuint* index_buffer{};
33 std::vector<Entry> vertex_buffers; 36 std::vector<Entry> vertex_buffers;
34}; 37};
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index 4e3ff231e..2bb376555 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -112,19 +112,18 @@ constexpr FixedPipelineState::Rasterizer GetRasterizerState(const Maxwell& regs)
112 const auto& clip = regs.view_volume_clip_control; 112 const auto& clip = regs.view_volume_clip_control;
113 const bool depth_clamp_enabled = clip.depth_clamp_near == 1 || clip.depth_clamp_far == 1; 113 const bool depth_clamp_enabled = clip.depth_clamp_near == 1 || clip.depth_clamp_far == 1;
114 114
115 Maxwell::Cull::FrontFace front_face = regs.cull.front_face; 115 Maxwell::FrontFace front_face = regs.front_face;
116 if (regs.screen_y_control.triangle_rast_flip != 0 && 116 if (regs.screen_y_control.triangle_rast_flip != 0 &&
117 regs.viewport_transform[0].scale_y > 0.0f) { 117 regs.viewport_transform[0].scale_y > 0.0f) {
118 if (front_face == Maxwell::Cull::FrontFace::CounterClockWise) 118 if (front_face == Maxwell::FrontFace::CounterClockWise)
119 front_face = Maxwell::Cull::FrontFace::ClockWise; 119 front_face = Maxwell::FrontFace::ClockWise;
120 else if (front_face == Maxwell::Cull::FrontFace::ClockWise) 120 else if (front_face == Maxwell::FrontFace::ClockWise)
121 front_face = Maxwell::Cull::FrontFace::CounterClockWise; 121 front_face = Maxwell::FrontFace::CounterClockWise;
122 } 122 }
123 123
124 const bool gl_ndc = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne; 124 const bool gl_ndc = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
125 return FixedPipelineState::Rasterizer(regs.cull.enabled, depth_bias_enabled, 125 return FixedPipelineState::Rasterizer(regs.cull_test_enabled, depth_bias_enabled,
126 depth_clamp_enabled, gl_ndc, regs.cull.cull_face, 126 depth_clamp_enabled, gl_ndc, regs.cull_face, front_face);
127 front_face);
128} 127}
129 128
130} // Anonymous namespace 129} // Anonymous namespace
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
index 87056ef37..4c8ba7f90 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@@ -171,8 +171,8 @@ struct FixedPipelineState {
171 171
172 struct Rasterizer { 172 struct Rasterizer {
173 constexpr Rasterizer(bool cull_enable, bool depth_bias_enable, bool depth_clamp_enable, 173 constexpr Rasterizer(bool cull_enable, bool depth_bias_enable, bool depth_clamp_enable,
174 bool ndc_minus_one_to_one, Maxwell::Cull::CullFace cull_face, 174 bool ndc_minus_one_to_one, Maxwell::CullFace cull_face,
175 Maxwell::Cull::FrontFace front_face) 175 Maxwell::FrontFace front_face)
176 : cull_enable{cull_enable}, depth_bias_enable{depth_bias_enable}, 176 : cull_enable{cull_enable}, depth_bias_enable{depth_bias_enable},
177 depth_clamp_enable{depth_clamp_enable}, ndc_minus_one_to_one{ndc_minus_one_to_one}, 177 depth_clamp_enable{depth_clamp_enable}, ndc_minus_one_to_one{ndc_minus_one_to_one},
178 cull_face{cull_face}, front_face{front_face} {} 178 cull_face{cull_face}, front_face{front_face} {}
@@ -182,8 +182,8 @@ struct FixedPipelineState {
182 bool depth_bias_enable; 182 bool depth_bias_enable;
183 bool depth_clamp_enable; 183 bool depth_clamp_enable;
184 bool ndc_minus_one_to_one; 184 bool ndc_minus_one_to_one;
185 Maxwell::Cull::CullFace cull_face; 185 Maxwell::CullFace cull_face;
186 Maxwell::Cull::FrontFace front_face; 186 Maxwell::FrontFace front_face;
187 187
188 std::size_t Hash() const noexcept; 188 std::size_t Hash() const noexcept;
189 189
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 5403c3ab7..f93447610 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -120,11 +120,12 @@ struct FormatTuple {
120 {vk::Format::eA8B8G8R8UintPack32, Attachable | Storage}, // ABGR8UI 120 {vk::Format::eA8B8G8R8UintPack32, Attachable | Storage}, // ABGR8UI
121 {vk::Format::eB5G6R5UnormPack16, {}}, // B5G6R5U 121 {vk::Format::eB5G6R5UnormPack16, {}}, // B5G6R5U
122 {vk::Format::eA2B10G10R10UnormPack32, Attachable | Storage}, // A2B10G10R10U 122 {vk::Format::eA2B10G10R10UnormPack32, Attachable | Storage}, // A2B10G10R10U
123 {vk::Format::eA1R5G5B5UnormPack16, Attachable | Storage}, // A1B5G5R5U (flipped with swizzle) 123 {vk::Format::eA1R5G5B5UnormPack16, Attachable}, // A1B5G5R5U (flipped with swizzle)
124 {vk::Format::eR8Unorm, Attachable | Storage}, // R8U 124 {vk::Format::eR8Unorm, Attachable | Storage}, // R8U
125 {vk::Format::eR8Uint, Attachable | Storage}, // R8UI 125 {vk::Format::eR8Uint, Attachable | Storage}, // R8UI
126 {vk::Format::eR16G16B16A16Sfloat, Attachable | Storage}, // RGBA16F 126 {vk::Format::eR16G16B16A16Sfloat, Attachable | Storage}, // RGBA16F
127 {vk::Format::eR16G16B16A16Unorm, Attachable | Storage}, // RGBA16U 127 {vk::Format::eR16G16B16A16Unorm, Attachable | Storage}, // RGBA16U
128 {vk::Format::eR16G16B16A16Snorm, Attachable | Storage}, // RGBA16S
128 {vk::Format::eR16G16B16A16Uint, Attachable | Storage}, // RGBA16UI 129 {vk::Format::eR16G16B16A16Uint, Attachable | Storage}, // RGBA16UI
129 {vk::Format::eB10G11R11UfloatPack32, Attachable | Storage}, // R11FG11FB10F 130 {vk::Format::eB10G11R11UfloatPack32, Attachable | Storage}, // R11FG11FB10F
130 {vk::Format::eR32G32B32A32Uint, Attachable | Storage}, // RGBA32UI 131 {vk::Format::eR32G32B32A32Uint, Attachable | Storage}, // RGBA32UI
@@ -159,6 +160,7 @@ struct FormatTuple {
159 {vk::Format::eR32G32Uint, Attachable | Storage}, // RG32UI 160 {vk::Format::eR32G32Uint, Attachable | Storage}, // RG32UI
160 {vk::Format::eUndefined, {}}, // RGBX16F 161 {vk::Format::eUndefined, {}}, // RGBX16F
161 {vk::Format::eR32Uint, Attachable | Storage}, // R32UI 162 {vk::Format::eR32Uint, Attachable | Storage}, // R32UI
163 {vk::Format::eR32Sint, Attachable | Storage}, // R32I
162 {vk::Format::eAstc8x8UnormBlock, {}}, // ASTC_2D_8X8 164 {vk::Format::eAstc8x8UnormBlock, {}}, // ASTC_2D_8X8
163 {vk::Format::eUndefined, {}}, // ASTC_2D_8X5 165 {vk::Format::eUndefined, {}}, // ASTC_2D_8X5
164 {vk::Format::eUndefined, {}}, // ASTC_2D_5X4 166 {vk::Format::eUndefined, {}}, // ASTC_2D_5X4
@@ -255,6 +257,8 @@ vk::ShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) {
255 return vk::ShaderStageFlagBits::eGeometry; 257 return vk::ShaderStageFlagBits::eGeometry;
256 case Tegra::Engines::ShaderType::Fragment: 258 case Tegra::Engines::ShaderType::Fragment:
257 return vk::ShaderStageFlagBits::eFragment; 259 return vk::ShaderStageFlagBits::eFragment;
260 case Tegra::Engines::ShaderType::Compute:
261 return vk::ShaderStageFlagBits::eCompute;
258 } 262 }
259 UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage)); 263 UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage));
260 return {}; 264 return {};
@@ -330,6 +334,8 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr
330 return vk::Format::eR16G16B16Unorm; 334 return vk::Format::eR16G16B16Unorm;
331 case Maxwell::VertexAttribute::Size::Size_16_16_16_16: 335 case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
332 return vk::Format::eR16G16B16A16Unorm; 336 return vk::Format::eR16G16B16A16Unorm;
337 case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
338 return vk::Format::eA2B10G10R10UnormPack32;
333 default: 339 default:
334 break; 340 break;
335 } 341 }
@@ -363,6 +369,10 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr
363 return vk::Format::eR8G8B8A8Uint; 369 return vk::Format::eR8G8B8A8Uint;
364 case Maxwell::VertexAttribute::Size::Size_32: 370 case Maxwell::VertexAttribute::Size::Size_32:
365 return vk::Format::eR32Uint; 371 return vk::Format::eR32Uint;
372 case Maxwell::VertexAttribute::Size::Size_32_32:
373 return vk::Format::eR32G32Uint;
374 case Maxwell::VertexAttribute::Size::Size_32_32_32:
375 return vk::Format::eR32G32B32Uint;
366 case Maxwell::VertexAttribute::Size::Size_32_32_32_32: 376 case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
367 return vk::Format::eR32G32B32A32Uint; 377 return vk::Format::eR32G32B32A32Uint;
368 default: 378 default:
@@ -370,8 +380,22 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr
370 } 380 }
371 case Maxwell::VertexAttribute::Type::UnsignedScaled: 381 case Maxwell::VertexAttribute::Type::UnsignedScaled:
372 switch (size) { 382 switch (size) {
383 case Maxwell::VertexAttribute::Size::Size_8:
384 return vk::Format::eR8Uscaled;
373 case Maxwell::VertexAttribute::Size::Size_8_8: 385 case Maxwell::VertexAttribute::Size::Size_8_8:
374 return vk::Format::eR8G8Uscaled; 386 return vk::Format::eR8G8Uscaled;
387 case Maxwell::VertexAttribute::Size::Size_8_8_8:
388 return vk::Format::eR8G8B8Uscaled;
389 case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
390 return vk::Format::eR8G8B8A8Uscaled;
391 case Maxwell::VertexAttribute::Size::Size_16:
392 return vk::Format::eR16Uscaled;
393 case Maxwell::VertexAttribute::Size::Size_16_16:
394 return vk::Format::eR16G16Uscaled;
395 case Maxwell::VertexAttribute::Size::Size_16_16_16:
396 return vk::Format::eR16G16B16Uscaled;
397 case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
398 return vk::Format::eR16G16B16A16Uscaled;
375 default: 399 default:
376 break; 400 break;
377 } 401 }
@@ -571,24 +595,24 @@ vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor) {
571 return {}; 595 return {};
572} 596}
573 597
574vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face) { 598vk::FrontFace FrontFace(Maxwell::FrontFace front_face) {
575 switch (front_face) { 599 switch (front_face) {
576 case Maxwell::Cull::FrontFace::ClockWise: 600 case Maxwell::FrontFace::ClockWise:
577 return vk::FrontFace::eClockwise; 601 return vk::FrontFace::eClockwise;
578 case Maxwell::Cull::FrontFace::CounterClockWise: 602 case Maxwell::FrontFace::CounterClockWise:
579 return vk::FrontFace::eCounterClockwise; 603 return vk::FrontFace::eCounterClockwise;
580 } 604 }
581 UNIMPLEMENTED_MSG("Unimplemented front face={}", static_cast<u32>(front_face)); 605 UNIMPLEMENTED_MSG("Unimplemented front face={}", static_cast<u32>(front_face));
582 return {}; 606 return {};
583} 607}
584 608
585vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face) { 609vk::CullModeFlags CullFace(Maxwell::CullFace cull_face) {
586 switch (cull_face) { 610 switch (cull_face) {
587 case Maxwell::Cull::CullFace::Front: 611 case Maxwell::CullFace::Front:
588 return vk::CullModeFlagBits::eFront; 612 return vk::CullModeFlagBits::eFront;
589 case Maxwell::Cull::CullFace::Back: 613 case Maxwell::CullFace::Back:
590 return vk::CullModeFlagBits::eBack; 614 return vk::CullModeFlagBits::eBack;
591 case Maxwell::Cull::CullFace::FrontAndBack: 615 case Maxwell::CullFace::FrontAndBack:
592 return vk::CullModeFlagBits::eFrontAndBack; 616 return vk::CullModeFlagBits::eFrontAndBack;
593 } 617 }
594 UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face)); 618 UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face));
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
index 7e9678b7b..24f6ab544 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -54,9 +54,9 @@ vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation);
54 54
55vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor); 55vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor);
56 56
57vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face); 57vk::FrontFace FrontFace(Maxwell::FrontFace front_face);
58 58
59vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face); 59vk::CullModeFlags CullFace(Maxwell::CullFace cull_face);
60 60
61vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle); 61vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle);
62 62
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index d5032b432..42bb01418 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -27,6 +27,7 @@
27#include "video_core/renderer_vulkan/vk_rasterizer.h" 27#include "video_core/renderer_vulkan/vk_rasterizer.h"
28#include "video_core/renderer_vulkan/vk_resource_manager.h" 28#include "video_core/renderer_vulkan/vk_resource_manager.h"
29#include "video_core/renderer_vulkan/vk_scheduler.h" 29#include "video_core/renderer_vulkan/vk_scheduler.h"
30#include "video_core/renderer_vulkan/vk_state_tracker.h"
30#include "video_core/renderer_vulkan/vk_swapchain.h" 31#include "video_core/renderer_vulkan/vk_swapchain.h"
31 32
32namespace Vulkan { 33namespace Vulkan {
@@ -106,8 +107,14 @@ RendererVulkan::~RendererVulkan() {
106} 107}
107 108
108void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { 109void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
110 render_window.PollEvents();
111
112 if (!framebuffer) {
113 return;
114 }
115
109 const auto& layout = render_window.GetFramebufferLayout(); 116 const auto& layout = render_window.GetFramebufferLayout();
110 if (framebuffer && layout.width > 0 && layout.height > 0 && render_window.IsShown()) { 117 if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) {
111 const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; 118 const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
112 const bool use_accelerated = 119 const bool use_accelerated =
113 rasterizer->AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); 120 rasterizer->AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
@@ -128,13 +135,16 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
128 blit_screen->Recreate(); 135 blit_screen->Recreate();
129 } 136 }
130 137
131 render_window.SwapBuffers();
132 rasterizer->TickFrame(); 138 rasterizer->TickFrame();
133 } 139 }
134 140
135 render_window.PollEvents(); 141 render_window.PollEvents();
136} 142}
137 143
144void RendererVulkan::TryPresent(int /*timeout_ms*/) {
145 // TODO (bunnei): ImplementMe
146}
147
138bool RendererVulkan::Init() { 148bool RendererVulkan::Init() {
139 PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr{}; 149 PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr{};
140 render_window.RetrieveVulkanHandlers(&vkGetInstanceProcAddr, &instance, &surface); 150 render_window.RetrieveVulkanHandlers(&vkGetInstanceProcAddr, &instance, &surface);
@@ -168,10 +178,13 @@ bool RendererVulkan::Init() {
168 swapchain = std::make_unique<VKSwapchain>(surface, *device); 178 swapchain = std::make_unique<VKSwapchain>(surface, *device);
169 swapchain->Create(framebuffer.width, framebuffer.height, false); 179 swapchain->Create(framebuffer.width, framebuffer.height, false);
170 180
171 scheduler = std::make_unique<VKScheduler>(*device, *resource_manager); 181 state_tracker = std::make_unique<StateTracker>(system);
182
183 scheduler = std::make_unique<VKScheduler>(*device, *resource_manager, *state_tracker);
172 184
173 rasterizer = std::make_unique<RasterizerVulkan>(system, render_window, screen_info, *device, 185 rasterizer = std::make_unique<RasterizerVulkan>(system, render_window, screen_info, *device,
174 *resource_manager, *memory_manager, *scheduler); 186 *resource_manager, *memory_manager,
187 *state_tracker, *scheduler);
175 188
176 blit_screen = std::make_unique<VKBlitScreen>(system, render_window, *rasterizer, *device, 189 blit_screen = std::make_unique<VKBlitScreen>(system, render_window, *rasterizer, *device,
177 *resource_manager, *memory_manager, *swapchain, 190 *resource_manager, *memory_manager, *swapchain,
@@ -262,4 +275,4 @@ void RendererVulkan::Report() const {
262 telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions); 275 telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions);
263} 276}
264 277
265} // namespace Vulkan \ No newline at end of file 278} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index a472c5dc9..3da08d2e4 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -4,8 +4,10 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <memory>
7#include <optional> 8#include <optional>
8#include <vector> 9#include <vector>
10
9#include "video_core/renderer_base.h" 11#include "video_core/renderer_base.h"
10#include "video_core/renderer_vulkan/declarations.h" 12#include "video_core/renderer_vulkan/declarations.h"
11 13
@@ -15,6 +17,7 @@ class System;
15 17
16namespace Vulkan { 18namespace Vulkan {
17 19
20class StateTracker;
18class VKBlitScreen; 21class VKBlitScreen;
19class VKDevice; 22class VKDevice;
20class VKFence; 23class VKFence;
@@ -36,14 +39,10 @@ public:
36 explicit RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system); 39 explicit RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system);
37 ~RendererVulkan() override; 40 ~RendererVulkan() override;
38 41
39 /// Swap buffers (render frame)
40 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
41
42 /// Initialize the renderer
43 bool Init() override; 42 bool Init() override;
44
45 /// Shutdown the renderer
46 void ShutDown() override; 43 void ShutDown() override;
44 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
45 void TryPresent(int timeout_ms) override;
47 46
48private: 47private:
49 std::optional<vk::DebugUtilsMessengerEXT> CreateDebugCallback( 48 std::optional<vk::DebugUtilsMessengerEXT> CreateDebugCallback(
@@ -65,6 +64,7 @@ private:
65 std::unique_ptr<VKSwapchain> swapchain; 64 std::unique_ptr<VKSwapchain> swapchain;
66 std::unique_ptr<VKMemoryManager> memory_manager; 65 std::unique_ptr<VKMemoryManager> memory_manager;
67 std::unique_ptr<VKResourceManager> resource_manager; 66 std::unique_ptr<VKResourceManager> resource_manager;
67 std::unique_ptr<StateTracker> state_tracker;
68 std::unique_ptr<VKScheduler> scheduler; 68 std::unique_ptr<VKScheduler> scheduler;
69 std::unique_ptr<VKBlitScreen> blit_screen; 69 std::unique_ptr<VKBlitScreen> blit_screen;
70}; 70};
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 9d5b8de7a..60f57d83e 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -73,7 +73,7 @@ UniqueDescriptorUpdateTemplate VKComputePipeline::CreateDescriptorUpdateTemplate
73 std::vector<vk::DescriptorUpdateTemplateEntry> template_entries; 73 std::vector<vk::DescriptorUpdateTemplateEntry> template_entries;
74 u32 binding = 0; 74 u32 binding = 0;
75 u32 offset = 0; 75 u32 offset = 0;
76 FillDescriptorUpdateTemplateEntries(device, entries, binding, offset, template_entries); 76 FillDescriptorUpdateTemplateEntries(entries, binding, offset, template_entries);
77 if (template_entries.empty()) { 77 if (template_entries.empty()) {
78 // If the shader doesn't use descriptor sets, skip template creation. 78 // If the shader doesn't use descriptor sets, skip template creation.
79 return UniqueDescriptorUpdateTemplate{}; 79 return UniqueDescriptorUpdateTemplate{};
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index d1da4f9d3..28d2fbc4f 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -107,8 +107,7 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
107 features.occlusionQueryPrecise = true; 107 features.occlusionQueryPrecise = true;
108 features.fragmentStoresAndAtomics = true; 108 features.fragmentStoresAndAtomics = true;
109 features.shaderImageGatherExtended = true; 109 features.shaderImageGatherExtended = true;
110 features.shaderStorageImageReadWithoutFormat = 110 features.shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported;
111 is_shader_storage_img_read_without_format_supported;
112 features.shaderStorageImageWriteWithoutFormat = true; 111 features.shaderStorageImageWriteWithoutFormat = true;
113 features.textureCompressionASTC_LDR = is_optimal_astc_supported; 112 features.textureCompressionASTC_LDR = is_optimal_astc_supported;
114 113
@@ -148,6 +147,15 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
148 LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes"); 147 LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes");
149 } 148 }
150 149
150 vk::PhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback;
151 if (ext_transform_feedback) {
152 transform_feedback.transformFeedback = true;
153 transform_feedback.geometryStreams = true;
154 SetNext(next, transform_feedback);
155 } else {
156 LOG_INFO(Render_Vulkan, "Device doesn't support transform feedbacks");
157 }
158
151 if (!ext_depth_range_unrestricted) { 159 if (!ext_depth_range_unrestricted) {
152 LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); 160 LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
153 } 161 }
@@ -385,7 +393,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
385 } 393 }
386 }; 394 };
387 395
388 extensions.reserve(14); 396 extensions.reserve(15);
389 extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); 397 extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
390 extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME); 398 extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME);
391 extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME); 399 extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME);
@@ -397,18 +405,22 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
397 405
398 [[maybe_unused]] const bool nsight = 406 [[maybe_unused]] const bool nsight =
399 std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); 407 std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
400 bool khr_shader_float16_int8{}; 408 bool has_khr_shader_float16_int8{};
401 bool ext_subgroup_size_control{}; 409 bool has_ext_subgroup_size_control{};
410 bool has_ext_transform_feedback{};
402 for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { 411 for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
403 Test(extension, khr_uniform_buffer_standard_layout, 412 Test(extension, khr_uniform_buffer_standard_layout,
404 VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); 413 VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
405 Test(extension, khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); 414 Test(extension, has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME,
415 false);
406 Test(extension, ext_depth_range_unrestricted, 416 Test(extension, ext_depth_range_unrestricted,
407 VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); 417 VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
408 Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); 418 Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
409 Test(extension, ext_shader_viewport_index_layer, 419 Test(extension, ext_shader_viewport_index_layer,
410 VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true); 420 VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true);
411 Test(extension, ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, 421 Test(extension, has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME,
422 false);
423 Test(extension, has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME,
412 false); 424 false);
413 if (Settings::values.renderer_debug) { 425 if (Settings::values.renderer_debug) {
414 Test(extension, nv_device_diagnostic_checkpoints, 426 Test(extension, nv_device_diagnostic_checkpoints,
@@ -416,13 +428,13 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
416 } 428 }
417 } 429 }
418 430
419 if (khr_shader_float16_int8) { 431 if (has_khr_shader_float16_int8) {
420 is_float16_supported = 432 is_float16_supported =
421 GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16; 433 GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16;
422 extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); 434 extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
423 } 435 }
424 436
425 if (ext_subgroup_size_control) { 437 if (has_ext_subgroup_size_control) {
426 const auto features = 438 const auto features =
427 GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi); 439 GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi);
428 const auto properties = 440 const auto properties =
@@ -439,6 +451,20 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
439 is_warp_potentially_bigger = true; 451 is_warp_potentially_bigger = true;
440 } 452 }
441 453
454 if (has_ext_transform_feedback) {
455 const auto features =
456 GetFeatures<vk::PhysicalDeviceTransformFeedbackFeaturesEXT>(physical, dldi);
457 const auto properties =
458 GetProperties<vk::PhysicalDeviceTransformFeedbackPropertiesEXT>(physical, dldi);
459
460 if (features.transformFeedback && features.geometryStreams &&
461 properties.maxTransformFeedbackStreams >= 4 && properties.maxTransformFeedbackBuffers &&
462 properties.transformFeedbackQueries && properties.transformFeedbackDraw) {
463 extensions.push_back(VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME);
464 ext_transform_feedback = true;
465 }
466 }
467
442 return extensions; 468 return extensions;
443} 469}
444 470
@@ -467,8 +493,7 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK
467 493
468void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) { 494void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) {
469 const auto supported_features{physical.getFeatures(dldi)}; 495 const auto supported_features{physical.getFeatures(dldi)};
470 is_shader_storage_img_read_without_format_supported = 496 is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat;
471 supported_features.shaderStorageImageReadWithoutFormat;
472 is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi); 497 is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi);
473} 498}
474 499
@@ -510,6 +535,7 @@ std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperti
510 vk::Format::eR32G32Sfloat, 535 vk::Format::eR32G32Sfloat,
511 vk::Format::eR32G32Uint, 536 vk::Format::eR32G32Uint,
512 vk::Format::eR16G16B16A16Uint, 537 vk::Format::eR16G16B16A16Uint,
538 vk::Format::eR16G16B16A16Snorm,
513 vk::Format::eR16G16B16A16Unorm, 539 vk::Format::eR16G16B16A16Unorm,
514 vk::Format::eR16G16Unorm, 540 vk::Format::eR16G16Unorm,
515 vk::Format::eR16G16Snorm, 541 vk::Format::eR16G16Snorm,
@@ -523,6 +549,7 @@ std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperti
523 vk::Format::eB10G11R11UfloatPack32, 549 vk::Format::eB10G11R11UfloatPack32,
524 vk::Format::eR32Sfloat, 550 vk::Format::eR32Sfloat,
525 vk::Format::eR32Uint, 551 vk::Format::eR32Uint,
552 vk::Format::eR32Sint,
526 vk::Format::eR16Sfloat, 553 vk::Format::eR16Sfloat,
527 vk::Format::eR16G16B16A16Sfloat, 554 vk::Format::eR16G16B16A16Sfloat,
528 vk::Format::eB8G8R8A8Unorm, 555 vk::Format::eB8G8R8A8Unorm,
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
index 2c27ad730..6e656517f 100644
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -122,11 +122,6 @@ public:
122 return properties.limits.maxPushConstantsSize; 122 return properties.limits.maxPushConstantsSize;
123 } 123 }
124 124
125 /// Returns true if Shader storage Image Read Without Format supported.
126 bool IsShaderStorageImageReadWithoutFormatSupported() const {
127 return is_shader_storage_img_read_without_format_supported;
128 }
129
130 /// Returns true if ASTC is natively supported. 125 /// Returns true if ASTC is natively supported.
131 bool IsOptimalAstcSupported() const { 126 bool IsOptimalAstcSupported() const {
132 return is_optimal_astc_supported; 127 return is_optimal_astc_supported;
@@ -147,6 +142,11 @@ public:
147 return (guest_warp_stages & stage) != vk::ShaderStageFlags{}; 142 return (guest_warp_stages & stage) != vk::ShaderStageFlags{};
148 } 143 }
149 144
145 /// Returns true if formatless image load is supported.
146 bool IsFormatlessImageLoadSupported() const {
147 return is_formatless_image_load_supported;
148 }
149
150 /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout. 150 /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout.
151 bool IsKhrUniformBufferStandardLayoutSupported() const { 151 bool IsKhrUniformBufferStandardLayoutSupported() const {
152 return khr_uniform_buffer_standard_layout; 152 return khr_uniform_buffer_standard_layout;
@@ -167,6 +167,11 @@ public:
167 return ext_shader_viewport_index_layer; 167 return ext_shader_viewport_index_layer;
168 } 168 }
169 169
170 /// Returns true if the device supports VK_EXT_transform_feedback.
171 bool IsExtTransformFeedbackSupported() const {
172 return ext_transform_feedback;
173 }
174
170 /// Returns true if the device supports VK_NV_device_diagnostic_checkpoints. 175 /// Returns true if the device supports VK_NV_device_diagnostic_checkpoints.
171 bool IsNvDeviceDiagnosticCheckpoints() const { 176 bool IsNvDeviceDiagnosticCheckpoints() const {
172 return nv_device_diagnostic_checkpoints; 177 return nv_device_diagnostic_checkpoints;
@@ -214,26 +219,26 @@ private:
214 static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties( 219 static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties(
215 const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); 220 const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical);
216 221
217 const vk::PhysicalDevice physical; ///< Physical device. 222 const vk::PhysicalDevice physical; ///< Physical device.
218 vk::DispatchLoaderDynamic dld; ///< Device function pointers. 223 vk::DispatchLoaderDynamic dld; ///< Device function pointers.
219 vk::PhysicalDeviceProperties properties; ///< Device properties. 224 vk::PhysicalDeviceProperties properties; ///< Device properties.
220 UniqueDevice logical; ///< Logical device. 225 UniqueDevice logical; ///< Logical device.
221 vk::Queue graphics_queue; ///< Main graphics queue. 226 vk::Queue graphics_queue; ///< Main graphics queue.
222 vk::Queue present_queue; ///< Main present queue. 227 vk::Queue present_queue; ///< Main present queue.
223 u32 graphics_family{}; ///< Main graphics queue family index. 228 u32 graphics_family{}; ///< Main graphics queue family index.
224 u32 present_family{}; ///< Main present queue family index. 229 u32 present_family{}; ///< Main present queue family index.
225 vk::DriverIdKHR driver_id{}; ///< Driver ID. 230 vk::DriverIdKHR driver_id{}; ///< Driver ID.
226 vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced. 231 vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.
227 bool is_optimal_astc_supported{}; ///< Support for native ASTC. 232 bool is_optimal_astc_supported{}; ///< Support for native ASTC.
228 bool is_float16_supported{}; ///< Support for float16 arithmetics. 233 bool is_float16_supported{}; ///< Support for float16 arithmetics.
229 bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. 234 bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
235 bool is_formatless_image_load_supported{}; ///< Support for shader image read without format.
230 bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. 236 bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
231 bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. 237 bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
232 bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. 238 bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
233 bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. 239 bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
240 bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
234 bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints. 241 bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints.
235 bool is_shader_storage_img_read_without_format_supported{}; ///< Support for shader storage
236 ///< image read without format
237 242
238 // Telemetry parameters 243 // Telemetry parameters
239 std::string vendor_name; ///< Device's driver name. 244 std::string vendor_name; ///< Device's driver name.
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index b155dfb49..6a02403c1 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -97,8 +97,7 @@ UniqueDescriptorUpdateTemplate VKGraphicsPipeline::CreateDescriptorUpdateTemplat
97 u32 offset = 0; 97 u32 offset = 0;
98 for (const auto& stage : program) { 98 for (const auto& stage : program) {
99 if (stage) { 99 if (stage) {
100 FillDescriptorUpdateTemplateEntries(device, stage->entries, binding, offset, 100 FillDescriptorUpdateTemplateEntries(stage->entries, binding, offset, template_entries);
101 template_entries);
102 } 101 }
103 } 102 }
104 if (template_entries.empty()) { 103 if (template_entries.empty()) {
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 7ddf7d3ee..557b9d662 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -36,6 +36,13 @@ using Tegra::Engines::ShaderType;
36 36
37namespace { 37namespace {
38 38
39// C++20's using enum
40constexpr auto eUniformBuffer = vk::DescriptorType::eUniformBuffer;
41constexpr auto eStorageBuffer = vk::DescriptorType::eStorageBuffer;
42constexpr auto eUniformTexelBuffer = vk::DescriptorType::eUniformTexelBuffer;
43constexpr auto eCombinedImageSampler = vk::DescriptorType::eCombinedImageSampler;
44constexpr auto eStorageImage = vk::DescriptorType::eStorageImage;
45
39constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ 46constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
40 VideoCommon::Shader::CompileDepth::FullDecompile}; 47 VideoCommon::Shader::CompileDepth::FullDecompile};
41 48
@@ -119,23 +126,32 @@ ShaderType GetShaderType(Maxwell::ShaderProgram program) {
119 } 126 }
120} 127}
121 128
129template <vk::DescriptorType descriptor_type, class Container>
130void AddBindings(std::vector<vk::DescriptorSetLayoutBinding>& bindings, u32& binding,
131 vk::ShaderStageFlags stage_flags, const Container& container) {
132 const u32 num_entries = static_cast<u32>(std::size(container));
133 for (std::size_t i = 0; i < num_entries; ++i) {
134 u32 count = 1;
135 if constexpr (descriptor_type == eCombinedImageSampler) {
136 // Combined image samplers can be arrayed.
137 count = container[i].Size();
138 }
139 bindings.emplace_back(binding++, descriptor_type, count, stage_flags, nullptr);
140 }
141}
142
122u32 FillDescriptorLayout(const ShaderEntries& entries, 143u32 FillDescriptorLayout(const ShaderEntries& entries,
123 std::vector<vk::DescriptorSetLayoutBinding>& bindings, 144 std::vector<vk::DescriptorSetLayoutBinding>& bindings,
124 Maxwell::ShaderProgram program_type, u32 base_binding) { 145 Maxwell::ShaderProgram program_type, u32 base_binding) {
125 const ShaderType stage = GetStageFromProgram(program_type); 146 const ShaderType stage = GetStageFromProgram(program_type);
126 const vk::ShaderStageFlags stage_flags = MaxwellToVK::ShaderStage(stage); 147 const vk::ShaderStageFlags flags = MaxwellToVK::ShaderStage(stage);
127 148
128 u32 binding = base_binding; 149 u32 binding = base_binding;
129 const auto AddBindings = [&](vk::DescriptorType descriptor_type, std::size_t num_entries) { 150 AddBindings<eUniformBuffer>(bindings, binding, flags, entries.const_buffers);
130 for (std::size_t i = 0; i < num_entries; ++i) { 151 AddBindings<eStorageBuffer>(bindings, binding, flags, entries.global_buffers);
131 bindings.emplace_back(binding++, descriptor_type, 1, stage_flags, nullptr); 152 AddBindings<eUniformTexelBuffer>(bindings, binding, flags, entries.texel_buffers);
132 } 153 AddBindings<eCombinedImageSampler>(bindings, binding, flags, entries.samplers);
133 }; 154 AddBindings<eStorageImage>(bindings, binding, flags, entries.images);
134 AddBindings(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size());
135 AddBindings(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size());
136 AddBindings(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size());
137 AddBindings(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size());
138 AddBindings(vk::DescriptorType::eStorageImage, entries.images.size());
139 return binding; 155 return binding;
140} 156}
141 157
@@ -145,8 +161,8 @@ CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stag
145 GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, 161 GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr,
146 ProgramCode program_code, u32 main_offset) 162 ProgramCode program_code, u32 main_offset)
147 : RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr}, 163 : RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr},
148 program_code{std::move(program_code)}, locker{stage, GetEngine(system, stage)}, 164 program_code{std::move(program_code)}, registry{stage, GetEngine(system, stage)},
149 shader_ir{this->program_code, main_offset, compiler_settings, locker}, 165 shader_ir{this->program_code, main_offset, compiler_settings, registry},
150 entries{GenerateShaderEntries(shader_ir)} {} 166 entries{GenerateShaderEntries(shader_ir)} {}
151 167
152CachedShader::~CachedShader() = default; 168CachedShader::~CachedShader() = default;
@@ -163,24 +179,19 @@ Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine(
163VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, 179VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
164 const VKDevice& device, VKScheduler& scheduler, 180 const VKDevice& device, VKScheduler& scheduler,
165 VKDescriptorPool& descriptor_pool, 181 VKDescriptorPool& descriptor_pool,
166 VKUpdateDescriptorQueue& update_descriptor_queue) 182 VKUpdateDescriptorQueue& update_descriptor_queue,
183 VKRenderPassCache& renderpass_cache)
167 : RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler}, 184 : RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler},
168 descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue}, 185 descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue},
169 renderpass_cache(device) {} 186 renderpass_cache{renderpass_cache} {}
170 187
171VKPipelineCache::~VKPipelineCache() = default; 188VKPipelineCache::~VKPipelineCache() = default;
172 189
173std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { 190std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
174 const auto& gpu = system.GPU().Maxwell3D(); 191 const auto& gpu = system.GPU().Maxwell3D();
175 auto& dirty = system.GPU().Maxwell3D().dirty.shaders;
176 if (!dirty) {
177 return last_shaders;
178 }
179 dirty = false;
180 192
181 std::array<Shader, Maxwell::MaxShaderProgram> shaders; 193 std::array<Shader, Maxwell::MaxShaderProgram> shaders;
182 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { 194 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
183 const auto& shader_config = gpu.regs.shader_config[index];
184 const auto program{static_cast<Maxwell::ShaderProgram>(index)}; 195 const auto program{static_cast<Maxwell::ShaderProgram>(index)};
185 196
186 // Skip stages that are not enabled 197 // Skip stages that are not enabled
@@ -262,9 +273,9 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
262 specialization.workgroup_size = key.workgroup_size; 273 specialization.workgroup_size = key.workgroup_size;
263 specialization.shared_memory_size = key.shared_memory_size; 274 specialization.shared_memory_size = key.shared_memory_size;
264 275
265 const SPIRVShader spirv_shader{ 276 const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute,
266 Decompile(device, shader->GetIR(), ShaderType::Compute, specialization), 277 shader->GetRegistry(), specialization),
267 shader->GetEntries()}; 278 shader->GetEntries()};
268 entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool, 279 entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool,
269 update_descriptor_queue, spirv_shader); 280 update_descriptor_queue, spirv_shader);
270 return *entry; 281 return *entry;
@@ -313,8 +324,7 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
313 const auto& gpu = system.GPU().Maxwell3D(); 324 const auto& gpu = system.GPU().Maxwell3D();
314 325
315 Specialization specialization; 326 Specialization specialization;
316 specialization.primitive_topology = fixed_state.input_assembly.topology; 327 if (fixed_state.input_assembly.topology == Maxwell::PrimitiveTopology::Points) {
317 if (specialization.primitive_topology == Maxwell::PrimitiveTopology::Points) {
318 ASSERT(fixed_state.input_assembly.point_size != 0.0f); 328 ASSERT(fixed_state.input_assembly.point_size != 0.0f);
319 specialization.point_size = fixed_state.input_assembly.point_size; 329 specialization.point_size = fixed_state.input_assembly.point_size;
320 } 330 }
@@ -322,9 +332,6 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
322 specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type; 332 specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type;
323 } 333 }
324 specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; 334 specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;
325 specialization.tessellation.primitive = fixed_state.tessellation.primitive;
326 specialization.tessellation.spacing = fixed_state.tessellation.spacing;
327 specialization.tessellation.clockwise = fixed_state.tessellation.clockwise;
328 335
329 SPIRVProgram program; 336 SPIRVProgram program;
330 std::vector<vk::DescriptorSetLayoutBinding> bindings; 337 std::vector<vk::DescriptorSetLayoutBinding> bindings;
@@ -345,8 +352,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
345 const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 352 const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
346 const auto program_type = GetShaderType(program_enum); 353 const auto program_type = GetShaderType(program_enum);
347 const auto& entries = shader->GetEntries(); 354 const auto& entries = shader->GetEntries();
348 program[stage] = {Decompile(device, shader->GetIR(), program_type, specialization), 355 program[stage] = {
349 entries}; 356 Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization),
357 entries};
350 358
351 if (program_enum == Maxwell::ShaderProgram::VertexA) { 359 if (program_enum == Maxwell::ShaderProgram::VertexA) {
352 // VertexB was combined with VertexA, so we skip the VertexB iteration 360 // VertexB was combined with VertexA, so we skip the VertexB iteration
@@ -361,32 +369,45 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
361 return {std::move(program), std::move(bindings)}; 369 return {std::move(program), std::move(bindings)};
362} 370}
363 371
364void FillDescriptorUpdateTemplateEntries( 372template <vk::DescriptorType descriptor_type, class Container>
365 const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset, 373void AddEntry(std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries, u32& binding,
366 std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries) { 374 u32& offset, const Container& container) {
367 static constexpr auto entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry)); 375 static constexpr u32 entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry));
368 const auto AddEntry = [&](vk::DescriptorType descriptor_type, std::size_t count_) { 376 const u32 count = static_cast<u32>(std::size(container));
369 const u32 count = static_cast<u32>(count_); 377
370 if (descriptor_type == vk::DescriptorType::eUniformTexelBuffer && 378 if constexpr (descriptor_type == eCombinedImageSampler) {
371 device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) { 379 for (u32 i = 0; i < count; ++i) {
372 // Nvidia has a bug where updating multiple uniform texels at once causes the driver to 380 const u32 num_samplers = container[i].Size();
373 // crash. 381 template_entries.emplace_back(binding, 0, num_samplers, descriptor_type, offset,
374 for (u32 i = 0; i < count; ++i) { 382 entry_size);
375 template_entries.emplace_back(binding + i, 0, 1, descriptor_type, 383 ++binding;
376 offset + i * entry_size, entry_size); 384 offset += num_samplers * entry_size;
377 }
378 } else if (count != 0) {
379 template_entries.emplace_back(binding, 0, count, descriptor_type, offset, entry_size);
380 } 385 }
381 offset += count * entry_size; 386 return;
382 binding += count; 387 }
383 };
384 388
385 AddEntry(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size()); 389 if constexpr (descriptor_type == eUniformTexelBuffer) {
386 AddEntry(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size()); 390 // Nvidia has a bug where updating multiple uniform texels at once causes the driver to
387 AddEntry(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size()); 391 // crash.
388 AddEntry(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size()); 392 for (u32 i = 0; i < count; ++i) {
389 AddEntry(vk::DescriptorType::eStorageImage, entries.images.size()); 393 template_entries.emplace_back(binding + i, 0, 1, descriptor_type,
394 offset + i * entry_size, entry_size);
395 }
396 } else if (count > 0) {
397 template_entries.emplace_back(binding, 0, count, descriptor_type, offset, entry_size);
398 }
399 offset += count * entry_size;
400 binding += count;
401}
402
403void FillDescriptorUpdateTemplateEntries(
404 const ShaderEntries& entries, u32& binding, u32& offset,
405 std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries) {
406 AddEntry<eUniformBuffer>(template_entries, offset, binding, entries.const_buffers);
407 AddEntry<eStorageBuffer>(template_entries, offset, binding, entries.global_buffers);
408 AddEntry<eUniformTexelBuffer>(template_entries, offset, binding, entries.texel_buffers);
409 AddEntry<eCombinedImageSampler>(template_entries, offset, binding, entries.samplers);
410 AddEntry<eStorageImage>(template_entries, offset, binding, entries.images);
390} 411}
391 412
392} // namespace Vulkan 413} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 8678fc9c3..c4c112290 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -25,7 +25,7 @@
25#include "video_core/renderer_vulkan/vk_renderpass_cache.h" 25#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
26#include "video_core/renderer_vulkan/vk_resource_manager.h" 26#include "video_core/renderer_vulkan/vk_resource_manager.h"
27#include "video_core/renderer_vulkan/vk_shader_decompiler.h" 27#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
28#include "video_core/shader/const_buffer_locker.h" 28#include "video_core/shader/registry.h"
29#include "video_core/shader/shader_ir.h" 29#include "video_core/shader/shader_ir.h"
30#include "video_core/surface.h" 30#include "video_core/surface.h"
31 31
@@ -132,6 +132,10 @@ public:
132 return shader_ir; 132 return shader_ir;
133 } 133 }
134 134
135 const VideoCommon::Shader::Registry& GetRegistry() const {
136 return registry;
137 }
138
135 const VideoCommon::Shader::ShaderIR& GetIR() const { 139 const VideoCommon::Shader::ShaderIR& GetIR() const {
136 return shader_ir; 140 return shader_ir;
137 } 141 }
@@ -147,7 +151,7 @@ private:
147 GPUVAddr gpu_addr{}; 151 GPUVAddr gpu_addr{};
148 VAddr cpu_addr{}; 152 VAddr cpu_addr{};
149 ProgramCode program_code; 153 ProgramCode program_code;
150 VideoCommon::Shader::ConstBufferLocker locker; 154 VideoCommon::Shader::Registry registry;
151 VideoCommon::Shader::ShaderIR shader_ir; 155 VideoCommon::Shader::ShaderIR shader_ir;
152 ShaderEntries entries; 156 ShaderEntries entries;
153}; 157};
@@ -157,7 +161,8 @@ public:
157 explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, 161 explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
158 const VKDevice& device, VKScheduler& scheduler, 162 const VKDevice& device, VKScheduler& scheduler,
159 VKDescriptorPool& descriptor_pool, 163 VKDescriptorPool& descriptor_pool,
160 VKUpdateDescriptorQueue& update_descriptor_queue); 164 VKUpdateDescriptorQueue& update_descriptor_queue,
165 VKRenderPassCache& renderpass_cache);
161 ~VKPipelineCache(); 166 ~VKPipelineCache();
162 167
163 std::array<Shader, Maxwell::MaxShaderProgram> GetShaders(); 168 std::array<Shader, Maxwell::MaxShaderProgram> GetShaders();
@@ -180,8 +185,7 @@ private:
180 VKScheduler& scheduler; 185 VKScheduler& scheduler;
181 VKDescriptorPool& descriptor_pool; 186 VKDescriptorPool& descriptor_pool;
182 VKUpdateDescriptorQueue& update_descriptor_queue; 187 VKUpdateDescriptorQueue& update_descriptor_queue;
183 188 VKRenderPassCache& renderpass_cache;
184 VKRenderPassCache renderpass_cache;
185 189
186 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; 190 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
187 191
@@ -194,7 +198,7 @@ private:
194}; 198};
195 199
196void FillDescriptorUpdateTemplateEntries( 200void FillDescriptorUpdateTemplateEntries(
197 const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset, 201 const ShaderEntries& entries, u32& binding, u32& offset,
198 std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries); 202 std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries);
199 203
200} // namespace Vulkan 204} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 31c078f6a..58c69b786 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -36,6 +36,7 @@
36#include "video_core/renderer_vulkan/vk_sampler_cache.h" 36#include "video_core/renderer_vulkan/vk_sampler_cache.h"
37#include "video_core/renderer_vulkan/vk_scheduler.h" 37#include "video_core/renderer_vulkan/vk_scheduler.h"
38#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 38#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
39#include "video_core/renderer_vulkan/vk_state_tracker.h"
39#include "video_core/renderer_vulkan/vk_texture_cache.h" 40#include "video_core/renderer_vulkan/vk_texture_cache.h"
40#include "video_core/renderer_vulkan/vk_update_descriptor.h" 41#include "video_core/renderer_vulkan/vk_update_descriptor.h"
41 42
@@ -105,17 +106,20 @@ void TransitionImages(const std::vector<ImageView>& views, vk::PipelineStageFlag
105 106
106template <typename Engine, typename Entry> 107template <typename Engine, typename Entry>
107Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, 108Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
108 std::size_t stage) { 109 std::size_t stage, std::size_t index = 0) {
109 const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage); 110 const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage);
110 if (entry.IsBindless()) { 111 if (entry.IsBindless()) {
111 const Tegra::Texture::TextureHandle tex_handle = 112 const Tegra::Texture::TextureHandle tex_handle =
112 engine.AccessConstBuffer32(stage_type, entry.GetBuffer(), entry.GetOffset()); 113 engine.AccessConstBuffer32(stage_type, entry.GetBuffer(), entry.GetOffset());
113 return engine.GetTextureInfo(tex_handle); 114 return engine.GetTextureInfo(tex_handle);
114 } 115 }
116 const auto& gpu_profile = engine.AccessGuestDriverProfile();
117 const u32 entry_offset = static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
118 const u32 offset = entry.GetOffset() + entry_offset;
115 if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { 119 if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
116 return engine.GetStageTexture(stage_type, entry.GetOffset()); 120 return engine.GetStageTexture(stage_type, offset);
117 } else { 121 } else {
118 return engine.GetTexture(entry.GetOffset()); 122 return engine.GetTexture(offset);
119 } 123 }
120} 124}
121 125
@@ -277,17 +281,19 @@ void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf,
277RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& renderer, 281RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& renderer,
278 VKScreenInfo& screen_info, const VKDevice& device, 282 VKScreenInfo& screen_info, const VKDevice& device,
279 VKResourceManager& resource_manager, 283 VKResourceManager& resource_manager,
280 VKMemoryManager& memory_manager, VKScheduler& scheduler) 284 VKMemoryManager& memory_manager, StateTracker& state_tracker,
285 VKScheduler& scheduler)
281 : RasterizerAccelerated{system.Memory()}, system{system}, render_window{renderer}, 286 : RasterizerAccelerated{system.Memory()}, system{system}, render_window{renderer},
282 screen_info{screen_info}, device{device}, resource_manager{resource_manager}, 287 screen_info{screen_info}, device{device}, resource_manager{resource_manager},
283 memory_manager{memory_manager}, scheduler{scheduler}, 288 memory_manager{memory_manager}, state_tracker{state_tracker}, scheduler{scheduler},
284 staging_pool(device, memory_manager, scheduler), descriptor_pool(device), 289 staging_pool(device, memory_manager, scheduler), descriptor_pool(device),
285 update_descriptor_queue(device, scheduler), 290 update_descriptor_queue(device, scheduler), renderpass_cache(device),
286 quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), 291 quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
287 uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), 292 uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
288 texture_cache(system, *this, device, resource_manager, memory_manager, scheduler, 293 texture_cache(system, *this, device, resource_manager, memory_manager, scheduler,
289 staging_pool), 294 staging_pool),
290 pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue), 295 pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue,
296 renderpass_cache),
291 buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), 297 buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
292 sampler_cache(device), query_cache(system, *this, device, scheduler) { 298 sampler_cache(device), query_cache(system, *this, device, scheduler) {
293 scheduler.SetQueryCache(query_cache); 299 scheduler.SetQueryCache(query_cache);
@@ -342,6 +348,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
342 [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); }); 348 [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); });
343 } 349 }
344 350
351 BeginTransformFeedback();
352
345 const auto pipeline_layout = pipeline.GetLayout(); 353 const auto pipeline_layout = pipeline.GetLayout();
346 const auto descriptor_set = pipeline.CommitDescriptorSet(); 354 const auto descriptor_set = pipeline.CommitDescriptorSet();
347 scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) { 355 scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) {
@@ -351,18 +359,23 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
351 } 359 }
352 draw_params.Draw(cmdbuf, dld); 360 draw_params.Draw(cmdbuf, dld);
353 }); 361 });
362
363 EndTransformFeedback();
354} 364}
355 365
356void RasterizerVulkan::Clear() { 366void RasterizerVulkan::Clear() {
357 MICROPROFILE_SCOPE(Vulkan_Clearing); 367 MICROPROFILE_SCOPE(Vulkan_Clearing);
358 368
359 query_cache.UpdateCounters();
360
361 const auto& gpu = system.GPU().Maxwell3D(); 369 const auto& gpu = system.GPU().Maxwell3D();
362 if (!system.GPU().Maxwell3D().ShouldExecute()) { 370 if (!system.GPU().Maxwell3D().ShouldExecute()) {
363 return; 371 return;
364 } 372 }
365 373
374 sampled_views.clear();
375 image_views.clear();
376
377 query_cache.UpdateCounters();
378
366 const auto& regs = gpu.regs; 379 const auto& regs = gpu.regs;
367 const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || 380 const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
368 regs.clear_buffers.A; 381 regs.clear_buffers.A;
@@ -371,52 +384,54 @@ void RasterizerVulkan::Clear() {
371 if (!use_color && !use_depth && !use_stencil) { 384 if (!use_color && !use_depth && !use_stencil) {
372 return; 385 return;
373 } 386 }
374 // Clearing images requires to be out of a renderpass
375 scheduler.RequestOutsideRenderPassOperationContext();
376 387
377 // TODO(Rodrigo): Implement clears rendering a quad or using beginning a renderpass. 388 [[maybe_unused]] const auto texceptions = UpdateAttachments();
389 DEBUG_ASSERT(texceptions.none());
390 SetupImageTransitions(0, color_attachments, zeta_attachment);
378 391
379 if (use_color) { 392 const vk::RenderPass renderpass = renderpass_cache.GetRenderPass(GetRenderPassParams(0));
380 View color_view; 393 const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass);
381 { 394 scheduler.RequestRenderpass({renderpass, framebuffer, {{0, 0}, render_area}, 0, nullptr});
382 MICROPROFILE_SCOPE(Vulkan_RenderTargets);
383 color_view = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT.Value(), false);
384 }
385 395
386 color_view->Transition(vk::ImageLayout::eTransferDstOptimal, 396 const auto& scissor = regs.scissor_test[0];
387 vk::PipelineStageFlagBits::eTransfer, 397 const vk::Offset2D scissor_offset(scissor.min_x, scissor.min_y);
388 vk::AccessFlagBits::eTransferWrite); 398 vk::Extent2D scissor_extent{scissor.max_x - scissor.min_x, scissor.max_y - scissor.min_y};
399 scissor_extent.width = std::min(scissor_extent.width, render_area.width);
400 scissor_extent.height = std::min(scissor_extent.height, render_area.height);
389 401
402 const u32 layer = regs.clear_buffers.layer;
403 const vk::ClearRect clear_rect({scissor_offset, scissor_extent}, layer, 1);
404
405 if (use_color) {
390 const std::array clear_color = {regs.clear_color[0], regs.clear_color[1], 406 const std::array clear_color = {regs.clear_color[0], regs.clear_color[1],
391 regs.clear_color[2], regs.clear_color[3]}; 407 regs.clear_color[2], regs.clear_color[3]};
392 const vk::ClearColorValue clear(clear_color); 408 const vk::ClearValue clear_value{clear_color};
393 scheduler.Record([image = color_view->GetImage(), 409 const u32 color_attachment = regs.clear_buffers.RT;
394 subresource = color_view->GetImageSubresourceRange(), 410 scheduler.Record([color_attachment, clear_value, clear_rect](auto cmdbuf, auto& dld) {
395 clear](auto cmdbuf, auto& dld) { 411 const vk::ClearAttachment attachment(vk::ImageAspectFlagBits::eColor, color_attachment,
396 cmdbuf.clearColorImage(image, vk::ImageLayout::eTransferDstOptimal, clear, subresource, 412 clear_value);
397 dld); 413 cmdbuf.clearAttachments(1, &attachment, 1, &clear_rect, dld);
398 }); 414 });
399 } 415 }
400 if (use_depth || use_stencil) {
401 View zeta_surface;
402 {
403 MICROPROFILE_SCOPE(Vulkan_RenderTargets);
404 zeta_surface = texture_cache.GetDepthBufferSurface(false);
405 }
406 416
407 zeta_surface->Transition(vk::ImageLayout::eTransferDstOptimal, 417 if (!use_depth && !use_stencil) {
408 vk::PipelineStageFlagBits::eTransfer, 418 return;
409 vk::AccessFlagBits::eTransferWrite); 419 }
410 420 vk::ImageAspectFlags aspect_flags;
411 const vk::ClearDepthStencilValue clear(regs.clear_depth, 421 if (use_depth) {
412 static_cast<u32>(regs.clear_stencil)); 422 aspect_flags |= vk::ImageAspectFlagBits::eDepth;
413 scheduler.Record([image = zeta_surface->GetImage(), 423 }
414 subresource = zeta_surface->GetImageSubresourceRange(), 424 if (use_stencil) {
415 clear](auto cmdbuf, auto& dld) { 425 aspect_flags |= vk::ImageAspectFlagBits::eStencil;
416 cmdbuf.clearDepthStencilImage(image, vk::ImageLayout::eTransferDstOptimal, clear,
417 subresource, dld);
418 });
419 } 426 }
427
428 scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil,
429 clear_rect, aspect_flags](auto cmdbuf, auto& dld) {
430 const vk::ClearDepthStencilValue clear_zeta(clear_depth, clear_stencil);
431 const vk::ClearValue clear_value{clear_zeta};
432 const vk::ClearAttachment attachment(aspect_flags, 0, clear_value);
433 cmdbuf.clearAttachments(1, &attachment, 1, &clear_rect, dld);
434 });
420} 435}
421 436
422void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { 437void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
@@ -533,8 +548,6 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
533 548
534 // Verify that the cached surface is the same size and format as the requested framebuffer 549 // Verify that the cached surface is the same size and format as the requested framebuffer
535 const auto& params{surface->GetSurfaceParams()}; 550 const auto& params{surface->GetSurfaceParams()};
536 const auto& pixel_format{
537 VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)};
538 ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); 551 ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
539 ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); 552 ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
540 553
@@ -545,6 +558,10 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
545 return true; 558 return true;
546} 559}
547 560
561void RasterizerVulkan::SetupDirtyFlags() {
562 state_tracker.Initialize();
563}
564
548void RasterizerVulkan::FlushWork() { 565void RasterizerVulkan::FlushWork() {
549 static constexpr u32 DRAWS_TO_DISPATCH = 4096; 566 static constexpr u32 DRAWS_TO_DISPATCH = 4096;
550 567
@@ -568,9 +585,9 @@ void RasterizerVulkan::FlushWork() {
568 585
569RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() { 586RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
570 MICROPROFILE_SCOPE(Vulkan_RenderTargets); 587 MICROPROFILE_SCOPE(Vulkan_RenderTargets);
571 auto& dirty = system.GPU().Maxwell3D().dirty; 588 auto& dirty = system.GPU().Maxwell3D().dirty.flags;
572 const bool update_rendertargets = dirty.render_settings; 589 const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets];
573 dirty.render_settings = false; 590 dirty[VideoCommon::Dirty::RenderTargets] = false;
574 591
575 texture_cache.GuardRenderTargets(true); 592 texture_cache.GuardRenderTargets(true);
576 593
@@ -611,33 +628,34 @@ bool RasterizerVulkan::WalkAttachmentOverlaps(const CachedSurfaceView& attachmen
611std::tuple<vk::Framebuffer, vk::Extent2D> RasterizerVulkan::ConfigureFramebuffers( 628std::tuple<vk::Framebuffer, vk::Extent2D> RasterizerVulkan::ConfigureFramebuffers(
612 vk::RenderPass renderpass) { 629 vk::RenderPass renderpass) {
613 FramebufferCacheKey key{renderpass, std::numeric_limits<u32>::max(), 630 FramebufferCacheKey key{renderpass, std::numeric_limits<u32>::max(),
614 std::numeric_limits<u32>::max()}; 631 std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()};
615 632
616 const auto MarkAsModifiedAndPush = [&](const View& view) { 633 const auto try_push = [&](const View& view) {
617 if (view == nullptr) { 634 if (!view) {
618 return false; 635 return false;
619 } 636 }
620 key.views.push_back(view->GetHandle()); 637 key.views.push_back(view->GetHandle());
621 key.width = std::min(key.width, view->GetWidth()); 638 key.width = std::min(key.width, view->GetWidth());
622 key.height = std::min(key.height, view->GetHeight()); 639 key.height = std::min(key.height, view->GetHeight());
640 key.layers = std::min(key.layers, view->GetNumLayers());
623 return true; 641 return true;
624 }; 642 };
625 643
626 for (std::size_t index = 0; index < std::size(color_attachments); ++index) { 644 for (std::size_t index = 0; index < std::size(color_attachments); ++index) {
627 if (MarkAsModifiedAndPush(color_attachments[index])) { 645 if (try_push(color_attachments[index])) {
628 texture_cache.MarkColorBufferInUse(index); 646 texture_cache.MarkColorBufferInUse(index);
629 } 647 }
630 } 648 }
631 if (MarkAsModifiedAndPush(zeta_attachment)) { 649 if (try_push(zeta_attachment)) {
632 texture_cache.MarkDepthBufferInUse(); 650 texture_cache.MarkDepthBufferInUse();
633 } 651 }
634 652
635 const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key); 653 const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key);
636 auto& framebuffer = fbentry->second; 654 auto& framebuffer = fbentry->second;
637 if (is_cache_miss) { 655 if (is_cache_miss) {
638 const vk::FramebufferCreateInfo framebuffer_ci({}, key.renderpass, 656 const vk::FramebufferCreateInfo framebuffer_ci(
639 static_cast<u32>(key.views.size()), 657 {}, key.renderpass, static_cast<u32>(key.views.size()), key.views.data(), key.width,
640 key.views.data(), key.width, key.height, 1); 658 key.height, key.layers);
641 const auto dev = device.GetLogical(); 659 const auto dev = device.GetLogical();
642 const auto& dld = device.GetDispatchLoader(); 660 const auto& dld = device.GetDispatchLoader();
643 framebuffer = dev.createFramebufferUnique(framebuffer_ci, nullptr, dld); 661 framebuffer = dev.createFramebufferUnique(framebuffer_ci, nullptr, dld);
@@ -719,13 +737,51 @@ void RasterizerVulkan::SetupImageTransitions(
719} 737}
720 738
721void RasterizerVulkan::UpdateDynamicStates() { 739void RasterizerVulkan::UpdateDynamicStates() {
722 auto& gpu = system.GPU().Maxwell3D(); 740 auto& regs = system.GPU().Maxwell3D().regs;
723 UpdateViewportsState(gpu); 741 UpdateViewportsState(regs);
724 UpdateScissorsState(gpu); 742 UpdateScissorsState(regs);
725 UpdateDepthBias(gpu); 743 UpdateDepthBias(regs);
726 UpdateBlendConstants(gpu); 744 UpdateBlendConstants(regs);
727 UpdateDepthBounds(gpu); 745 UpdateDepthBounds(regs);
728 UpdateStencilFaces(gpu); 746 UpdateStencilFaces(regs);
747}
748
749void RasterizerVulkan::BeginTransformFeedback() {
750 const auto& regs = system.GPU().Maxwell3D().regs;
751 if (regs.tfb_enabled == 0) {
752 return;
753 }
754
755 UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
756 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
757 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
758
759 UNIMPLEMENTED_IF(regs.tfb_bindings[1].buffer_enable);
760 UNIMPLEMENTED_IF(regs.tfb_bindings[2].buffer_enable);
761 UNIMPLEMENTED_IF(regs.tfb_bindings[3].buffer_enable);
762
763 const auto& binding = regs.tfb_bindings[0];
764 UNIMPLEMENTED_IF(binding.buffer_enable == 0);
765 UNIMPLEMENTED_IF(binding.buffer_offset != 0);
766
767 const GPUVAddr gpu_addr = binding.Address();
768 const std::size_t size = binding.buffer_size;
769 const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
770
771 scheduler.Record([buffer = *buffer, offset = offset, size](auto cmdbuf, auto& dld) {
772 cmdbuf.bindTransformFeedbackBuffersEXT(0, {buffer}, {offset}, {size}, dld);
773 cmdbuf.beginTransformFeedbackEXT(0, {}, {}, dld);
774 });
775}
776
777void RasterizerVulkan::EndTransformFeedback() {
778 const auto& regs = system.GPU().Maxwell3D().regs;
779 if (regs.tfb_enabled == 0) {
780 return;
781 }
782
783 scheduler.Record(
784 [](auto cmdbuf, auto& dld) { cmdbuf.endTransformFeedbackEXT(0, {}, {}, dld); });
729} 785}
730 786
731void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, 787void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
@@ -835,14 +891,16 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::
835 MICROPROFILE_SCOPE(Vulkan_Textures); 891 MICROPROFILE_SCOPE(Vulkan_Textures);
836 const auto& gpu = system.GPU().Maxwell3D(); 892 const auto& gpu = system.GPU().Maxwell3D();
837 for (const auto& entry : entries.samplers) { 893 for (const auto& entry : entries.samplers) {
838 const auto texture = GetTextureInfo(gpu, entry, stage); 894 for (std::size_t i = 0; i < entry.Size(); ++i) {
839 SetupTexture(texture, entry); 895 const auto texture = GetTextureInfo(gpu, entry, stage, i);
896 SetupTexture(texture, entry);
897 }
840 } 898 }
841} 899}
842 900
843void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) { 901void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) {
844 MICROPROFILE_SCOPE(Vulkan_Images); 902 MICROPROFILE_SCOPE(Vulkan_Images);
845 const auto& gpu = system.GPU().KeplerCompute(); 903 const auto& gpu = system.GPU().Maxwell3D();
846 for (const auto& entry : entries.images) { 904 for (const auto& entry : entries.images) {
847 const auto tic = GetTextureInfo(gpu, entry, stage).tic; 905 const auto tic = GetTextureInfo(gpu, entry, stage).tic;
848 SetupImage(tic, entry); 906 SetupImage(tic, entry);
@@ -885,8 +943,10 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
885 MICROPROFILE_SCOPE(Vulkan_Textures); 943 MICROPROFILE_SCOPE(Vulkan_Textures);
886 const auto& gpu = system.GPU().KeplerCompute(); 944 const auto& gpu = system.GPU().KeplerCompute();
887 for (const auto& entry : entries.samplers) { 945 for (const auto& entry : entries.samplers) {
888 const auto texture = GetTextureInfo(gpu, entry, ComputeShaderIndex); 946 for (std::size_t i = 0; i < entry.Size(); ++i) {
889 SetupTexture(texture, entry); 947 const auto texture = GetTextureInfo(gpu, entry, ComputeShaderIndex, i);
948 SetupTexture(texture, entry);
949 }
890 } 950 }
891} 951}
892 952
@@ -901,6 +961,13 @@ void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
901 961
902void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry, 962void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,
903 const Tegra::Engines::ConstBufferInfo& buffer) { 963 const Tegra::Engines::ConstBufferInfo& buffer) {
964 if (!buffer.enabled) {
965 // Set values to zero to unbind buffers
966 update_descriptor_queue.AddBuffer(buffer_cache.GetEmptyBuffer(sizeof(float)), 0,
967 sizeof(float));
968 return;
969 }
970
904 // Align the size to avoid bad std140 interactions 971 // Align the size to avoid bad std140 interactions
905 const std::size_t size = 972 const std::size_t size =
906 Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float)); 973 Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
@@ -971,12 +1038,10 @@ void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const Ima
971 image_views.push_back(ImageView{std::move(view), image_layout}); 1038 image_views.push_back(ImageView{std::move(view), image_layout});
972} 1039}
973 1040
974void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D& gpu) { 1041void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) {
975 if (!gpu.dirty.viewport_transform && scheduler.TouchViewports()) { 1042 if (!state_tracker.TouchViewports()) {
976 return; 1043 return;
977 } 1044 }
978 gpu.dirty.viewport_transform = false;
979 const auto& regs = gpu.regs;
980 const std::array viewports{ 1045 const std::array viewports{
981 GetViewportState(device, regs, 0), GetViewportState(device, regs, 1), 1046 GetViewportState(device, regs, 0), GetViewportState(device, regs, 1),
982 GetViewportState(device, regs, 2), GetViewportState(device, regs, 3), 1047 GetViewportState(device, regs, 2), GetViewportState(device, regs, 3),
@@ -991,12 +1056,10 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D& gpu) {
991 }); 1056 });
992} 1057}
993 1058
994void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D& gpu) { 1059void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs) {
995 if (!gpu.dirty.scissor_test && scheduler.TouchScissors()) { 1060 if (!state_tracker.TouchScissors()) {
996 return; 1061 return;
997 } 1062 }
998 gpu.dirty.scissor_test = false;
999 const auto& regs = gpu.regs;
1000 const std::array scissors = { 1063 const std::array scissors = {
1001 GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2), 1064 GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2),
1002 GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5), 1065 GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5),
@@ -1009,46 +1072,39 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D& gpu) {
1009 }); 1072 });
1010} 1073}
1011 1074
1012void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D& gpu) { 1075void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs) {
1013 if (!gpu.dirty.polygon_offset && scheduler.TouchDepthBias()) { 1076 if (!state_tracker.TouchDepthBias()) {
1014 return; 1077 return;
1015 } 1078 }
1016 gpu.dirty.polygon_offset = false;
1017 const auto& regs = gpu.regs;
1018 scheduler.Record([constant = regs.polygon_offset_units, clamp = regs.polygon_offset_clamp, 1079 scheduler.Record([constant = regs.polygon_offset_units, clamp = regs.polygon_offset_clamp,
1019 factor = regs.polygon_offset_factor](auto cmdbuf, auto& dld) { 1080 factor = regs.polygon_offset_factor](auto cmdbuf, auto& dld) {
1020 cmdbuf.setDepthBias(constant, clamp, factor / 2.0f, dld); 1081 cmdbuf.setDepthBias(constant, clamp, factor / 2.0f, dld);
1021 }); 1082 });
1022} 1083}
1023 1084
1024void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D& gpu) { 1085void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs) {
1025 if (!gpu.dirty.blend_state && scheduler.TouchBlendConstants()) { 1086 if (!state_tracker.TouchBlendConstants()) {
1026 return; 1087 return;
1027 } 1088 }
1028 gpu.dirty.blend_state = false; 1089 const std::array blend_color = {regs.blend_color.r, regs.blend_color.g, regs.blend_color.b,
1029 const std::array blend_color = {gpu.regs.blend_color.r, gpu.regs.blend_color.g, 1090 regs.blend_color.a};
1030 gpu.regs.blend_color.b, gpu.regs.blend_color.a};
1031 scheduler.Record([blend_color](auto cmdbuf, auto& dld) { 1091 scheduler.Record([blend_color](auto cmdbuf, auto& dld) {
1032 cmdbuf.setBlendConstants(blend_color.data(), dld); 1092 cmdbuf.setBlendConstants(blend_color.data(), dld);
1033 }); 1093 });
1034} 1094}
1035 1095
1036void RasterizerVulkan::UpdateDepthBounds(Tegra::Engines::Maxwell3D& gpu) { 1096void RasterizerVulkan::UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs) {
1037 if (!gpu.dirty.depth_bounds_values && scheduler.TouchDepthBounds()) { 1097 if (!state_tracker.TouchDepthBounds()) {
1038 return; 1098 return;
1039 } 1099 }
1040 gpu.dirty.depth_bounds_values = false;
1041 const auto& regs = gpu.regs;
1042 scheduler.Record([min = regs.depth_bounds[0], max = regs.depth_bounds[1]]( 1100 scheduler.Record([min = regs.depth_bounds[0], max = regs.depth_bounds[1]](
1043 auto cmdbuf, auto& dld) { cmdbuf.setDepthBounds(min, max, dld); }); 1101 auto cmdbuf, auto& dld) { cmdbuf.setDepthBounds(min, max, dld); });
1044} 1102}
1045 1103
1046void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D& gpu) { 1104void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs) {
1047 if (!gpu.dirty.stencil_test && scheduler.TouchStencilValues()) { 1105 if (!state_tracker.TouchStencilProperties()) {
1048 return; 1106 return;
1049 } 1107 }
1050 gpu.dirty.stencil_test = false;
1051 const auto& regs = gpu.regs;
1052 if (regs.stencil_two_side_enable) { 1108 if (regs.stencil_two_side_enable) {
1053 // Separate values per face 1109 // Separate values per face
1054 scheduler.Record( 1110 scheduler.Record(
@@ -1099,7 +1155,7 @@ std::size_t RasterizerVulkan::CalculateVertexArraysSize() const {
1099 // This implementation assumes that all attributes are used in the shader. 1155 // This implementation assumes that all attributes are used in the shader.
1100 const GPUVAddr start{regs.vertex_array[index].StartAddress()}; 1156 const GPUVAddr start{regs.vertex_array[index].StartAddress()};
1101 const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; 1157 const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
1102 DEBUG_ASSERT(end > start); 1158 DEBUG_ASSERT(end >= start);
1103 1159
1104 size += (end - start + 1) * regs.vertex_array[index].enable; 1160 size += (end - start + 1) * regs.vertex_array[index].enable;
1105 } 1161 }
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 138903d60..3185868e9 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -56,6 +56,7 @@ struct FramebufferCacheKey {
56 vk::RenderPass renderpass{}; 56 vk::RenderPass renderpass{};
57 u32 width = 0; 57 u32 width = 0;
58 u32 height = 0; 58 u32 height = 0;
59 u32 layers = 0;
59 ImageViewsPack views; 60 ImageViewsPack views;
60 61
61 std::size_t Hash() const noexcept { 62 std::size_t Hash() const noexcept {
@@ -66,12 +67,17 @@ struct FramebufferCacheKey {
66 } 67 }
67 boost::hash_combine(hash, width); 68 boost::hash_combine(hash, width);
68 boost::hash_combine(hash, height); 69 boost::hash_combine(hash, height);
70 boost::hash_combine(hash, layers);
69 return hash; 71 return hash;
70 } 72 }
71 73
72 bool operator==(const FramebufferCacheKey& rhs) const noexcept { 74 bool operator==(const FramebufferCacheKey& rhs) const noexcept {
73 return std::tie(renderpass, views, width, height) == 75 return std::tie(renderpass, views, width, height, layers) ==
74 std::tie(rhs.renderpass, rhs.views, rhs.width, rhs.height); 76 std::tie(rhs.renderpass, rhs.views, rhs.width, rhs.height, rhs.layers);
77 }
78
79 bool operator!=(const FramebufferCacheKey& rhs) const noexcept {
80 return !operator==(rhs);
75 } 81 }
76}; 82};
77 83
@@ -90,6 +96,7 @@ struct hash<Vulkan::FramebufferCacheKey> {
90 96
91namespace Vulkan { 97namespace Vulkan {
92 98
99class StateTracker;
93class BufferBindings; 100class BufferBindings;
94 101
95struct ImageView { 102struct ImageView {
@@ -102,7 +109,7 @@ public:
102 explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window, 109 explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window,
103 VKScreenInfo& screen_info, const VKDevice& device, 110 VKScreenInfo& screen_info, const VKDevice& device,
104 VKResourceManager& resource_manager, VKMemoryManager& memory_manager, 111 VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
105 VKScheduler& scheduler); 112 StateTracker& state_tracker, VKScheduler& scheduler);
106 ~RasterizerVulkan() override; 113 ~RasterizerVulkan() override;
107 114
108 void Draw(bool is_indexed, bool is_instanced) override; 115 void Draw(bool is_indexed, bool is_instanced) override;
@@ -121,6 +128,7 @@ public:
121 const Tegra::Engines::Fermi2D::Config& copy_config) override; 128 const Tegra::Engines::Fermi2D::Config& copy_config) override;
122 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, 129 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
123 u32 pixel_stride) override; 130 u32 pixel_stride) override;
131 void SetupDirtyFlags() override;
124 132
125 /// Maximum supported size that a constbuffer can have in bytes. 133 /// Maximum supported size that a constbuffer can have in bytes.
126 static constexpr std::size_t MaxConstbufferSize = 0x10000; 134 static constexpr std::size_t MaxConstbufferSize = 0x10000;
@@ -161,6 +169,10 @@ private:
161 169
162 void UpdateDynamicStates(); 170 void UpdateDynamicStates();
163 171
172 void BeginTransformFeedback();
173
174 void EndTransformFeedback();
175
164 bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment); 176 bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment);
165 177
166 void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, 178 void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
@@ -209,12 +221,12 @@ private:
209 221
210 void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); 222 void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
211 223
212 void UpdateViewportsState(Tegra::Engines::Maxwell3D& gpu); 224 void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
213 void UpdateScissorsState(Tegra::Engines::Maxwell3D& gpu); 225 void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs);
214 void UpdateDepthBias(Tegra::Engines::Maxwell3D& gpu); 226 void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs);
215 void UpdateBlendConstants(Tegra::Engines::Maxwell3D& gpu); 227 void UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs);
216 void UpdateDepthBounds(Tegra::Engines::Maxwell3D& gpu); 228 void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs);
217 void UpdateStencilFaces(Tegra::Engines::Maxwell3D& gpu); 229 void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs);
218 230
219 std::size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const; 231 std::size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const;
220 232
@@ -235,11 +247,13 @@ private:
235 const VKDevice& device; 247 const VKDevice& device;
236 VKResourceManager& resource_manager; 248 VKResourceManager& resource_manager;
237 VKMemoryManager& memory_manager; 249 VKMemoryManager& memory_manager;
250 StateTracker& state_tracker;
238 VKScheduler& scheduler; 251 VKScheduler& scheduler;
239 252
240 VKStagingBufferPool staging_pool; 253 VKStagingBufferPool staging_pool;
241 VKDescriptorPool descriptor_pool; 254 VKDescriptorPool descriptor_pool;
242 VKUpdateDescriptorQueue update_descriptor_queue; 255 VKUpdateDescriptorQueue update_descriptor_queue;
256 VKRenderPassCache renderpass_cache;
243 QuadArrayPass quad_array_pass; 257 QuadArrayPass quad_array_pass;
244 Uint8Pass uint8_pass; 258 Uint8Pass uint8_pass;
245 259
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 92bd6c344..b61d4fe63 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -2,6 +2,12 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <memory>
6#include <mutex>
7#include <optional>
8#include <thread>
9#include <utility>
10
5#include "common/assert.h" 11#include "common/assert.h"
6#include "common/microprofile.h" 12#include "common/microprofile.h"
7#include "video_core/renderer_vulkan/declarations.h" 13#include "video_core/renderer_vulkan/declarations.h"
@@ -9,6 +15,7 @@
9#include "video_core/renderer_vulkan/vk_query_cache.h" 15#include "video_core/renderer_vulkan/vk_query_cache.h"
10#include "video_core/renderer_vulkan/vk_resource_manager.h" 16#include "video_core/renderer_vulkan/vk_resource_manager.h"
11#include "video_core/renderer_vulkan/vk_scheduler.h" 17#include "video_core/renderer_vulkan/vk_scheduler.h"
18#include "video_core/renderer_vulkan/vk_state_tracker.h"
12 19
13namespace Vulkan { 20namespace Vulkan {
14 21
@@ -29,9 +36,10 @@ void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf,
29 last = nullptr; 36 last = nullptr;
30} 37}
31 38
32VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager) 39VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager,
33 : device{device}, resource_manager{resource_manager}, next_fence{ 40 StateTracker& state_tracker)
34 &resource_manager.CommitFence()} { 41 : device{device}, resource_manager{resource_manager}, state_tracker{state_tracker},
42 next_fence{&resource_manager.CommitFence()} {
35 AcquireNewChunk(); 43 AcquireNewChunk();
36 AllocateNewContext(); 44 AllocateNewContext();
37 worker_thread = std::thread(&VKScheduler::WorkerThread, this); 45 worker_thread = std::thread(&VKScheduler::WorkerThread, this);
@@ -157,12 +165,7 @@ void VKScheduler::AllocateNewContext() {
157 165
158void VKScheduler::InvalidateState() { 166void VKScheduler::InvalidateState() {
159 state.graphics_pipeline = nullptr; 167 state.graphics_pipeline = nullptr;
160 state.viewports = false; 168 state_tracker.InvalidateCommandBufferState();
161 state.scissors = false;
162 state.depth_bias = false;
163 state.blend_constants = false;
164 state.depth_bounds = false;
165 state.stencil_values = false;
166} 169}
167 170
168void VKScheduler::EndPendingOperations() { 171void VKScheduler::EndPendingOperations() {
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 62fd7858b..c7cc291c3 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -17,6 +17,7 @@
17 17
18namespace Vulkan { 18namespace Vulkan {
19 19
20class StateTracker;
20class VKDevice; 21class VKDevice;
21class VKFence; 22class VKFence;
22class VKQueryCache; 23class VKQueryCache;
@@ -43,7 +44,8 @@ private:
43/// OpenGL-like operations on Vulkan command buffers. 44/// OpenGL-like operations on Vulkan command buffers.
44class VKScheduler { 45class VKScheduler {
45public: 46public:
46 explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager); 47 explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager,
48 StateTracker& state_tracker);
47 ~VKScheduler(); 49 ~VKScheduler();
48 50
49 /// Sends the current execution context to the GPU. 51 /// Sends the current execution context to the GPU.
@@ -74,36 +76,6 @@ public:
74 query_cache = &query_cache_; 76 query_cache = &query_cache_;
75 } 77 }
76 78
77 /// Returns true when viewports have been set in the current command buffer.
78 bool TouchViewports() {
79 return std::exchange(state.viewports, true);
80 }
81
82 /// Returns true when scissors have been set in the current command buffer.
83 bool TouchScissors() {
84 return std::exchange(state.scissors, true);
85 }
86
87 /// Returns true when depth bias have been set in the current command buffer.
88 bool TouchDepthBias() {
89 return std::exchange(state.depth_bias, true);
90 }
91
92 /// Returns true when blend constants have been set in the current command buffer.
93 bool TouchBlendConstants() {
94 return std::exchange(state.blend_constants, true);
95 }
96
97 /// Returns true when depth bounds have been set in the current command buffer.
98 bool TouchDepthBounds() {
99 return std::exchange(state.depth_bounds, true);
100 }
101
102 /// Returns true when stencil values have been set in the current command buffer.
103 bool TouchStencilValues() {
104 return std::exchange(state.stencil_values, true);
105 }
106
107 /// Send work to a separate thread. 79 /// Send work to a separate thread.
108 template <typename T> 80 template <typename T>
109 void Record(T&& command) { 81 void Record(T&& command) {
@@ -217,6 +189,8 @@ private:
217 189
218 const VKDevice& device; 190 const VKDevice& device;
219 VKResourceManager& resource_manager; 191 VKResourceManager& resource_manager;
192 StateTracker& state_tracker;
193
220 VKQueryCache* query_cache = nullptr; 194 VKQueryCache* query_cache = nullptr;
221 195
222 vk::CommandBuffer current_cmdbuf; 196 vk::CommandBuffer current_cmdbuf;
@@ -226,12 +200,6 @@ private:
226 struct State { 200 struct State {
227 std::optional<vk::RenderPassBeginInfo> renderpass; 201 std::optional<vk::RenderPassBeginInfo> renderpass;
228 vk::Pipeline graphics_pipeline; 202 vk::Pipeline graphics_pipeline;
229 bool viewports = false;
230 bool scissors = false;
231 bool depth_bias = false;
232 bool blend_constants = false;
233 bool depth_bounds = false;
234 bool stencil_values = false;
235 } state; 203 } state;
236 204
237 std::unique_ptr<CommandChunk> chunk; 205 std::unique_ptr<CommandChunk> chunk;
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 2da622d15..51ecb5567 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -5,7 +5,9 @@
5#include <functional> 5#include <functional>
6#include <limits> 6#include <limits>
7#include <map> 7#include <map>
8#include <optional>
8#include <type_traits> 9#include <type_traits>
10#include <unordered_map>
9#include <utility> 11#include <utility>
10 12
11#include <fmt/format.h> 13#include <fmt/format.h>
@@ -24,6 +26,7 @@
24#include "video_core/renderer_vulkan/vk_shader_decompiler.h" 26#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
25#include "video_core/shader/node.h" 27#include "video_core/shader/node.h"
26#include "video_core/shader/shader_ir.h" 28#include "video_core/shader/shader_ir.h"
29#include "video_core/shader/transform_feedback.h"
27 30
28namespace Vulkan { 31namespace Vulkan {
29 32
@@ -69,8 +72,9 @@ struct TexelBuffer {
69 72
70struct SampledImage { 73struct SampledImage {
71 Id image_type{}; 74 Id image_type{};
72 Id sampled_image_type{}; 75 Id sampler_type{};
73 Id sampler{}; 76 Id sampler_pointer_type{};
77 Id variable{};
74}; 78};
75 79
76struct StorageImage { 80struct StorageImage {
@@ -92,6 +96,12 @@ struct VertexIndices {
92 std::optional<u32> clip_distances; 96 std::optional<u32> clip_distances;
93}; 97};
94 98
/// Describes one output variable declared for (part of) a generic varying.
/// DeclareOutputAttribute stores one copy in output_attributes for every component the
/// variable covers, keyed by attribute index * 4 + component.
99struct GenericVaryingDescription {
// Id of the output variable (the result of OpVariable) that holds the component(s).
100 Id id = nullptr;
// Component index, within the attribute, at which this variable starts.
101 u32 first_element = 0;
// True when the variable was declared with exactly one component; the output attribute
// reader then passes no component index when accessing it.
102 bool is_scalar = false;
103};
104
95spv::Dim GetSamplerDim(const Sampler& sampler) { 105spv::Dim GetSamplerDim(const Sampler& sampler) {
96 ASSERT(!sampler.IsBuffer()); 106 ASSERT(!sampler.IsBuffer());
97 switch (sampler.GetType()) { 107 switch (sampler.GetType()) {
@@ -265,9 +275,13 @@ bool IsPrecise(Operation operand) {
265class SPIRVDecompiler final : public Sirit::Module { 275class SPIRVDecompiler final : public Sirit::Module {
266public: 276public:
267 explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderType stage, 277 explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderType stage,
268 const Specialization& specialization) 278 const Registry& registry, const Specialization& specialization)
269 : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()}, 279 : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()},
270 specialization{specialization} { 280 registry{registry}, specialization{specialization} {
281 if (stage != ShaderType::Compute) {
282 transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
283 }
284
271 AddCapability(spv::Capability::Shader); 285 AddCapability(spv::Capability::Shader);
272 AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess); 286 AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess);
273 AddCapability(spv::Capability::ImageQuery); 287 AddCapability(spv::Capability::ImageQuery);
@@ -285,6 +299,15 @@ public:
285 AddExtension("SPV_KHR_variable_pointers"); 299 AddExtension("SPV_KHR_variable_pointers");
286 AddExtension("SPV_KHR_shader_draw_parameters"); 300 AddExtension("SPV_KHR_shader_draw_parameters");
287 301
302 if (!transform_feedback.empty()) {
303 if (device.IsExtTransformFeedbackSupported()) {
304 AddCapability(spv::Capability::TransformFeedback);
305 } else {
306 LOG_ERROR(Render_Vulkan, "Shader requires transform feedbacks but these are not "
307 "supported on this device");
308 }
309 }
310
288 if (ir.UsesLayer() || ir.UsesViewportIndex()) { 311 if (ir.UsesLayer() || ir.UsesViewportIndex()) {
289 if (ir.UsesViewportIndex()) { 312 if (ir.UsesViewportIndex()) {
290 AddCapability(spv::Capability::MultiViewport); 313 AddCapability(spv::Capability::MultiViewport);
@@ -295,7 +318,7 @@ public:
295 } 318 }
296 } 319 }
297 320
298 if (device.IsShaderStorageImageReadWithoutFormatSupported()) { 321 if (device.IsFormatlessImageLoadSupported()) {
299 AddCapability(spv::Capability::StorageImageReadWithoutFormat); 322 AddCapability(spv::Capability::StorageImageReadWithoutFormat);
300 } 323 }
301 324
@@ -317,25 +340,29 @@ public:
317 AddExecutionMode(main, spv::ExecutionMode::OutputVertices, 340 AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
318 header.common2.threads_per_input_primitive); 341 header.common2.threads_per_input_primitive);
319 break; 342 break;
320 case ShaderType::TesselationEval: 343 case ShaderType::TesselationEval: {
344 const auto& info = registry.GetGraphicsInfo();
321 AddCapability(spv::Capability::Tessellation); 345 AddCapability(spv::Capability::Tessellation);
322 AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces); 346 AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces);
323 AddExecutionMode(main, GetExecutionMode(specialization.tessellation.primitive)); 347 AddExecutionMode(main, GetExecutionMode(info.tessellation_primitive));
324 AddExecutionMode(main, GetExecutionMode(specialization.tessellation.spacing)); 348 AddExecutionMode(main, GetExecutionMode(info.tessellation_spacing));
325 AddExecutionMode(main, specialization.tessellation.clockwise 349 AddExecutionMode(main, info.tessellation_clockwise
326 ? spv::ExecutionMode::VertexOrderCw 350 ? spv::ExecutionMode::VertexOrderCw
327 : spv::ExecutionMode::VertexOrderCcw); 351 : spv::ExecutionMode::VertexOrderCcw);
328 break; 352 break;
329 case ShaderType::Geometry: 353 }
354 case ShaderType::Geometry: {
355 const auto& info = registry.GetGraphicsInfo();
330 AddCapability(spv::Capability::Geometry); 356 AddCapability(spv::Capability::Geometry);
331 AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces); 357 AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces);
332 AddExecutionMode(main, GetExecutionMode(specialization.primitive_topology)); 358 AddExecutionMode(main, GetExecutionMode(info.primitive_topology));
333 AddExecutionMode(main, GetExecutionMode(header.common3.output_topology)); 359 AddExecutionMode(main, GetExecutionMode(header.common3.output_topology));
334 AddExecutionMode(main, spv::ExecutionMode::OutputVertices, 360 AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
335 header.common4.max_output_vertices); 361 header.common4.max_output_vertices);
336 // TODO(Rodrigo): Where can we get this info from? 362 // TODO(Rodrigo): Where can we get this info from?
337 AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U); 363 AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U);
338 break; 364 break;
365 }
339 case ShaderType::Fragment: 366 case ShaderType::Fragment:
340 AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces); 367 AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces);
341 AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); 368 AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft);
@@ -544,7 +571,8 @@ private:
544 if (stage != ShaderType::Geometry) { 571 if (stage != ShaderType::Geometry) {
545 return; 572 return;
546 } 573 }
547 const u32 num_input = GetNumPrimitiveTopologyVertices(specialization.primitive_topology); 574 const auto& info = registry.GetGraphicsInfo();
575 const u32 num_input = GetNumPrimitiveTopologyVertices(info.primitive_topology);
548 DeclareInputVertexArray(num_input); 576 DeclareInputVertexArray(num_input);
549 DeclareOutputVertex(); 577 DeclareOutputVertex();
550 } 578 }
@@ -741,12 +769,34 @@ private:
741 } 769 }
742 770
743 void DeclareOutputAttributes() { 771 void DeclareOutputAttributes() {
772 if (stage == ShaderType::Compute || stage == ShaderType::Fragment) {
773 return;
774 }
775
776 UNIMPLEMENTED_IF(registry.GetGraphicsInfo().tfb_enabled && stage != ShaderType::Vertex);
744 for (const auto index : ir.GetOutputAttributes()) { 777 for (const auto index : ir.GetOutputAttributes()) {
745 if (!IsGenericAttribute(index)) { 778 if (!IsGenericAttribute(index)) {
746 continue; 779 continue;
747 } 780 }
748 const u32 location = GetGenericAttributeLocation(index); 781 DeclareOutputAttribute(index);
749 Id type = t_float4; 782 }
783 }
784
785 void DeclareOutputAttribute(Attribute::Index index) {
786 static constexpr std::string_view swizzle = "xyzw";
787
788 const u32 location = GetGenericAttributeLocation(index);
789 u8 element = 0;
790 while (element < 4) {
791 const std::size_t remainder = 4 - element;
792
793 std::size_t num_components = remainder;
794 const std::optional tfb = GetTransformFeedbackInfo(index, element);
795 if (tfb) {
796 num_components = tfb->components;
797 }
798
799 Id type = GetTypeVectorDefinitionLut(Type::Float).at(num_components - 1);
750 Id varying_default = v_varying_default; 800 Id varying_default = v_varying_default;
751 if (IsOutputAttributeArray()) { 801 if (IsOutputAttributeArray()) {
752 const u32 num = GetNumOutputVertices(); 802 const u32 num = GetNumOutputVertices();
@@ -759,15 +809,47 @@ private:
759 } 809 }
760 type = TypePointer(spv::StorageClass::Output, type); 810 type = TypePointer(spv::StorageClass::Output, type);
761 811
812 std::string name = fmt::format("out_attr{}", location);
813 if (num_components < 4 || element > 0) {
814 name = fmt::format("{}_{}", name, swizzle.substr(element, num_components));
815 }
816
762 const Id id = OpVariable(type, spv::StorageClass::Output, varying_default); 817 const Id id = OpVariable(type, spv::StorageClass::Output, varying_default);
763 Name(AddGlobalVariable(id), fmt::format("out_attr{}", location)); 818 Name(AddGlobalVariable(id), name);
764 output_attributes.emplace(index, id); 819
820 GenericVaryingDescription description;
821 description.id = id;
822 description.first_element = element;
823 description.is_scalar = num_components == 1;
824 for (u32 i = 0; i < num_components; ++i) {
825 const u8 offset = static_cast<u8>(static_cast<u32>(index) * 4 + element + i);
826 output_attributes.emplace(offset, description);
827 }
765 interfaces.push_back(id); 828 interfaces.push_back(id);
766 829
767 Decorate(id, spv::Decoration::Location, location); 830 Decorate(id, spv::Decoration::Location, location);
831 if (element > 0) {
832 Decorate(id, spv::Decoration::Component, static_cast<u32>(element));
833 }
834 if (tfb && device.IsExtTransformFeedbackSupported()) {
835 Decorate(id, spv::Decoration::XfbBuffer, static_cast<u32>(tfb->buffer));
836 Decorate(id, spv::Decoration::XfbStride, static_cast<u32>(tfb->stride));
837 Decorate(id, spv::Decoration::Offset, static_cast<u32>(tfb->offset));
838 }
839
840 element = static_cast<u8>(static_cast<std::size_t>(element) + num_components);
768 } 841 }
769 } 842 }
770 843
/// Looks up the transform feedback entry registered for one component of an attribute.
/// @param index   Attribute to query.
/// @param element Component within the attribute (defaults to 0).
/// @returns A copy of the matching VaryingTFB, or an empty optional when the
///          transform_feedback map has no entry for that component.
844 std::optional<VaryingTFB> GetTransformFeedbackInfo(Attribute::Index index, u8 element = 0) {
// The map key is attribute index * 4 + component, truncated to u8.
845 const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
846 const auto it = transform_feedback.find(location);
847 if (it == transform_feedback.end()) {
848 return {};
849 }
850 return it->second;
851 }
852
771 u32 DeclareConstantBuffers(u32 binding) { 853 u32 DeclareConstantBuffers(u32 binding) {
772 for (const auto& [index, size] : ir.GetConstantBuffers()) { 854 for (const auto& [index, size] : ir.GetConstantBuffers()) {
773 const Id type = device.IsKhrUniformBufferStandardLayoutSupported() ? t_cbuf_scalar_ubo 855 const Id type = device.IsKhrUniformBufferStandardLayoutSupported() ? t_cbuf_scalar_ubo
@@ -833,16 +915,20 @@ private:
833 constexpr int sampled = 1; 915 constexpr int sampled = 1;
834 constexpr auto format = spv::ImageFormat::Unknown; 916 constexpr auto format = spv::ImageFormat::Unknown;
835 const Id image_type = TypeImage(t_float, dim, depth, arrayed, ms, sampled, format); 917 const Id image_type = TypeImage(t_float, dim, depth, arrayed, ms, sampled, format);
836 const Id sampled_image_type = TypeSampledImage(image_type); 918 const Id sampler_type = TypeSampledImage(image_type);
837 const Id pointer_type = 919 const Id sampler_pointer_type =
838 TypePointer(spv::StorageClass::UniformConstant, sampled_image_type); 920 TypePointer(spv::StorageClass::UniformConstant, sampler_type);
921 const Id type = sampler.IsIndexed()
922 ? TypeArray(sampler_type, Constant(t_uint, sampler.Size()))
923 : sampler_type;
924 const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, type);
839 const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); 925 const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
840 AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.GetIndex()))); 926 AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.GetIndex())));
841 Decorate(id, spv::Decoration::Binding, binding++); 927 Decorate(id, spv::Decoration::Binding, binding++);
842 Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); 928 Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
843 929
844 sampled_images.emplace(sampler.GetIndex(), 930 sampled_images.emplace(sampler.GetIndex(), SampledImage{image_type, sampler_type,
845 SampledImage{image_type, sampled_image_type, id}); 931 sampler_pointer_type, id});
846 } 932 }
847 return binding; 933 return binding;
848 } 934 }
@@ -893,7 +979,7 @@ private:
893 u32 GetNumInputVertices() const { 979 u32 GetNumInputVertices() const {
894 switch (stage) { 980 switch (stage) {
895 case ShaderType::Geometry: 981 case ShaderType::Geometry:
896 return GetNumPrimitiveTopologyVertices(specialization.primitive_topology); 982 return GetNumPrimitiveTopologyVertices(registry.GetGraphicsInfo().primitive_topology);
897 case ShaderType::TesselationControl: 983 case ShaderType::TesselationControl:
898 case ShaderType::TesselationEval: 984 case ShaderType::TesselationEval:
899 return NumInputPatches; 985 return NumInputPatches;
@@ -1341,8 +1427,14 @@ private:
1341 } 1427 }
1342 default: 1428 default:
1343 if (IsGenericAttribute(attribute)) { 1429 if (IsGenericAttribute(attribute)) {
1344 const Id composite = output_attributes.at(attribute); 1430 const u8 offset = static_cast<u8>(static_cast<u8>(attribute) * 4 + element);
1345 return {ArrayPass(t_out_float, composite, {element}), Type::Float}; 1431 const GenericVaryingDescription description = output_attributes.at(offset);
1432 const Id composite = description.id;
1433 std::vector<u32> indices;
1434 if (!description.is_scalar) {
1435 indices.push_back(element - description.first_element);
1436 }
1437 return {ArrayPass(t_out_float, composite, indices), Type::Float};
1346 } 1438 }
1347 UNIMPLEMENTED_MSG("Unhandled output attribute: {}", 1439 UNIMPLEMENTED_MSG("Unhandled output attribute: {}",
1348 static_cast<u32>(attribute)); 1440 static_cast<u32>(attribute));
@@ -1525,7 +1617,12 @@ private:
1525 ASSERT(!meta.sampler.IsBuffer()); 1617 ASSERT(!meta.sampler.IsBuffer());
1526 1618
1527 const auto& entry = sampled_images.at(meta.sampler.GetIndex()); 1619 const auto& entry = sampled_images.at(meta.sampler.GetIndex());
1528 return OpLoad(entry.sampled_image_type, entry.sampler); 1620 Id sampler = entry.variable;
1621 if (meta.sampler.IsIndexed()) {
1622 const Id index = AsInt(Visit(meta.index));
1623 sampler = OpAccessChain(entry.sampler_pointer_type, sampler, index);
1624 }
1625 return OpLoad(entry.sampler_type, sampler);
1529 } 1626 }
1530 1627
1531 Id GetTextureImage(Operation operation) { 1628 Id GetTextureImage(Operation operation) {
@@ -1783,7 +1880,7 @@ private:
1783 } 1880 }
1784 1881
1785 Expression ImageLoad(Operation operation) { 1882 Expression ImageLoad(Operation operation) {
1786 if (!device.IsShaderStorageImageReadWithoutFormatSupported()) { 1883 if (!device.IsFormatlessImageLoadSupported()) {
1787 return {v_float_zero, Type::Float}; 1884 return {v_float_zero, Type::Float};
1788 } 1885 }
1789 1886
@@ -2211,16 +2308,14 @@ private:
2211 switch (specialization.attribute_types.at(location)) { 2308 switch (specialization.attribute_types.at(location)) {
2212 case Maxwell::VertexAttribute::Type::SignedNorm: 2309 case Maxwell::VertexAttribute::Type::SignedNorm:
2213 case Maxwell::VertexAttribute::Type::UnsignedNorm: 2310 case Maxwell::VertexAttribute::Type::UnsignedNorm:
2311 case Maxwell::VertexAttribute::Type::UnsignedScaled:
2312 case Maxwell::VertexAttribute::Type::SignedScaled:
2214 case Maxwell::VertexAttribute::Type::Float: 2313 case Maxwell::VertexAttribute::Type::Float:
2215 return {Type::Float, t_in_float, t_in_float4}; 2314 return {Type::Float, t_in_float, t_in_float4};
2216 case Maxwell::VertexAttribute::Type::SignedInt: 2315 case Maxwell::VertexAttribute::Type::SignedInt:
2217 return {Type::Int, t_in_int, t_in_int4}; 2316 return {Type::Int, t_in_int, t_in_int4};
2218 case Maxwell::VertexAttribute::Type::UnsignedInt: 2317 case Maxwell::VertexAttribute::Type::UnsignedInt:
2219 return {Type::Uint, t_in_uint, t_in_uint4}; 2318 return {Type::Uint, t_in_uint, t_in_uint4};
2220 case Maxwell::VertexAttribute::Type::UnsignedScaled:
2221 case Maxwell::VertexAttribute::Type::SignedScaled:
2222 UNIMPLEMENTED();
2223 return {Type::Float, t_in_float, t_in_float4};
2224 default: 2319 default:
2225 UNREACHABLE(); 2320 UNREACHABLE();
2226 return {Type::Float, t_in_float, t_in_float4}; 2321 return {Type::Float, t_in_float, t_in_float4};
@@ -2250,11 +2345,11 @@ private:
2250 std::array<Id, 4> GetTypeVectorDefinitionLut(Type type) const { 2345 std::array<Id, 4> GetTypeVectorDefinitionLut(Type type) const {
2251 switch (type) { 2346 switch (type) {
2252 case Type::Float: 2347 case Type::Float:
2253 return {nullptr, t_float2, t_float3, t_float4}; 2348 return {t_float, t_float2, t_float3, t_float4};
2254 case Type::Int: 2349 case Type::Int:
2255 return {nullptr, t_int2, t_int3, t_int4}; 2350 return {t_int, t_int2, t_int3, t_int4};
2256 case Type::Uint: 2351 case Type::Uint:
2257 return {nullptr, t_uint2, t_uint3, t_uint4}; 2352 return {t_uint, t_uint2, t_uint3, t_uint4};
2258 default: 2353 default:
2259 UNIMPLEMENTED(); 2354 UNIMPLEMENTED();
2260 return {}; 2355 return {};
@@ -2487,7 +2582,9 @@ private:
2487 const ShaderIR& ir; 2582 const ShaderIR& ir;
2488 const ShaderType stage; 2583 const ShaderType stage;
2489 const Tegra::Shader::Header header; 2584 const Tegra::Shader::Header header;
2585 const Registry& registry;
2490 const Specialization& specialization; 2586 const Specialization& specialization;
2587 std::unordered_map<u8, VaryingTFB> transform_feedback;
2491 2588
2492 const Id t_void = Name(TypeVoid(), "void"); 2589 const Id t_void = Name(TypeVoid(), "void");
2493 2590
@@ -2576,7 +2673,7 @@ private:
2576 Id shared_memory{}; 2673 Id shared_memory{};
2577 std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{}; 2674 std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{};
2578 std::map<Attribute::Index, Id> input_attributes; 2675 std::map<Attribute::Index, Id> input_attributes;
2579 std::map<Attribute::Index, Id> output_attributes; 2676 std::unordered_map<u8, GenericVaryingDescription> output_attributes;
2580 std::map<u32, Id> constant_buffers; 2677 std::map<u32, Id> constant_buffers;
2581 std::map<GlobalMemoryBase, Id> global_buffers; 2678 std::map<GlobalMemoryBase, Id> global_buffers;
2582 std::map<u32, TexelBuffer> texel_buffers; 2679 std::map<u32, TexelBuffer> texel_buffers;
@@ -2862,8 +2959,9 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
2862} 2959}
2863 2960
2864std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, 2961std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
2865 ShaderType stage, const Specialization& specialization) { 2962 ShaderType stage, const VideoCommon::Shader::Registry& registry,
2866 return SPIRVDecompiler(device, ir, stage, specialization).Assemble(); 2963 const Specialization& specialization) {
2964 return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble();
2867} 2965}
2868 2966
2869} // namespace Vulkan 2967} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
index f5dc14d9e..ffea4709e 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -15,6 +15,7 @@
15#include "common/common_types.h" 15#include "common/common_types.h"
16#include "video_core/engines/maxwell_3d.h" 16#include "video_core/engines/maxwell_3d.h"
17#include "video_core/engines/shader_type.h" 17#include "video_core/engines/shader_type.h"
18#include "video_core/shader/registry.h"
18#include "video_core/shader/shader_ir.h" 19#include "video_core/shader/shader_ir.h"
19 20
20namespace Vulkan { 21namespace Vulkan {
@@ -91,17 +92,9 @@ struct Specialization final {
91 u32 shared_memory_size{}; 92 u32 shared_memory_size{};
92 93
93 // Graphics specific 94 // Graphics specific
94 Maxwell::PrimitiveTopology primitive_topology{};
95 std::optional<float> point_size{}; 95 std::optional<float> point_size{};
96 std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; 96 std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
97 bool ndc_minus_one_to_one{}; 97 bool ndc_minus_one_to_one{};
98
99 // Tessellation specific
100 struct {
101 Maxwell::TessellationPrimitive primitive{};
102 Maxwell::TessellationSpacing spacing{};
103 bool clockwise{};
104 } tessellation;
105}; 98};
106// Old gcc versions don't consider this trivially copyable. 99// Old gcc versions don't consider this trivially copyable.
107// static_assert(std::is_trivially_copyable_v<Specialization>); 100// static_assert(std::is_trivially_copyable_v<Specialization>);
@@ -114,6 +107,8 @@ struct SPIRVShader {
114ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir); 107ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir);
115 108
116std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, 109std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
117 Tegra::Engines::ShaderType stage, const Specialization& specialization); 110 Tegra::Engines::ShaderType stage,
111 const VideoCommon::Shader::Registry& registry,
112 const Specialization& specialization);
118 113
119} // namespace Vulkan 114} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 171d78afc..374959f82 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -73,7 +73,8 @@ VKBuffer* VKStagingBufferPool::TryGetReservedBuffer(std::size_t size, bool host_
73VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_visible) { 73VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_visible) {
74 const auto usage = 74 const auto usage =
75 vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst | 75 vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst |
76 vk::BufferUsageFlagBits::eStorageBuffer | vk::BufferUsageFlagBits::eIndexBuffer; 76 vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer |
77 vk::BufferUsageFlagBits::eIndexBuffer;
77 const u32 log2 = Common::Log2Ceil64(size); 78 const u32 log2 = Common::Log2Ceil64(size);
78 const vk::BufferCreateInfo buffer_ci({}, 1ULL << log2, usage, vk::SharingMode::eExclusive, 0, 79 const vk::BufferCreateInfo buffer_ci({}, 1ULL << log2, usage, vk::SharingMode::eExclusive, 0,
79 nullptr); 80 nullptr);
@@ -99,7 +100,6 @@ void VKStagingBufferPool::ReleaseCache(bool host_visible) {
99} 100}
100 101
101u64 VKStagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, std::size_t log2) { 102u64 VKStagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, std::size_t log2) {
102 static constexpr u64 epochs_to_destroy = 180;
103 static constexpr std::size_t deletions_per_tick = 16; 103 static constexpr std::size_t deletions_per_tick = 16;
104 104
105 auto& staging = cache[log2]; 105 auto& staging = cache[log2];
@@ -107,6 +107,7 @@ u64 VKStagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, std::size_t lo
107 const std::size_t old_size = entries.size(); 107 const std::size_t old_size = entries.size();
108 108
109 const auto is_deleteable = [this](const auto& entry) { 109 const auto is_deleteable = [this](const auto& entry) {
110 static constexpr u64 epochs_to_destroy = 180;
110 return entry.last_epoch + epochs_to_destroy < epoch && !entry.watch.IsUsed(); 111 return entry.last_epoch + epochs_to_destroy < epoch && !entry.watch.IsUsed();
111 }; 112 };
112 const std::size_t begin_offset = staging.delete_index; 113 const std::size_t begin_offset = staging.delete_index;
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
new file mode 100644
index 000000000..94a89e388
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
@@ -0,0 +1,99 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <cstddef>
7#include <iterator>
8
9#include "common/common_types.h"
10#include "core/core.h"
11#include "video_core/dirty_flags.h"
12#include "video_core/engines/maxwell_3d.h"
13#include "video_core/gpu.h"
14#include "video_core/renderer_vulkan/vk_state_tracker.h"
15
// OFF(field) forwards to MAXWELL3D_REG_INDEX(field); it is used below as an index into a
// dirty table.
16#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name)
// NUM(field) is the size of Maxwell3D::Regs::field expressed in u32 words.
17#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / sizeof(u32))
18
19namespace Vulkan {
20
21namespace {
22
23using namespace Dirty;
24using namespace VideoCommon::Dirty;
25using Tegra::Engines::Maxwell3D;
26using Regs = Maxwell3D::Regs;
27using Tables = Maxwell3D::DirtyState::Tables;
28using Table = Maxwell3D::DirtyState::Table;
29using Flags = Maxwell3D::DirtyState::Flags;
30
/// Builds the flag set that StateTracker stores as invalidation_flags.
/// @returns A value-initialized Flags with the Viewports, Scissors, DepthBias, BlendConstants,
///          DepthBounds and StencilProperties entries set to true.
31Flags MakeInvalidationFlags() {
32 Flags flags{};
33 flags[Viewports] = true;
34 flags[Scissors] = true;
35 flags[DepthBias] = true;
36 flags[BlendConstants] = true;
37 flags[DepthBounds] = true;
38 flags[StencilProperties] = true;
39 return flags;
40}
41
/// Associates the viewport registers with the Viewports flag in tables[0].
/// Covers the viewport_transform and viewports blocks plus the single
/// viewport_transform_enabled register.
/// NOTE(review): FillBlock is defined outside this file; presumably it assigns the flag to
/// NUM(...) consecutive entries starting at OFF(...) — confirm against its definition.
42void SetupDirtyViewports(Tables& tables) {
43 FillBlock(tables[0], OFF(viewport_transform), NUM(viewport_transform), Viewports);
44 FillBlock(tables[0], OFF(viewports), NUM(viewports), Viewports);
45 tables[0][OFF(viewport_transform_enabled)] = Viewports;
46}
47
/// Associates the scissor_test register block with the Scissors flag in tables[0].
/// NOTE(review): relies on FillBlock (defined outside this file) covering NUM(scissor_test)
/// entries starting at OFF(scissor_test) — confirm.
48void SetupDirtyScissors(Tables& tables) {
49 FillBlock(tables[0], OFF(scissor_test), NUM(scissor_test), Scissors);
50}
51
/// Associates the three polygon offset registers (units, clamp, factor) with the DepthBias
/// flag in tables[0].
52void SetupDirtyDepthBias(Tables& tables) {
53 auto& table = tables[0];
54 table[OFF(polygon_offset_units)] = DepthBias;
55 table[OFF(polygon_offset_clamp)] = DepthBias;
56 table[OFF(polygon_offset_factor)] = DepthBias;
57}
58
/// Associates the blend_color register block with the BlendConstants flag in tables[0].
/// NOTE(review): relies on FillBlock (defined outside this file) covering NUM(blend_color)
/// entries starting at OFF(blend_color) — confirm.
59void SetupDirtyBlendConstants(Tables& tables) {
60 FillBlock(tables[0], OFF(blend_color), NUM(blend_color), BlendConstants);
61}
62
/// Associates the depth_bounds register block with the DepthBounds flag in tables[0].
/// NOTE(review): relies on FillBlock (defined outside this file) covering NUM(depth_bounds)
/// entries starting at OFF(depth_bounds) — confirm.
63void SetupDirtyDepthBounds(Tables& tables) {
64 FillBlock(tables[0], OFF(depth_bounds), NUM(depth_bounds), DepthBounds);
65}
66
/// Associates the stencil registers with the StencilProperties flag in tables[0]:
/// the two-side enable plus the func_ref, mask and func_mask registers of both faces.
67void SetupDirtyStencilProperties(Tables& tables) {
68 auto& table = tables[0];
69 table[OFF(stencil_two_side_enable)] = StencilProperties;
// Front face.
70 table[OFF(stencil_front_func_ref)] = StencilProperties;
71 table[OFF(stencil_front_mask)] = StencilProperties;
72 table[OFF(stencil_front_func_mask)] = StencilProperties;
// Back face.
73 table[OFF(stencil_back_func_ref)] = StencilProperties;
74 table[OFF(stencil_back_mask)] = StencilProperties;
75 table[OFF(stencil_back_func_mask)] = StencilProperties;
76}
77
78} // Anonymous namespace
79
/// Stores the system reference and precomputes invalidation_flags with
/// MakeInvalidationFlags(). The dirty tables are not touched here; that is done by Initialize().
80StateTracker::StateTracker(Core::System& system)
81 : system{system}, invalidation_flags{MakeInvalidationFlags()} {}
82
/// Fills the Maxwell3D dirty tables with the register-to-flag associations used by this
/// renderer.
83void StateTracker::Initialize() {
84 auto& dirty = system.GPU().Maxwell3D().dirty;
85 auto& tables = dirty.tables;
// SetupDirtyRenderTargets is not defined in this file; presumably it is the shared helper
// from video_core/dirty_flags.h — confirm.
86 SetupDirtyRenderTargets(tables);
// The remaining helpers are the file-local ones defined in the anonymous namespace above.
87 SetupDirtyViewports(tables);
88 SetupDirtyScissors(tables);
89 SetupDirtyDepthBias(tables);
90 SetupDirtyBlendConstants(tables);
91 SetupDirtyDepthBounds(tables);
92 SetupDirtyStencilProperties(tables);
93}
94
/// Sets every flag contained in invalidation_flags on the Maxwell3D dirty flags (bitwise OR),
/// leaving all other flags as they were.
95void StateTracker::InvalidateCommandBufferState() {
96 system.GPU().Maxwell3D().dirty.flags |= invalidation_flags;
97}
98
99} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h
new file mode 100644
index 000000000..03bc415b2
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.h
@@ -0,0 +1,79 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstddef>
8#include <limits>
9
10#include "common/common_types.h"
11#include "core/core.h"
12#include "video_core/dirty_flags.h"
13#include "video_core/engines/maxwell_3d.h"
14
15namespace Vulkan {
16
/// Dirty flag ids used by the Vulkan renderer, numbered after the common ids.
17namespace Dirty {
18
19enum : u8 {
// First takes the value of VideoCommon::Dirty::LastCommonEntry, so Viewports is First + 1.
20 First = VideoCommon::Dirty::LastCommonEntry,
21
22 Viewports,
23 Scissors,
24 DepthBias,
25 BlendConstants,
26 DepthBounds,
27 StencilProperties,
28
// One past the last id declared above.
29 Last
30};
// The ids must stay representable as u8.
31static_assert(Last <= std::numeric_limits<u8>::max());
32
33} // namespace Dirty
34
/// Gives the Vulkan renderer access to its entries of the Maxwell3D dirty flags
/// (system.GPU().Maxwell3D().dirty.flags).
35class StateTracker {
36public:
37 explicit StateTracker(Core::System& system);
38
/// Registers this renderer's register-to-flag associations in the dirty tables.
39 void Initialize();
40
/// Sets all flags contained in invalidation_flags on the dirty flags.
41 void InvalidateCommandBufferState();
42
/// Returns whether the Viewports flag was set, and clears it.
43 bool TouchViewports() {
44 return Exchange(Dirty::Viewports, false);
45 }
46
/// Returns whether the Scissors flag was set, and clears it.
47 bool TouchScissors() {
48 return Exchange(Dirty::Scissors, false);
49 }
50
/// Returns whether the DepthBias flag was set, and clears it.
51 bool TouchDepthBias() {
52 return Exchange(Dirty::DepthBias, false);
53 }
54
/// Returns whether the BlendConstants flag was set, and clears it.
55 bool TouchBlendConstants() {
56 return Exchange(Dirty::BlendConstants, false);
57 }
58
/// Returns whether the DepthBounds flag was set, and clears it.
59 bool TouchDepthBounds() {
60 return Exchange(Dirty::DepthBounds, false);
61 }
62
/// Returns whether the StencilProperties flag was set, and clears it.
63 bool TouchStencilProperties() {
64 return Exchange(Dirty::StencilProperties, false);
65 }
66
67private:
/// Writes new_value to dirty flag `id` and returns the value the flag held before.
/// The method is const because it only modifies state reached through the `system`
/// reference, not a member of this object.
68 bool Exchange(std::size_t id, bool new_value) const noexcept {
69 auto& flags = system.GPU().Maxwell3D().dirty.flags;
70 const bool is_dirty = flags[id];
71 flags[id] = new_value;
72 return is_dirty;
73 }
74
// Used to reach the GPU's Maxwell3D dirty state.
75 Core::System& system;
// Flags that InvalidateCommandBufferState() ORs into the dirty flags.
76 Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags;
77};
78
79} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
index f47b691a8..9e73fa9cd 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.cpp
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -141,11 +141,6 @@ void VKSwapchain::CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities
141 141
142 const vk::SurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats, srgb)}; 142 const vk::SurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats, srgb)};
143 const vk::PresentModeKHR present_mode{ChooseSwapPresentMode(present_modes)}; 143 const vk::PresentModeKHR present_mode{ChooseSwapPresentMode(present_modes)};
144 extent = ChooseSwapExtent(capabilities, width, height);
145
146 current_width = extent.width;
147 current_height = extent.height;
148 current_srgb = srgb;
149 144
150 u32 requested_image_count{capabilities.minImageCount + 1}; 145 u32 requested_image_count{capabilities.minImageCount + 1};
151 if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) { 146 if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) {
@@ -153,10 +148,9 @@ void VKSwapchain::CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities
153 } 148 }
154 149
155 vk::SwapchainCreateInfoKHR swapchain_ci( 150 vk::SwapchainCreateInfoKHR swapchain_ci(
156 {}, surface, requested_image_count, surface_format.format, surface_format.colorSpace, 151 {}, surface, requested_image_count, surface_format.format, surface_format.colorSpace, {}, 1,
157 extent, 1, vk::ImageUsageFlagBits::eColorAttachment, {}, {}, {}, 152 vk::ImageUsageFlagBits::eColorAttachment, {}, {}, {}, capabilities.currentTransform,
158 capabilities.currentTransform, vk::CompositeAlphaFlagBitsKHR::eOpaque, present_mode, false, 153 vk::CompositeAlphaFlagBitsKHR::eOpaque, present_mode, false, {});
159 {});
160 154
161 const u32 graphics_family{device.GetGraphicsFamily()}; 155 const u32 graphics_family{device.GetGraphicsFamily()};
162 const u32 present_family{device.GetPresentFamily()}; 156 const u32 present_family{device.GetPresentFamily()};
@@ -169,9 +163,18 @@ void VKSwapchain::CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities
169 swapchain_ci.imageSharingMode = vk::SharingMode::eExclusive; 163 swapchain_ci.imageSharingMode = vk::SharingMode::eExclusive;
170 } 164 }
171 165
166 // Request the size again to reduce the possibility of a TOCTOU race condition.
167 const auto updated_capabilities = physical_device.getSurfaceCapabilitiesKHR(surface, dld);
168 swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height);
169 // Don't add code within this and the swapchain creation.
172 const auto dev{device.GetLogical()}; 170 const auto dev{device.GetLogical()};
173 swapchain = dev.createSwapchainKHRUnique(swapchain_ci, nullptr, dld); 171 swapchain = dev.createSwapchainKHRUnique(swapchain_ci, nullptr, dld);
174 172
173 extent = swapchain_ci.imageExtent;
174 current_width = extent.width;
175 current_height = extent.height;
176 current_srgb = srgb;
177
175 images = dev.getSwapchainImagesKHR(*swapchain, dld); 178 images = dev.getSwapchainImagesKHR(*swapchain, dld);
176 image_count = static_cast<u32>(images.size()); 179 image_count = static_cast<u32>(images.size());
177 image_format = surface_format.format; 180 image_format = surface_format.format;
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 51b0d38a6..26175921b 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -22,6 +22,7 @@
22#include "video_core/renderer_vulkan/vk_device.h" 22#include "video_core/renderer_vulkan/vk_device.h"
23#include "video_core/renderer_vulkan/vk_memory_manager.h" 23#include "video_core/renderer_vulkan/vk_memory_manager.h"
24#include "video_core/renderer_vulkan/vk_rasterizer.h" 24#include "video_core/renderer_vulkan/vk_rasterizer.h"
25#include "video_core/renderer_vulkan/vk_scheduler.h"
25#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 26#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
26#include "video_core/renderer_vulkan/vk_texture_cache.h" 27#include "video_core/renderer_vulkan/vk_texture_cache.h"
27#include "video_core/surface.h" 28#include "video_core/surface.h"
@@ -51,6 +52,9 @@ vk::ImageType SurfaceTargetToImage(SurfaceTarget target) {
51 return vk::ImageType::e2D; 52 return vk::ImageType::e2D;
52 case SurfaceTarget::Texture3D: 53 case SurfaceTarget::Texture3D:
53 return vk::ImageType::e3D; 54 return vk::ImageType::e3D;
55 case SurfaceTarget::TextureBuffer:
56 UNREACHABLE();
57 return {};
54 } 58 }
55 UNREACHABLE_MSG("Unknown texture target={}", static_cast<u32>(target)); 59 UNREACHABLE_MSG("Unknown texture target={}", static_cast<u32>(target));
56 return {}; 60 return {};
@@ -272,7 +276,6 @@ void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) {
272 276
273 for (u32 level = 0; level < params.num_levels; ++level) { 277 for (u32 level = 0; level < params.num_levels; ++level) {
274 vk::BufferImageCopy copy = GetBufferImageCopy(level); 278 vk::BufferImageCopy copy = GetBufferImageCopy(level);
275 const auto& dld = device.GetDispatchLoader();
276 if (image->GetAspectMask() == 279 if (image->GetAspectMask() ==
277 (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil)) { 280 (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil)) {
278 vk::BufferImageCopy depth = copy; 281 vk::BufferImageCopy depth = copy;
@@ -421,7 +424,6 @@ void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface,
421 dst_base_layer, num_layers, copy_params.dest_level, 1, vk::PipelineStageFlagBits::eTransfer, 424 dst_base_layer, num_layers, copy_params.dest_level, 1, vk::PipelineStageFlagBits::eTransfer,
422 vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eTransferDstOptimal); 425 vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eTransferDstOptimal);
423 426
424 const auto& dld{device.GetDispatchLoader()};
425 const vk::ImageSubresourceLayers src_subresource( 427 const vk::ImageSubresourceLayers src_subresource(
426 src_surface->GetAspectMask(), copy_params.source_level, copy_params.source_z, num_layers); 428 src_surface->GetAspectMask(), copy_params.source_level, copy_params.source_z, num_layers);
427 const vk::ImageSubresourceLayers dst_subresource( 429 const vk::ImageSubresourceLayers dst_subresource(
@@ -457,7 +459,6 @@ void VKTextureCache::ImageBlit(View& src_view, View& dst_view,
457 dst_view->GetImageSubresourceLayers(), {dst_top_left, dst_bot_right}); 459 dst_view->GetImageSubresourceLayers(), {dst_top_left, dst_bot_right});
458 const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear; 460 const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear;
459 461
460 const auto& dld{device.GetDispatchLoader()};
461 scheduler.Record([src_image = src_view->GetImage(), dst_image = dst_view->GetImage(), blit, 462 scheduler.Record([src_image = src_view->GetImage(), dst_image = dst_view->GetImage(), blit,
462 is_linear](auto cmdbuf, auto& dld) { 463 is_linear](auto cmdbuf, auto& dld) {
463 cmdbuf.blitImage(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_image, 464 cmdbuf.blitImage(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_image,
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index d3edbe80c..22e3d34de 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -151,6 +151,10 @@ public:
151 return params.GetMipHeight(base_level); 151 return params.GetMipHeight(base_level);
152 } 152 }
153 153
154 u32 GetNumLayers() const {
155 return num_layers;
156 }
157
154 bool IsBufferView() const { 158 bool IsBufferView() const {
155 return buffer_view; 159 return buffer_view;
156 } 160 }
diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp
deleted file mode 100644
index 0638be8cb..000000000
--- a/src/video_core/shader/const_buffer_locker.cpp
+++ /dev/null
@@ -1,126 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <tuple>
7
8#include "common/common_types.h"
9#include "video_core/engines/maxwell_3d.h"
10#include "video_core/engines/shader_type.h"
11#include "video_core/shader/const_buffer_locker.h"
12
13namespace VideoCommon::Shader {
14
15using Tegra::Engines::SamplerDescriptor;
16
17ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage)
18 : stage{shader_stage} {}
19
20ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
21 Tegra::Engines::ConstBufferEngineInterface& engine)
22 : stage{shader_stage}, engine{&engine} {}
23
24ConstBufferLocker::~ConstBufferLocker() = default;
25
26std::optional<u32> ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) {
27 const std::pair<u32, u32> key = {buffer, offset};
28 const auto iter = keys.find(key);
29 if (iter != keys.end()) {
30 return iter->second;
31 }
32 if (!engine) {
33 return std::nullopt;
34 }
35 const u32 value = engine->AccessConstBuffer32(stage, buffer, offset);
36 keys.emplace(key, value);
37 return value;
38}
39
40std::optional<SamplerDescriptor> ConstBufferLocker::ObtainBoundSampler(u32 offset) {
41 const u32 key = offset;
42 const auto iter = bound_samplers.find(key);
43 if (iter != bound_samplers.end()) {
44 return iter->second;
45 }
46 if (!engine) {
47 return std::nullopt;
48 }
49 const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset);
50 bound_samplers.emplace(key, value);
51 return value;
52}
53
54std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindlessSampler(
55 u32 buffer, u32 offset) {
56 const std::pair key = {buffer, offset};
57 const auto iter = bindless_samplers.find(key);
58 if (iter != bindless_samplers.end()) {
59 return iter->second;
60 }
61 if (!engine) {
62 return std::nullopt;
63 }
64 const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset);
65 bindless_samplers.emplace(key, value);
66 return value;
67}
68
69std::optional<u32> ConstBufferLocker::ObtainBoundBuffer() {
70 if (bound_buffer_saved) {
71 return bound_buffer;
72 }
73 if (!engine) {
74 return std::nullopt;
75 }
76 bound_buffer_saved = true;
77 bound_buffer = engine->GetBoundBuffer();
78 return bound_buffer;
79}
80
81void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) {
82 keys.insert_or_assign({buffer, offset}, value);
83}
84
85void ConstBufferLocker::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) {
86 bound_samplers.insert_or_assign(offset, sampler);
87}
88
89void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) {
90 bindless_samplers.insert_or_assign({buffer, offset}, sampler);
91}
92
93void ConstBufferLocker::SetBoundBuffer(u32 buffer) {
94 bound_buffer_saved = true;
95 bound_buffer = buffer;
96}
97
98bool ConstBufferLocker::IsConsistent() const {
99 if (!engine) {
100 return false;
101 }
102 return std::all_of(keys.begin(), keys.end(),
103 [this](const auto& pair) {
104 const auto [cbuf, offset] = pair.first;
105 const auto value = pair.second;
106 return value == engine->AccessConstBuffer32(stage, cbuf, offset);
107 }) &&
108 std::all_of(bound_samplers.begin(), bound_samplers.end(),
109 [this](const auto& sampler) {
110 const auto [key, value] = sampler;
111 return value == engine->AccessBoundSampler(stage, key);
112 }) &&
113 std::all_of(bindless_samplers.begin(), bindless_samplers.end(),
114 [this](const auto& sampler) {
115 const auto [cbuf, offset] = sampler.first;
116 const auto value = sampler.second;
117 return value == engine->AccessBindlessSampler(stage, cbuf, offset);
118 });
119}
120
121bool ConstBufferLocker::HasEqualKeys(const ConstBufferLocker& rhs) const {
122 return std::tie(keys, bound_samplers, bindless_samplers) ==
123 std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers);
124}
125
126} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h
deleted file mode 100644
index d3ea11087..000000000
--- a/src/video_core/shader/const_buffer_locker.h
+++ /dev/null
@@ -1,103 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <optional>
8#include <unordered_map>
9#include "common/common_types.h"
10#include "common/hash.h"
11#include "video_core/engines/const_buffer_engine_interface.h"
12#include "video_core/engines/shader_type.h"
13#include "video_core/guest_driver.h"
14
15namespace VideoCommon::Shader {
16
17using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
18using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
19using BindlessSamplerMap =
20 std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
21
22/**
23 * The ConstBufferLocker is a class use to interface the 3D and compute engines with the shader
24 * compiler. with it, the shader can obtain required data from GPU state and store it for disk
25 * shader compilation.
26 */
27class ConstBufferLocker {
28public:
29 explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage);
30
31 explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
32 Tegra::Engines::ConstBufferEngineInterface& engine);
33
34 ~ConstBufferLocker();
35
36 /// Retrieves a key from the locker, if it's registered, it will give the registered value, if
37 /// not it will obtain it from maxwell3d and register it.
38 std::optional<u32> ObtainKey(u32 buffer, u32 offset);
39
40 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
41
42 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
43
44 std::optional<u32> ObtainBoundBuffer();
45
46 /// Inserts a key.
47 void InsertKey(u32 buffer, u32 offset, u32 value);
48
49 /// Inserts a bound sampler key.
50 void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler);
51
52 /// Inserts a bindless sampler key.
53 void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
54
55 /// Set the bound buffer for this locker.
56 void SetBoundBuffer(u32 buffer);
57
58 /// Checks keys and samplers against engine's current const buffers. Returns true if they are
59 /// the same value, false otherwise;
60 bool IsConsistent() const;
61
62 /// Returns true if the keys are equal to the other ones in the locker.
63 bool HasEqualKeys(const ConstBufferLocker& rhs) const;
64
65 /// Gives an getter to the const buffer keys in the database.
66 const KeyMap& GetKeys() const {
67 return keys;
68 }
69
70 /// Gets samplers database.
71 const BoundSamplerMap& GetBoundSamplers() const {
72 return bound_samplers;
73 }
74
75 /// Gets bindless samplers database.
76 const BindlessSamplerMap& GetBindlessSamplers() const {
77 return bindless_samplers;
78 }
79
80 /// Gets bound buffer used on this shader
81 u32 GetBoundBuffer() const {
82 return bound_buffer;
83 }
84
85 /// Obtains access to the guest driver's profile.
86 VideoCore::GuestDriverProfile* AccessGuestDriverProfile() const {
87 if (engine) {
88 return &engine->AccessGuestDriverProfile();
89 }
90 return nullptr;
91 }
92
93private:
94 const Tegra::Engines::ShaderType stage;
95 Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
96 KeyMap keys;
97 BoundSamplerMap bound_samplers;
98 BindlessSamplerMap bindless_samplers;
99 bool bound_buffer_saved{};
100 u32 bound_buffer{};
101};
102
103} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index 0229733b6..2e2711350 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -13,6 +13,7 @@
13#include "common/common_types.h" 13#include "common/common_types.h"
14#include "video_core/shader/ast.h" 14#include "video_core/shader/ast.h"
15#include "video_core/shader/control_flow.h" 15#include "video_core/shader/control_flow.h"
16#include "video_core/shader/registry.h"
16#include "video_core/shader/shader_ir.h" 17#include "video_core/shader/shader_ir.h"
17 18
18namespace VideoCommon::Shader { 19namespace VideoCommon::Shader {
@@ -64,11 +65,11 @@ struct BlockInfo {
64}; 65};
65 66
66struct CFGRebuildState { 67struct CFGRebuildState {
67 explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker) 68 explicit CFGRebuildState(const ProgramCode& program_code, u32 start, Registry& registry)
68 : program_code{program_code}, locker{locker}, start{start} {} 69 : program_code{program_code}, registry{registry}, start{start} {}
69 70
70 const ProgramCode& program_code; 71 const ProgramCode& program_code;
71 ConstBufferLocker& locker; 72 Registry& registry;
72 u32 start{}; 73 u32 start{};
73 std::vector<BlockInfo> block_info; 74 std::vector<BlockInfo> block_info;
74 std::list<u32> inspect_queries; 75 std::list<u32> inspect_queries;
@@ -438,7 +439,7 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
438 const s32 pc_target = offset + result.relative_position; 439 const s32 pc_target = offset + result.relative_position;
439 std::vector<CaseBranch> branches; 440 std::vector<CaseBranch> branches;
440 for (u32 i = 0; i < result.entries; i++) { 441 for (u32 i = 0; i < result.entries; i++) {
441 auto key = state.locker.ObtainKey(result.buffer, result.offset + i * 4); 442 auto key = state.registry.ObtainKey(result.buffer, result.offset + i * 4);
442 if (!key) { 443 if (!key) {
443 return {ParseResult::AbnormalFlow, parse_info}; 444 return {ParseResult::AbnormalFlow, parse_info};
444 } 445 }
@@ -656,14 +657,14 @@ void DecompileShader(CFGRebuildState& state) {
656 657
657std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, 658std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
658 const CompilerSettings& settings, 659 const CompilerSettings& settings,
659 ConstBufferLocker& locker) { 660 Registry& registry) {
660 auto result_out = std::make_unique<ShaderCharacteristics>(); 661 auto result_out = std::make_unique<ShaderCharacteristics>();
661 if (settings.depth == CompileDepth::BruteForce) { 662 if (settings.depth == CompileDepth::BruteForce) {
662 result_out->settings.depth = CompileDepth::BruteForce; 663 result_out->settings.depth = CompileDepth::BruteForce;
663 return result_out; 664 return result_out;
664 } 665 }
665 666
666 CFGRebuildState state{program_code, start_address, locker}; 667 CFGRebuildState state{program_code, start_address, registry};
667 // Inspect Code and generate blocks 668 // Inspect Code and generate blocks
668 state.labels.clear(); 669 state.labels.clear();
669 state.labels.emplace(start_address); 670 state.labels.emplace(start_address);
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
index 5304998b9..62a3510d8 100644
--- a/src/video_core/shader/control_flow.h
+++ b/src/video_core/shader/control_flow.h
@@ -12,6 +12,7 @@
12#include "video_core/engines/shader_bytecode.h" 12#include "video_core/engines/shader_bytecode.h"
13#include "video_core/shader/ast.h" 13#include "video_core/shader/ast.h"
14#include "video_core/shader/compiler_settings.h" 14#include "video_core/shader/compiler_settings.h"
15#include "video_core/shader/registry.h"
15#include "video_core/shader/shader_ir.h" 16#include "video_core/shader/shader_ir.h"
16 17
17namespace VideoCommon::Shader { 18namespace VideoCommon::Shader {
@@ -111,6 +112,6 @@ struct ShaderCharacteristics {
111 112
112std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, 113std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
113 const CompilerSettings& settings, 114 const CompilerSettings& settings,
114 ConstBufferLocker& locker); 115 Registry& registry);
115 116
116} // namespace VideoCommon::Shader 117} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 6b697ed5d..87ac9ac6c 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -34,13 +34,9 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
34 return (absolute_offset % SchedPeriod) == 0; 34 return (absolute_offset % SchedPeriod) == 0;
35} 35}
36 36
37void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver, 37void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver,
38 const std::list<Sampler>& used_samplers) { 38 const std::list<Sampler>& used_samplers) {
39 if (gpu_driver == nullptr) { 39 if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) {
40 LOG_CRITICAL(HW_GPU, "GPU driver profile has not been created yet");
41 return;
42 }
43 if (gpu_driver->TextureHandlerSizeKnown() || used_samplers.size() <= 1) {
44 return; 40 return;
45 } 41 }
46 u32 count{}; 42 u32 count{};
@@ -53,17 +49,13 @@ void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver,
53 bound_offsets.emplace_back(sampler.GetOffset()); 49 bound_offsets.emplace_back(sampler.GetOffset());
54 } 50 }
55 if (count > 1) { 51 if (count > 1) {
56 gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets)); 52 gpu_driver.DeduceTextureHandlerSize(std::move(bound_offsets));
57 } 53 }
58} 54}
59 55
60std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce, 56std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce,
61 VideoCore::GuestDriverProfile* gpu_driver, 57 VideoCore::GuestDriverProfile& gpu_driver,
62 const std::list<Sampler>& used_samplers) { 58 const std::list<Sampler>& used_samplers) {
63 if (gpu_driver == nullptr) {
64 LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet");
65 return std::nullopt;
66 }
67 const u32 base_offset = sampler_to_deduce.GetOffset(); 59 const u32 base_offset = sampler_to_deduce.GetOffset();
68 u32 max_offset{std::numeric_limits<u32>::max()}; 60 u32 max_offset{std::numeric_limits<u32>::max()};
69 for (const auto& sampler : used_samplers) { 61 for (const auto& sampler : used_samplers) {
@@ -77,7 +69,7 @@ std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce,
77 if (max_offset == std::numeric_limits<u32>::max()) { 69 if (max_offset == std::numeric_limits<u32>::max()) {
78 return std::nullopt; 70 return std::nullopt;
79 } 71 }
80 return ((max_offset - base_offset) * 4) / gpu_driver->GetTextureHandlerSize(); 72 return ((max_offset - base_offset) * 4) / gpu_driver.GetTextureHandlerSize();
81} 73}
82 74
83} // Anonymous namespace 75} // Anonymous namespace
@@ -149,7 +141,7 @@ void ShaderIR::Decode() {
149 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); 141 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
150 142
151 decompiled = false; 143 decompiled = false;
152 auto info = ScanFlow(program_code, main_offset, settings, locker); 144 auto info = ScanFlow(program_code, main_offset, settings, registry);
153 auto& shader_info = *info; 145 auto& shader_info = *info;
154 coverage_begin = shader_info.start; 146 coverage_begin = shader_info.start;
155 coverage_end = shader_info.end; 147 coverage_end = shader_info.end;
@@ -364,7 +356,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
364 356
365void ShaderIR::PostDecode() { 357void ShaderIR::PostDecode() {
366 // Deduce texture handler size if needed 358 // Deduce texture handler size if needed
367 auto gpu_driver = locker.AccessGuestDriverProfile(); 359 auto gpu_driver = registry.AccessGuestDriverProfile();
368 DeduceTextureHandlerSize(gpu_driver, used_samplers); 360 DeduceTextureHandlerSize(gpu_driver, used_samplers);
369 // Deduce Indexed Samplers 361 // Deduce Indexed Samplers
370 if (!uses_indexed_samplers) { 362 if (!uses_indexed_samplers) {
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index 90240c765..478394682 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -53,29 +53,24 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
53 53
54 op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); 54 op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);
55 55
56 // TODO(Rodrigo): Should precise be used when there's a postfactor? 56 static constexpr std::array FmulPostFactor = {
57 Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b); 57 1.000f, // None
58 0.500f, // Divide 2
59 0.250f, // Divide 4
60 0.125f, // Divide 8
61 8.000f, // Mul 8
62 4.000f, // Mul 4
63 2.000f, // Mul 2
64 };
58 65
59 if (instr.fmul.postfactor != 0) { 66 if (instr.fmul.postfactor != 0) {
60 auto postfactor = static_cast<s32>(instr.fmul.postfactor); 67 op_a = Operation(OperationCode::FMul, NO_PRECISE, op_a,
61 68 Immediate(FmulPostFactor[instr.fmul.postfactor]));
62 // Postfactor encoded as 3-bit 1's complement in instruction, interpreted with below
63 // logic.
64 if (postfactor >= 4) {
65 postfactor = 7 - postfactor;
66 } else {
67 postfactor = 0 - postfactor;
68 }
69
70 if (postfactor > 0) {
71 value = Operation(OperationCode::FMul, NO_PRECISE, value,
72 Immediate(static_cast<f32>(1 << postfactor)));
73 } else {
74 value = Operation(OperationCode::FDiv, NO_PRECISE, value,
75 Immediate(static_cast<f32>(1 << -postfactor)));
76 }
77 } 69 }
78 70
71 // TODO(Rodrigo): Should precise be used when there's a postfactor?
72 Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b);
73
79 value = GetSaturatedFloat(value, instr.alu.saturate_d); 74 value = GetSaturatedFloat(value, instr.alu.saturate_d);
80 75
81 SetInternalFlagsFromFloat(bb, value, instr.generates_cc); 76 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index 21366869d..2fe787d6f 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -293,44 +293,66 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
293 293
294void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c, 294void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c,
295 Node imm_lut, bool sets_cc) { 295 Node imm_lut, bool sets_cc) {
296 constexpr u32 lop_iterations = 32; 296 const Node lop3_fast = [&](const Node na, const Node nb, const Node nc, const Node ttbl) {
297 const Node one = Immediate(1); 297 Node value = Immediate(0);
298 const Node two = Immediate(2); 298 const ImmediateNode imm = std::get<ImmediateNode>(*ttbl);
299 299 if (imm.GetValue() & 0x01) {
300 Node value; 300 const Node a = Operation(OperationCode::IBitwiseNot, na);
301 for (u32 i = 0; i < lop_iterations; ++i) { 301 const Node b = Operation(OperationCode::IBitwiseNot, nb);
302 const Node shift_amount = Immediate(i); 302 const Node c = Operation(OperationCode::IBitwiseNot, nc);
303 303 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b);
304 const Node a = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_c, shift_amount); 304 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
305 const Node pack_0 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, one); 305 value = Operation(OperationCode::IBitwiseOr, value, r);
306
307 const Node b = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_b, shift_amount);
308 const Node c = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, b, one);
309 const Node pack_1 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, c, one);
310
311 const Node d = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_a, shift_amount);
312 const Node e = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, d, one);
313 const Node pack_2 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, e, two);
314
315 const Node pack_01 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, pack_0, pack_1);
316 const Node pack_012 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, pack_01, pack_2);
317
318 const Node shifted_bit =
319 Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, imm_lut, pack_012);
320 const Node bit = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, shifted_bit, one);
321
322 const Node right =
323 Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, bit, shift_amount);
324
325 if (i > 0) {
326 value = Operation(OperationCode::IBitwiseOr, NO_PRECISE, value, right);
327 } else {
328 value = right;
329 } 306 }
330 } 307 if (imm.GetValue() & 0x02) {
308 const Node a = Operation(OperationCode::IBitwiseNot, na);
309 const Node b = Operation(OperationCode::IBitwiseNot, nb);
310 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b);
311 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
312 value = Operation(OperationCode::IBitwiseOr, value, r);
313 }
314 if (imm.GetValue() & 0x04) {
315 const Node a = Operation(OperationCode::IBitwiseNot, na);
316 const Node c = Operation(OperationCode::IBitwiseNot, nc);
317 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb);
318 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
319 value = Operation(OperationCode::IBitwiseOr, value, r);
320 }
321 if (imm.GetValue() & 0x08) {
322 const Node a = Operation(OperationCode::IBitwiseNot, na);
323 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb);
324 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
325 value = Operation(OperationCode::IBitwiseOr, value, r);
326 }
327 if (imm.GetValue() & 0x10) {
328 const Node b = Operation(OperationCode::IBitwiseNot, nb);
329 const Node c = Operation(OperationCode::IBitwiseNot, nc);
330 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b);
331 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
332 value = Operation(OperationCode::IBitwiseOr, value, r);
333 }
334 if (imm.GetValue() & 0x20) {
335 const Node b = Operation(OperationCode::IBitwiseNot, nb);
336 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b);
337 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
338 value = Operation(OperationCode::IBitwiseOr, value, r);
339 }
340 if (imm.GetValue() & 0x40) {
341 const Node c = Operation(OperationCode::IBitwiseNot, nc);
342 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb);
343 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
344 value = Operation(OperationCode::IBitwiseOr, value, r);
345 }
346 if (imm.GetValue() & 0x80) {
347 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb);
348 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
349 value = Operation(OperationCode::IBitwiseOr, value, r);
350 }
351 return value;
352 }(op_a, op_b, op_c, imm_lut);
331 353
332 SetInternalFlagsFromInteger(bb, value, sets_cc); 354 SetInternalFlagsFromInteger(bb, lop3_fast, sets_cc);
333 SetRegister(bb, dest, value); 355 SetRegister(bb, dest, lop3_fast);
334} 356}
335 357
336} // namespace VideoCommon::Shader 358} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp
index e02bcd097..8e3b46e8e 100644
--- a/src/video_core/shader/decode/bfe.cpp
+++ b/src/video_core/shader/decode/bfe.cpp
@@ -17,33 +17,60 @@ u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]}; 17 const Instruction instr = {program_code[pc]};
18 const auto opcode = OpCode::Decode(instr); 18 const auto opcode = OpCode::Decode(instr);
19 19
20 UNIMPLEMENTED_IF(instr.bfe.negate_b);
21
22 Node op_a = GetRegister(instr.gpr8); 20 Node op_a = GetRegister(instr.gpr8);
23 op_a = GetOperandAbsNegInteger(op_a, false, instr.bfe.negate_a, false); 21 Node op_b = [&] {
24 22 switch (opcode->get().GetId()) {
25 switch (opcode->get().GetId()) { 23 case OpCode::Id::BFE_R:
26 case OpCode::Id::BFE_IMM: { 24 return GetRegister(instr.gpr20);
27 UNIMPLEMENTED_IF_MSG(instr.generates_cc, 25 case OpCode::Id::BFE_C:
28 "Condition codes generation in BFE is not implemented"); 26 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
27 case OpCode::Id::BFE_IMM:
28 return Immediate(instr.alu.GetSignedImm20_20());
29 default:
30 UNREACHABLE();
31 return Immediate(0);
32 }
33 }();
29 34
30 const Node inner_shift_imm = Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue())); 35 UNIMPLEMENTED_IF_MSG(instr.bfe.rd_cc, "Condition codes in BFE is not implemented");
31 const Node outer_shift_imm =
32 Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position));
33 36
34 const Node inner_shift = 37 const bool is_signed = instr.bfe.is_signed;
35 Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, inner_shift_imm);
36 const Node outer_shift =
37 Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, inner_shift, outer_shift_imm);
38 38
39 SetInternalFlagsFromInteger(bb, outer_shift, instr.generates_cc); 39 // using reverse parallel method in
40 SetRegister(bb, instr.gpr0, outer_shift); 40 // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
41 break; 41 // note for later if possible to implement faster method.
42 } 42 if (instr.bfe.brev) {
43 default: 43 const auto swap = [&](u32 s, u32 mask) {
44 UNIMPLEMENTED_MSG("Unhandled BFE instruction: {}", opcode->get().GetName()); 44 Node v1 =
45 SignedOperation(OperationCode::ILogicalShiftRight, is_signed, op_a, Immediate(s));
46 if (mask != 0) {
47 v1 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v1),
48 Immediate(mask));
49 }
50 Node v2 = op_a;
51 if (mask != 0) {
52 v2 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v2),
53 Immediate(mask));
54 }
55 v2 = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, std::move(v2),
56 Immediate(s));
57 return SignedOperation(OperationCode::IBitwiseOr, is_signed, std::move(v1),
58 std::move(v2));
59 };
60 op_a = swap(1, 0x55555555U);
61 op_a = swap(2, 0x33333333U);
62 op_a = swap(4, 0x0F0F0F0FU);
63 op_a = swap(8, 0x00FF00FFU);
64 op_a = swap(16, 0);
45 } 65 }
46 66
67 const auto offset = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b,
68 Immediate(0), Immediate(8));
69 const auto bits = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b,
70 Immediate(8), Immediate(8));
71 auto result = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_a, offset, bits);
72 SetRegister(bb, instr.gpr0, std::move(result));
73
47 return pc; 74 return pc;
48} 75}
49 76
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index bee7d8cad..48350e042 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -12,6 +12,7 @@
12#include "common/logging/log.h" 12#include "common/logging/log.h"
13#include "video_core/engines/shader_bytecode.h" 13#include "video_core/engines/shader_bytecode.h"
14#include "video_core/shader/node_helper.h" 14#include "video_core/shader/node_helper.h"
15#include "video_core/shader/registry.h"
15#include "video_core/shader/shader_ir.h" 16#include "video_core/shader/shader_ir.h"
16 17
17namespace VideoCommon::Shader { 18namespace VideoCommon::Shader {
@@ -359,8 +360,8 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(std::optional<SamplerInfo> sample
359 if (sampler_info) { 360 if (sampler_info) {
360 return *sampler_info; 361 return *sampler_info;
361 } 362 }
362 const auto sampler = 363 const auto sampler = buffer ? registry.ObtainBindlessSampler(*buffer, offset)
363 buffer ? locker.ObtainBindlessSampler(*buffer, offset) : locker.ObtainBoundSampler(offset); 364 : registry.ObtainBoundSampler(offset);
364 if (!sampler) { 365 if (!sampler) {
365 LOG_WARNING(HW_GPU, "Unknown sampler info"); 366 LOG_WARNING(HW_GPU, "Unknown sampler info");
366 return SamplerInfo{TextureType::Texture2D, false, false, false}; 367 return SamplerInfo{TextureType::Texture2D, false, false, false};
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
index 206961909..6191ffba1 100644
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -12,6 +12,7 @@ namespace VideoCommon::Shader {
12 12
13using Tegra::Shader::Instruction; 13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode; 14using Tegra::Shader::OpCode;
15using Tegra::Shader::PredCondition;
15 16
16u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { 17u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]}; 18 const Instruction instr = {program_code[pc]};
@@ -30,7 +31,7 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
30 const bool is_signed_b = instr.xmad.sign_b == 1; 31 const bool is_signed_b = instr.xmad.sign_b == 1;
31 const bool is_signed_c = is_signed_a; 32 const bool is_signed_c = is_signed_a;
32 33
33 auto [is_merge, is_psl, is_high_b, mode, op_b, 34 auto [is_merge, is_psl, is_high_b, mode, op_b_binding,
34 op_c] = [&]() -> std::tuple<bool, bool, bool, Tegra::Shader::XmadMode, Node, Node> { 35 op_c] = [&]() -> std::tuple<bool, bool, bool, Tegra::Shader::XmadMode, Node, Node> {
35 switch (opcode->get().GetId()) { 36 switch (opcode->get().GetId()) {
36 case OpCode::Id::XMAD_CR: 37 case OpCode::Id::XMAD_CR:
@@ -63,15 +64,19 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
63 } 64 }
64 }(); 65 }();
65 66
66 op_a = BitfieldExtract(op_a, instr.xmad.high_a ? 16 : 0, 16); 67 op_a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(op_a),
68 instr.xmad.high_a ? Immediate(16) : Immediate(0), Immediate(16));
67 69
68 const Node original_b = op_b; 70 const Node original_b = op_b_binding;
69 op_b = BitfieldExtract(op_b, is_high_b ? 16 : 0, 16); 71 const Node op_b =
72 SignedOperation(OperationCode::IBitfieldExtract, is_signed_b, std::move(op_b_binding),
73 is_high_b ? Immediate(16) : Immediate(0), Immediate(16));
70 74
71 // TODO(Rodrigo): Use an appropiate sign for this operation 75 // we already check sign_a and sign_b is difference or not before so just use one in here.
72 Node product = Operation(OperationCode::IMul, NO_PRECISE, op_a, op_b); 76 Node product = SignedOperation(OperationCode::IMul, is_signed_a, op_a, op_b);
73 if (is_psl) { 77 if (is_psl) {
74 product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16)); 78 product =
79 SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_a, product, Immediate(16));
75 } 80 }
76 SetTemporary(bb, 0, product); 81 SetTemporary(bb, 0, product);
77 product = GetTemporary(0); 82 product = GetTemporary(0);
@@ -88,12 +93,40 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
88 return BitfieldExtract(original_c, 16, 16); 93 return BitfieldExtract(original_c, 16, 16);
89 case Tegra::Shader::XmadMode::CBcc: { 94 case Tegra::Shader::XmadMode::CBcc: {
90 const Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, 95 const Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
91 NO_PRECISE, original_b, Immediate(16)); 96 original_b, Immediate(16));
92 return SignedOperation(OperationCode::IAdd, is_signed_c, NO_PRECISE, original_c, 97 return SignedOperation(OperationCode::IAdd, is_signed_c, original_c, shifted_b);
93 shifted_b); 98 }
99 case Tegra::Shader::XmadMode::CSfu: {
100 const Node comp_a = GetPredicateComparisonInteger(PredCondition::Equal, is_signed_a,
101 op_a, Immediate(0));
102 const Node comp_b = GetPredicateComparisonInteger(PredCondition::Equal, is_signed_b,
103 op_b, Immediate(0));
104 const Node comp = Operation(OperationCode::LogicalOr, comp_a, comp_b);
105
106 const Node comp_minus_a = GetPredicateComparisonInteger(
107 PredCondition::NotEqual, is_signed_a,
108 SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, op_a,
109 Immediate(0x80000000)),
110 Immediate(0));
111 const Node comp_minus_b = GetPredicateComparisonInteger(
112 PredCondition::NotEqual, is_signed_b,
113 SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, op_b,
114 Immediate(0x80000000)),
115 Immediate(0));
116
117 Node new_c = Operation(
118 OperationCode::Select, comp_minus_a,
119 SignedOperation(OperationCode::IAdd, is_signed_c, original_c, Immediate(-65536)),
120 original_c);
121 new_c = Operation(
122 OperationCode::Select, comp_minus_b,
123 SignedOperation(OperationCode::IAdd, is_signed_c, new_c, Immediate(-65536)),
124 std::move(new_c));
125
126 return Operation(OperationCode::Select, comp, original_c, std::move(new_c));
94 } 127 }
95 default: 128 default:
96 UNIMPLEMENTED_MSG("Unhandled XMAD mode: {}", static_cast<u32>(instr.xmad.mode.Value())); 129 UNREACHABLE();
97 return Immediate(0); 130 return Immediate(0);
98 } 131 }
99 }(); 132 }();
@@ -102,18 +135,19 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
102 op_c = GetTemporary(1); 135 op_c = GetTemporary(1);
103 136
104 // TODO(Rodrigo): Use an appropiate sign for this operation 137 // TODO(Rodrigo): Use an appropiate sign for this operation
105 Node sum = Operation(OperationCode::IAdd, product, op_c); 138 Node sum = SignedOperation(OperationCode::IAdd, is_signed_a, product, std::move(op_c));
106 SetTemporary(bb, 2, sum); 139 SetTemporary(bb, 2, sum);
107 sum = GetTemporary(2); 140 sum = GetTemporary(2);
108 if (is_merge) { 141 if (is_merge) {
109 const Node a = BitfieldExtract(sum, 0, 16); 142 const Node a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(sum),
110 const Node b = 143 Immediate(0), Immediate(16));
111 Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, original_b, Immediate(16)); 144 const Node b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, original_b,
112 sum = Operation(OperationCode::IBitwiseOr, NO_PRECISE, a, b); 145 Immediate(16));
146 sum = SignedOperation(OperationCode::IBitwiseOr, is_signed_a, a, b);
113 } 147 }
114 148
115 SetInternalFlagsFromInteger(bb, sum, instr.generates_cc); 149 SetInternalFlagsFromInteger(bb, sum, instr.generates_cc);
116 SetRegister(bb, instr.gpr0, sum); 150 SetRegister(bb, instr.gpr0, std::move(sum));
117 151
118 return pc; 152 return pc;
119} 153}
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index a0a7b9111..a1828546e 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -299,7 +299,7 @@ private:
299 u32 index{}; ///< Emulated index given for the this sampler. 299 u32 index{}; ///< Emulated index given for the this sampler.
300 u32 offset{}; ///< Offset in the const buffer from where the sampler is being read. 300 u32 offset{}; ///< Offset in the const buffer from where the sampler is being read.
301 u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers). 301 u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers).
302 u32 size{}; ///< Size of the sampler if indexed. 302 u32 size{1}; ///< Size of the sampler.
303 303
304 Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) 304 Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
305 bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. 305 bool is_array{}; ///< Whether the texture is being sampled as an array texture or not.
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp
index b3dcd291c..76c56abb5 100644
--- a/src/video_core/shader/node_helper.cpp
+++ b/src/video_core/shader/node_helper.cpp
@@ -68,6 +68,8 @@ OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed)
68 return OperationCode::UBitwiseXor; 68 return OperationCode::UBitwiseXor;
69 case OperationCode::IBitwiseNot: 69 case OperationCode::IBitwiseNot:
70 return OperationCode::UBitwiseNot; 70 return OperationCode::UBitwiseNot;
71 case OperationCode::IBitfieldExtract:
72 return OperationCode::UBitfieldExtract;
71 case OperationCode::IBitfieldInsert: 73 case OperationCode::IBitfieldInsert:
72 return OperationCode::UBitfieldInsert; 74 return OperationCode::UBitfieldInsert;
73 case OperationCode::IBitCount: 75 case OperationCode::IBitCount:
diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp
new file mode 100644
index 000000000..af70b3f35
--- /dev/null
+++ b/src/video_core/shader/registry.cpp
@@ -0,0 +1,161 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <tuple>
7
8#include "common/assert.h"
9#include "common/common_types.h"
10#include "video_core/engines/kepler_compute.h"
11#include "video_core/engines/maxwell_3d.h"
12#include "video_core/engines/shader_type.h"
13#include "video_core/shader/registry.h"
14
15namespace VideoCommon::Shader {
16
17using Tegra::Engines::ConstBufferEngineInterface;
18using Tegra::Engines::SamplerDescriptor;
19using Tegra::Engines::ShaderType;
20
21namespace {
22
23GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
24 if (shader_stage == ShaderType::Compute) {
25 return {};
26 }
27 auto& graphics = static_cast<Tegra::Engines::Maxwell3D&>(engine);
28
29 GraphicsInfo info;
30 info.tfb_layouts = graphics.regs.tfb_layouts;
31 info.tfb_varying_locs = graphics.regs.tfb_varying_locs;
32 info.primitive_topology = graphics.regs.draw.topology;
33 info.tessellation_primitive = graphics.regs.tess_mode.prim;
34 info.tessellation_spacing = graphics.regs.tess_mode.spacing;
35 info.tfb_enabled = graphics.regs.tfb_enabled;
36 info.tessellation_clockwise = graphics.regs.tess_mode.cw;
37 return info;
38}
39
40ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
41 if (shader_stage != ShaderType::Compute) {
42 return {};
43 }
44 auto& compute = static_cast<Tegra::Engines::KeplerCompute&>(engine);
45 const auto& launch = compute.launch_description;
46
47 ComputeInfo info;
48 info.workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z};
49 info.local_memory_size_in_words = launch.local_pos_alloc;
50 info.shared_memory_size_in_words = launch.shared_alloc;
51 return info;
52}
53
54} // Anonymous namespace
55
56Registry::Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info)
57 : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile},
58 bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {}
59
60Registry::Registry(Tegra::Engines::ShaderType shader_stage,
61 Tegra::Engines::ConstBufferEngineInterface& engine)
62 : stage{shader_stage}, engine{&engine}, bound_buffer{engine.GetBoundBuffer()},
63 graphics_info{MakeGraphicsInfo(shader_stage, engine)}, compute_info{MakeComputeInfo(
64 shader_stage, engine)} {}
65
66Registry::~Registry() = default;
67
68std::optional<u32> Registry::ObtainKey(u32 buffer, u32 offset) {
69 const std::pair<u32, u32> key = {buffer, offset};
70 const auto iter = keys.find(key);
71 if (iter != keys.end()) {
72 return iter->second;
73 }
74 if (!engine) {
75 return std::nullopt;
76 }
77 const u32 value = engine->AccessConstBuffer32(stage, buffer, offset);
78 keys.emplace(key, value);
79 return value;
80}
81
82std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) {
83 const u32 key = offset;
84 const auto iter = bound_samplers.find(key);
85 if (iter != bound_samplers.end()) {
86 return iter->second;
87 }
88 if (!engine) {
89 return std::nullopt;
90 }
91 const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset);
92 bound_samplers.emplace(key, value);
93 return value;
94}
95
96std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer,
97 u32 offset) {
98 const std::pair key = {buffer, offset};
99 const auto iter = bindless_samplers.find(key);
100 if (iter != bindless_samplers.end()) {
101 return iter->second;
102 }
103 if (!engine) {
104 return std::nullopt;
105 }
106 const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset);
107 bindless_samplers.emplace(key, value);
108 return value;
109}
110
111void Registry::InsertKey(u32 buffer, u32 offset, u32 value) {
112 keys.insert_or_assign({buffer, offset}, value);
113}
114
115void Registry::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) {
116 bound_samplers.insert_or_assign(offset, sampler);
117}
118
119void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) {
120 bindless_samplers.insert_or_assign({buffer, offset}, sampler);
121}
122
123bool Registry::IsConsistent() const {
124 if (!engine) {
125 return true;
126 }
127 return std::all_of(keys.begin(), keys.end(),
128 [this](const auto& pair) {
129 const auto [cbuf, offset] = pair.first;
130 const auto value = pair.second;
131 return value == engine->AccessConstBuffer32(stage, cbuf, offset);
132 }) &&
133 std::all_of(bound_samplers.begin(), bound_samplers.end(),
134 [this](const auto& sampler) {
135 const auto [key, value] = sampler;
136 return value == engine->AccessBoundSampler(stage, key);
137 }) &&
138 std::all_of(bindless_samplers.begin(), bindless_samplers.end(),
139 [this](const auto& sampler) {
140 const auto [cbuf, offset] = sampler.first;
141 const auto value = sampler.second;
142 return value == engine->AccessBindlessSampler(stage, cbuf, offset);
143 });
144}
145
146bool Registry::HasEqualKeys(const Registry& rhs) const {
147 return std::tie(keys, bound_samplers, bindless_samplers) ==
148 std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers);
149}
150
151const GraphicsInfo& Registry::GetGraphicsInfo() const {
152 ASSERT(stage != Tegra::Engines::ShaderType::Compute);
153 return graphics_info;
154}
155
156const ComputeInfo& Registry::GetComputeInfo() const {
157 ASSERT(stage == Tegra::Engines::ShaderType::Compute);
158 return compute_info;
159}
160
161} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h
new file mode 100644
index 000000000..0c80d35fd
--- /dev/null
+++ b/src/video_core/shader/registry.h
@@ -0,0 +1,137 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <optional>
9#include <type_traits>
10#include <unordered_map>
11#include <utility>
12
13#include "common/common_types.h"
14#include "common/hash.h"
15#include "video_core/engines/const_buffer_engine_interface.h"
16#include "video_core/engines/maxwell_3d.h"
17#include "video_core/engines/shader_type.h"
18#include "video_core/guest_driver.h"
19
20namespace VideoCommon::Shader {
21
22using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
23using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
24using BindlessSamplerMap =
25 std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
26
27struct GraphicsInfo {
28 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
29
30 std::array<Maxwell::TransformFeedbackLayout, Maxwell::NumTransformFeedbackBuffers>
31 tfb_layouts{};
32 std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> tfb_varying_locs{};
33 Maxwell::PrimitiveTopology primitive_topology{};
34 Maxwell::TessellationPrimitive tessellation_primitive{};
35 Maxwell::TessellationSpacing tessellation_spacing{};
36 bool tfb_enabled = false;
37 bool tessellation_clockwise = false;
38};
39static_assert(std::is_trivially_copyable_v<GraphicsInfo> &&
40 std::is_standard_layout_v<GraphicsInfo>);
41
42struct ComputeInfo {
43 std::array<u32, 3> workgroup_size{};
44 u32 shared_memory_size_in_words = 0;
45 u32 local_memory_size_in_words = 0;
46};
47static_assert(std::is_trivially_copyable_v<ComputeInfo> && std::is_standard_layout_v<ComputeInfo>);
48
49struct SerializedRegistryInfo {
50 VideoCore::GuestDriverProfile guest_driver_profile;
51 u32 bound_buffer = 0;
52 GraphicsInfo graphics;
53 ComputeInfo compute;
54};
55
56/**
57 * The Registry is a class use to interface the 3D and compute engines with the shader compiler.
58 * With it, the shader can obtain required data from GPU state and store it for disk shader
59 * compilation.
60 */
61class Registry {
62public:
63 explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info);
64
65 explicit Registry(Tegra::Engines::ShaderType shader_stage,
66 Tegra::Engines::ConstBufferEngineInterface& engine);
67
68 ~Registry();
69
70 /// Retrieves a key from the registry, if it's registered, it will give the registered value, if
71 /// not it will obtain it from maxwell3d and register it.
72 std::optional<u32> ObtainKey(u32 buffer, u32 offset);
73
74 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
75
76 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
77
78 /// Inserts a key.
79 void InsertKey(u32 buffer, u32 offset, u32 value);
80
81 /// Inserts a bound sampler key.
82 void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler);
83
84 /// Inserts a bindless sampler key.
85 void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
86
87 /// Checks keys and samplers against engine's current const buffers.
88 /// Returns true if they are the same value, false otherwise.
89 bool IsConsistent() const;
90
91 /// Returns true if the keys are equal to the other ones in the registry.
92 bool HasEqualKeys(const Registry& rhs) const;
93
94 /// Returns graphics information from this shader
95 const GraphicsInfo& GetGraphicsInfo() const;
96
97 /// Returns compute information from this shader
98 const ComputeInfo& GetComputeInfo() const;
99
100 /// Gives an getter to the const buffer keys in the database.
101 const KeyMap& GetKeys() const {
102 return keys;
103 }
104
105 /// Gets samplers database.
106 const BoundSamplerMap& GetBoundSamplers() const {
107 return bound_samplers;
108 }
109
110 /// Gets bindless samplers database.
111 const BindlessSamplerMap& GetBindlessSamplers() const {
112 return bindless_samplers;
113 }
114
115 /// Gets bound buffer used on this shader
116 u32 GetBoundBuffer() const {
117 return bound_buffer;
118 }
119
120 /// Obtains access to the guest driver's profile.
121 VideoCore::GuestDriverProfile& AccessGuestDriverProfile() {
122 return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile;
123 }
124
125private:
126 const Tegra::Engines::ShaderType stage;
127 VideoCore::GuestDriverProfile stored_guest_driver_profile;
128 Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
129 KeyMap keys;
130 BoundSamplerMap bound_samplers;
131 BindlessSamplerMap bindless_samplers;
132 u32 bound_buffer;
133 GraphicsInfo graphics_info;
134 ComputeInfo compute_info;
135};
136
137} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 3a5d280a9..baf7188d2 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -11,6 +11,7 @@
11#include "common/logging/log.h" 11#include "common/logging/log.h"
12#include "video_core/engines/shader_bytecode.h" 12#include "video_core/engines/shader_bytecode.h"
13#include "video_core/shader/node_helper.h" 13#include "video_core/shader/node_helper.h"
14#include "video_core/shader/registry.h"
14#include "video_core/shader/shader_ir.h" 15#include "video_core/shader/shader_ir.h"
15 16
16namespace VideoCommon::Shader { 17namespace VideoCommon::Shader {
@@ -24,8 +25,8 @@ using Tegra::Shader::PredOperation;
24using Tegra::Shader::Register; 25using Tegra::Shader::Register;
25 26
26ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, 27ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
27 ConstBufferLocker& locker) 28 Registry& registry)
28 : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} { 29 : program_code{program_code}, main_offset{main_offset}, settings{settings}, registry{registry} {
29 Decode(); 30 Decode();
30 PostDecode(); 31 PostDecode();
31} 32}
@@ -95,6 +96,7 @@ Node ShaderIR::GetPredicate(bool immediate) {
95} 96}
96 97
97Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) { 98Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) {
99 MarkAttributeUsage(index, element);
98 used_input_attributes.emplace(index); 100 used_input_attributes.emplace(index);
99 return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer)); 101 return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer));
100} 102}
@@ -105,42 +107,8 @@ Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_addres
105} 107}
106 108
107Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) { 109Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) {
108 if (index == Attribute::Index::LayerViewportPointSize) { 110 MarkAttributeUsage(index, element);
109 switch (element) {
110 case 0:
111 UNIMPLEMENTED();
112 break;
113 case 1:
114 uses_layer = true;
115 break;
116 case 2:
117 uses_viewport_index = true;
118 break;
119 case 3:
120 uses_point_size = true;
121 break;
122 }
123 }
124 if (index == Attribute::Index::TessCoordInstanceIDVertexID) {
125 switch (element) {
126 case 2:
127 uses_instance_id = true;
128 break;
129 case 3:
130 uses_vertex_id = true;
131 break;
132 default:
133 break;
134 }
135 }
136 if (index == Attribute::Index::ClipDistances0123 ||
137 index == Attribute::Index::ClipDistances4567) {
138 const auto clip_index =
139 static_cast<u32>((index == Attribute::Index::ClipDistances4567 ? 1 : 0) + element);
140 used_clip_distances.at(clip_index) = true;
141 }
142 used_output_attributes.insert(index); 111 used_output_attributes.insert(index);
143
144 return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer)); 112 return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer));
145} 113}
146 114
@@ -451,6 +419,54 @@ Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) {
451 Immediate(bits)); 419 Immediate(bits));
452} 420}
453 421
422void ShaderIR::MarkAttributeUsage(Attribute::Index index, u64 element) {
423 switch (index) {
424 case Attribute::Index::LayerViewportPointSize:
425 switch (element) {
426 case 0:
427 UNIMPLEMENTED();
428 break;
429 case 1:
430 uses_layer = true;
431 break;
432 case 2:
433 uses_viewport_index = true;
434 break;
435 case 3:
436 uses_point_size = true;
437 break;
438 }
439 break;
440 case Attribute::Index::TessCoordInstanceIDVertexID:
441 switch (element) {
442 case 2:
443 uses_instance_id = true;
444 break;
445 case 3:
446 uses_vertex_id = true;
447 break;
448 }
449 break;
450 case Attribute::Index::ClipDistances0123:
451 case Attribute::Index::ClipDistances4567: {
452 const u64 clip_index = (index == Attribute::Index::ClipDistances4567 ? 4 : 0) + element;
453 used_clip_distances.at(clip_index) = true;
454 break;
455 }
456 case Attribute::Index::FrontColor:
457 case Attribute::Index::FrontSecondaryColor:
458 case Attribute::Index::BackColor:
459 case Attribute::Index::BackSecondaryColor:
460 uses_legacy_varyings = true;
461 break;
462 default:
463 if (index >= Attribute::Index::TexCoord_0 && index <= Attribute::Index::TexCoord_7) {
464 uses_legacy_varyings = true;
465 }
466 break;
467 }
468}
469
454std::size_t ShaderIR::DeclareAmend(Node new_amend) { 470std::size_t ShaderIR::DeclareAmend(Node new_amend) {
455 const std::size_t id = amend_code.size(); 471 const std::size_t id = amend_code.size();
456 amend_code.push_back(new_amend); 472 amend_code.push_back(new_amend);
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index b0851c3be..80fc9b82c 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -18,8 +18,8 @@
18#include "video_core/engines/shader_header.h" 18#include "video_core/engines/shader_header.h"
19#include "video_core/shader/ast.h" 19#include "video_core/shader/ast.h"
20#include "video_core/shader/compiler_settings.h" 20#include "video_core/shader/compiler_settings.h"
21#include "video_core/shader/const_buffer_locker.h"
22#include "video_core/shader/node.h" 21#include "video_core/shader/node.h"
22#include "video_core/shader/registry.h"
23 23
24namespace VideoCommon::Shader { 24namespace VideoCommon::Shader {
25 25
@@ -69,7 +69,7 @@ struct GlobalMemoryUsage {
69class ShaderIR final { 69class ShaderIR final {
70public: 70public:
71 explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, 71 explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
72 ConstBufferLocker& locker); 72 Registry& registry);
73 ~ShaderIR(); 73 ~ShaderIR();
74 74
75 const std::map<u32, NodeBlock>& GetBasicBlocks() const { 75 const std::map<u32, NodeBlock>& GetBasicBlocks() const {
@@ -137,6 +137,10 @@ public:
137 return uses_vertex_id; 137 return uses_vertex_id;
138 } 138 }
139 139
140 bool UsesLegacyVaryings() const {
141 return uses_legacy_varyings;
142 }
143
140 bool UsesWarps() const { 144 bool UsesWarps() const {
141 return uses_warps; 145 return uses_warps;
142 } 146 }
@@ -343,6 +347,9 @@ private:
343 /// Inserts a sequence of bits from a node 347 /// Inserts a sequence of bits from a node
344 Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits); 348 Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits);
345 349
350 /// Marks the usage of a input or output attribute.
351 void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element);
352
346 void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, 353 void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
347 const Node4& components); 354 const Node4& components);
348 355
@@ -414,7 +421,7 @@ private:
414 const ProgramCode& program_code; 421 const ProgramCode& program_code;
415 const u32 main_offset; 422 const u32 main_offset;
416 const CompilerSettings settings; 423 const CompilerSettings settings;
417 ConstBufferLocker& locker; 424 Registry& registry;
418 425
419 bool decompiled{}; 426 bool decompiled{};
420 bool disable_flow_stack{}; 427 bool disable_flow_stack{};
@@ -443,6 +450,7 @@ private:
443 bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes 450 bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes
444 bool uses_instance_id{}; 451 bool uses_instance_id{};
445 bool uses_vertex_id{}; 452 bool uses_vertex_id{};
453 bool uses_legacy_varyings{};
446 bool uses_warps{}; 454 bool uses_warps{};
447 bool uses_indexed_samplers{}; 455 bool uses_indexed_samplers{};
448 456
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index face8c943..10739b37d 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -81,26 +81,20 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons
81 MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue()); 81 MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue());
82 return {tracked, track}; 82 return {tracked, track};
83 } else if (const auto operation = std::get_if<OperationNode>(&*offset)) { 83 } else if (const auto operation = std::get_if<OperationNode>(&*offset)) {
84 auto bound_buffer = locker.ObtainBoundBuffer(); 84 const u32 bound_buffer = registry.GetBoundBuffer();
85 if (!bound_buffer) { 85 if (bound_buffer != cbuf->GetIndex()) {
86 return {}; 86 return {};
87 } 87 }
88 if (*bound_buffer != cbuf->GetIndex()) { 88 const auto pair = DecoupleIndirectRead(*operation);
89 return {};
90 }
91 auto pair = DecoupleIndirectRead(*operation);
92 if (!pair) { 89 if (!pair) {
93 return {}; 90 return {};
94 } 91 }
95 auto [gpr, base_offset] = *pair; 92 auto [gpr, base_offset] = *pair;
96 const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset); 93 const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset);
97 auto gpu_driver = locker.AccessGuestDriverProfile(); 94 const auto& gpu_driver = registry.AccessGuestDriverProfile();
98 if (gpu_driver == nullptr) {
99 return {};
100 }
101 const u32 bindless_cv = NewCustomVariable(); 95 const u32 bindless_cv = NewCustomVariable();
102 const Node op = Operation(OperationCode::UDiv, NO_PRECISE, gpr, 96 const Node op =
103 Immediate(gpu_driver->GetTextureHandlerSize())); 97 Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize()));
104 98
105 const Node cv_node = GetCustomVariable(bindless_cv); 99 const Node cv_node = GetCustomVariable(bindless_cv);
106 Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op)); 100 Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op));
@@ -157,13 +151,21 @@ std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& co
157 if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { 151 if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
158 return {}; 152 return {};
159 } 153 }
160 // Reduce the cursor in one to avoid infinite loops when the instruction sets the same 154 s64 current_cursor = cursor;
161 // register that it uses as operand 155 while (current_cursor > 0) {
162 const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); 156 // Reduce the cursor in one to avoid infinite loops when the instruction sets the same
163 if (!source) { 157 // register that it uses as operand
164 return {}; 158 const auto [source, new_cursor] = TrackRegister(gpr, code, current_cursor - 1);
159 current_cursor = new_cursor;
160 if (!source) {
161 continue;
162 }
163 const auto [base_address, index, offset] = TrackCbuf(source, code, current_cursor);
164 if (base_address != nullptr) {
165 return {base_address, index, offset};
166 }
165 } 167 }
166 return TrackCbuf(source, code, new_cursor); 168 return {};
167 } 169 }
168 if (const auto operation = std::get_if<OperationNode>(&*tracked)) { 170 if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
169 for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { 171 for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) {
diff --git a/src/video_core/shader/transform_feedback.cpp b/src/video_core/shader/transform_feedback.cpp
new file mode 100644
index 000000000..22a933761
--- /dev/null
+++ b/src/video_core/shader/transform_feedback.cpp
@@ -0,0 +1,115 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <unordered_map>
8
9#include "common/assert.h"
10#include "common/common_types.h"
11#include "video_core/engines/maxwell_3d.h"
12#include "video_core/shader/registry.h"
13#include "video_core/shader/transform_feedback.h"
14
15namespace VideoCommon::Shader {
16
17namespace {
18
19using Maxwell = Tegra::Engines::Maxwell3D::Regs;
20
21// TODO(Rodrigo): Change this to constexpr std::unordered_set in C++20
22
23/// Attribute offsets that describe a vector
24constexpr std::array VECTORS = {
25 28, // gl_Position
26 32, // Generic 0
27 36, // Generic 1
28 40, // Generic 2
29 44, // Generic 3
30 48, // Generic 4
31 52, // Generic 5
32 56, // Generic 6
33 60, // Generic 7
34 64, // Generic 8
35 68, // Generic 9
36 72, // Generic 10
37 76, // Generic 11
38 80, // Generic 12
39 84, // Generic 13
40 88, // Generic 14
41 92, // Generic 15
42 96, // Generic 16
43 100, // Generic 17
44 104, // Generic 18
45 108, // Generic 19
46 112, // Generic 20
47 116, // Generic 21
48 120, // Generic 22
49 124, // Generic 23
50 128, // Generic 24
51 132, // Generic 25
52 136, // Generic 26
53 140, // Generic 27
54 144, // Generic 28
55 148, // Generic 29
56 152, // Generic 30
57 156, // Generic 31
58 160, // gl_FrontColor
59 164, // gl_FrontSecondaryColor
60 160, // gl_BackColor
61 164, // gl_BackSecondaryColor
62 192, // gl_TexCoord[0]
63 196, // gl_TexCoord[1]
64 200, // gl_TexCoord[2]
65 204, // gl_TexCoord[3]
66 208, // gl_TexCoord[4]
67 212, // gl_TexCoord[5]
68 216, // gl_TexCoord[6]
69 220, // gl_TexCoord[7]
70};
71} // namespace
72
73std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info) {
74
75 std::unordered_map<u8, VaryingTFB> tfb;
76
77 for (std::size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) {
78 const auto& locations = info.tfb_varying_locs[buffer];
79 const auto& layout = info.tfb_layouts[buffer];
80 const std::size_t varying_count = layout.varying_count;
81
82 std::size_t highest = 0;
83
84 for (std::size_t offset = 0; offset < varying_count; ++offset) {
85 const std::size_t base_offset = offset;
86 const u8 location = locations[offset];
87
88 VaryingTFB varying;
89 varying.buffer = layout.stream;
90 varying.stride = layout.stride;
91 varying.offset = offset * sizeof(u32);
92 varying.components = 1;
93
94 if (std::find(VECTORS.begin(), VECTORS.end(), location / 4 * 4) != VECTORS.end()) {
95 UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB");
96
97 const u8 base_index = location / 4;
98 while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) {
99 ++offset;
100 ++varying.components;
101 }
102 }
103
104 [[maybe_unused]] const bool inserted = tfb.emplace(location, varying).second;
105 UNIMPLEMENTED_IF_MSG(!inserted, "Varying already stored");
106
107 highest = std::max(highest, (base_offset + varying.components) * sizeof(u32));
108 }
109
110 UNIMPLEMENTED_IF(highest != layout.stride);
111 }
112 return tfb;
113}
114
115} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/transform_feedback.h b/src/video_core/shader/transform_feedback.h
new file mode 100644
index 000000000..77d05f64c
--- /dev/null
+++ b/src/video_core/shader/transform_feedback.h
@@ -0,0 +1,23 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <unordered_map>
8
9#include "common/common_types.h"
10#include "video_core/shader/registry.h"
11
12namespace VideoCommon::Shader {
13
14struct VaryingTFB {
15 std::size_t buffer;
16 std::size_t stride;
17 std::size_t offset;
18 std::size_t components;
19};
20
21std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info);
22
23} // namespace VideoCommon::Shader
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 1655ccf16..cc7181229 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -111,6 +111,8 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format)
111 return PixelFormat::RGBA16F; 111 return PixelFormat::RGBA16F;
112 case Tegra::RenderTargetFormat::RGBA16_UNORM: 112 case Tegra::RenderTargetFormat::RGBA16_UNORM:
113 return PixelFormat::RGBA16U; 113 return PixelFormat::RGBA16U;
114 case Tegra::RenderTargetFormat::RGBA16_SNORM:
115 return PixelFormat::RGBA16S;
114 case Tegra::RenderTargetFormat::RGBA16_UINT: 116 case Tegra::RenderTargetFormat::RGBA16_UINT:
115 return PixelFormat::RGBA16UI; 117 return PixelFormat::RGBA16UI;
116 case Tegra::RenderTargetFormat::RGBA32_FLOAT: 118 case Tegra::RenderTargetFormat::RGBA32_FLOAT:
@@ -155,6 +157,8 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format)
155 return PixelFormat::R16I; 157 return PixelFormat::R16I;
156 case Tegra::RenderTargetFormat::R32_FLOAT: 158 case Tegra::RenderTargetFormat::R32_FLOAT:
157 return PixelFormat::R32F; 159 return PixelFormat::R32F;
160 case Tegra::RenderTargetFormat::R32_SINT:
161 return PixelFormat::R32I;
158 case Tegra::RenderTargetFormat::R32_UINT: 162 case Tegra::RenderTargetFormat::R32_UINT:
159 return PixelFormat::R32UI; 163 return PixelFormat::R32UI;
160 case Tegra::RenderTargetFormat::RG32_UINT: 164 case Tegra::RenderTargetFormat::RG32_UINT:
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index 0d17a93ed..ae8817465 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -25,81 +25,83 @@ enum class PixelFormat {
25 R8UI = 7, 25 R8UI = 7,
26 RGBA16F = 8, 26 RGBA16F = 8,
27 RGBA16U = 9, 27 RGBA16U = 9,
28 RGBA16UI = 10, 28 RGBA16S = 10,
29 R11FG11FB10F = 11, 29 RGBA16UI = 11,
30 RGBA32UI = 12, 30 R11FG11FB10F = 12,
31 DXT1 = 13, 31 RGBA32UI = 13,
32 DXT23 = 14, 32 DXT1 = 14,
33 DXT45 = 15, 33 DXT23 = 15,
34 DXN1 = 16, // This is also known as BC4 34 DXT45 = 16,
35 DXN2UNORM = 17, 35 DXN1 = 17, // This is also known as BC4
36 DXN2SNORM = 18, 36 DXN2UNORM = 18,
37 BC7U = 19, 37 DXN2SNORM = 19,
38 BC6H_UF16 = 20, 38 BC7U = 20,
39 BC6H_SF16 = 21, 39 BC6H_UF16 = 21,
40 ASTC_2D_4X4 = 22, 40 BC6H_SF16 = 22,
41 BGRA8 = 23, 41 ASTC_2D_4X4 = 23,
42 RGBA32F = 24, 42 BGRA8 = 24,
43 RG32F = 25, 43 RGBA32F = 25,
44 R32F = 26, 44 RG32F = 26,
45 R16F = 27, 45 R32F = 27,
46 R16U = 28, 46 R16F = 28,
47 R16S = 29, 47 R16U = 29,
48 R16UI = 30, 48 R16S = 30,
49 R16I = 31, 49 R16UI = 31,
50 RG16 = 32, 50 R16I = 32,
51 RG16F = 33, 51 RG16 = 33,
52 RG16UI = 34, 52 RG16F = 34,
53 RG16I = 35, 53 RG16UI = 35,
54 RG16S = 36, 54 RG16I = 36,
55 RGB32F = 37, 55 RG16S = 37,
56 RGBA8_SRGB = 38, 56 RGB32F = 38,
57 RG8U = 39, 57 RGBA8_SRGB = 39,
58 RG8S = 40, 58 RG8U = 40,
59 RG32UI = 41, 59 RG8S = 41,
60 RGBX16F = 42, 60 RG32UI = 42,
61 R32UI = 43, 61 RGBX16F = 43,
62 ASTC_2D_8X8 = 44, 62 R32UI = 44,
63 ASTC_2D_8X5 = 45, 63 R32I = 45,
64 ASTC_2D_5X4 = 46, 64 ASTC_2D_8X8 = 46,
65 BGRA8_SRGB = 47, 65 ASTC_2D_8X5 = 47,
66 DXT1_SRGB = 48, 66 ASTC_2D_5X4 = 48,
67 DXT23_SRGB = 49, 67 BGRA8_SRGB = 49,
68 DXT45_SRGB = 50, 68 DXT1_SRGB = 50,
69 BC7U_SRGB = 51, 69 DXT23_SRGB = 51,
70 R4G4B4A4U = 52, 70 DXT45_SRGB = 52,
71 ASTC_2D_4X4_SRGB = 53, 71 BC7U_SRGB = 53,
72 ASTC_2D_8X8_SRGB = 54, 72 R4G4B4A4U = 54,
73 ASTC_2D_8X5_SRGB = 55, 73 ASTC_2D_4X4_SRGB = 55,
74 ASTC_2D_5X4_SRGB = 56, 74 ASTC_2D_8X8_SRGB = 56,
75 ASTC_2D_5X5 = 57, 75 ASTC_2D_8X5_SRGB = 57,
76 ASTC_2D_5X5_SRGB = 58, 76 ASTC_2D_5X4_SRGB = 58,
77 ASTC_2D_10X8 = 59, 77 ASTC_2D_5X5 = 59,
78 ASTC_2D_10X8_SRGB = 60, 78 ASTC_2D_5X5_SRGB = 60,
79 ASTC_2D_6X6 = 61, 79 ASTC_2D_10X8 = 61,
80 ASTC_2D_6X6_SRGB = 62, 80 ASTC_2D_10X8_SRGB = 62,
81 ASTC_2D_10X10 = 63, 81 ASTC_2D_6X6 = 63,
82 ASTC_2D_10X10_SRGB = 64, 82 ASTC_2D_6X6_SRGB = 64,
83 ASTC_2D_12X12 = 65, 83 ASTC_2D_10X10 = 65,
84 ASTC_2D_12X12_SRGB = 66, 84 ASTC_2D_10X10_SRGB = 66,
85 ASTC_2D_8X6 = 67, 85 ASTC_2D_12X12 = 67,
86 ASTC_2D_8X6_SRGB = 68, 86 ASTC_2D_12X12_SRGB = 68,
87 ASTC_2D_6X5 = 69, 87 ASTC_2D_8X6 = 69,
88 ASTC_2D_6X5_SRGB = 70, 88 ASTC_2D_8X6_SRGB = 70,
89 E5B9G9R9F = 71, 89 ASTC_2D_6X5 = 71,
90 ASTC_2D_6X5_SRGB = 72,
91 E5B9G9R9F = 73,
90 92
91 MaxColorFormat, 93 MaxColorFormat,
92 94
93 // Depth formats 95 // Depth formats
94 Z32F = 72, 96 Z32F = 74,
95 Z16 = 73, 97 Z16 = 75,
96 98
97 MaxDepthFormat, 99 MaxDepthFormat,
98 100
99 // DepthStencil formats 101 // DepthStencil formats
100 Z24S8 = 74, 102 Z24S8 = 76,
101 S8Z24 = 75, 103 S8Z24 = 77,
102 Z32FS8 = 76, 104 Z32FS8 = 78,
103 105
104 MaxDepthStencilFormat, 106 MaxDepthStencilFormat,
105 107
@@ -137,6 +139,7 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{
137 0, // R8UI 139 0, // R8UI
138 0, // RGBA16F 140 0, // RGBA16F
139 0, // RGBA16U 141 0, // RGBA16U
142 0, // RGBA16S
140 0, // RGBA16UI 143 0, // RGBA16UI
141 0, // R11FG11FB10F 144 0, // R11FG11FB10F
142 0, // RGBA32UI 145 0, // RGBA32UI
@@ -171,6 +174,7 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{
171 0, // RG32UI 174 0, // RG32UI
172 0, // RGBX16F 175 0, // RGBX16F
173 0, // R32UI 176 0, // R32UI
177 0, // R32I
174 2, // ASTC_2D_8X8 178 2, // ASTC_2D_8X8
175 2, // ASTC_2D_8X5 179 2, // ASTC_2D_8X5
176 2, // ASTC_2D_5X4 180 2, // ASTC_2D_5X4
@@ -233,6 +237,7 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
233 1, // R8UI 237 1, // R8UI
234 1, // RGBA16F 238 1, // RGBA16F
235 1, // RGBA16U 239 1, // RGBA16U
240 1, // RGBA16S
236 1, // RGBA16UI 241 1, // RGBA16UI
237 1, // R11FG11FB10F 242 1, // R11FG11FB10F
238 1, // RGBA32UI 243 1, // RGBA32UI
@@ -267,6 +272,7 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
267 1, // RG32UI 272 1, // RG32UI
268 1, // RGBX16F 273 1, // RGBX16F
269 1, // R32UI 274 1, // R32UI
275 1, // R32I
270 8, // ASTC_2D_8X8 276 8, // ASTC_2D_8X8
271 8, // ASTC_2D_8X5 277 8, // ASTC_2D_8X5
272 5, // ASTC_2D_5X4 278 5, // ASTC_2D_5X4
@@ -321,6 +327,7 @@ constexpr std::array<u32, MaxPixelFormat> block_height_table = {{
321 1, // R8UI 327 1, // R8UI
322 1, // RGBA16F 328 1, // RGBA16F
323 1, // RGBA16U 329 1, // RGBA16U
330 1, // RGBA16S
324 1, // RGBA16UI 331 1, // RGBA16UI
325 1, // R11FG11FB10F 332 1, // R11FG11FB10F
326 1, // RGBA32UI 333 1, // RGBA32UI
@@ -355,6 +362,7 @@ constexpr std::array<u32, MaxPixelFormat> block_height_table = {{
355 1, // RG32UI 362 1, // RG32UI
356 1, // RGBX16F 363 1, // RGBX16F
357 1, // R32UI 364 1, // R32UI
365 1, // R32I
358 8, // ASTC_2D_8X8 366 8, // ASTC_2D_8X8
359 5, // ASTC_2D_8X5 367 5, // ASTC_2D_8X5
360 4, // ASTC_2D_5X4 368 4, // ASTC_2D_5X4
@@ -409,6 +417,7 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
409 8, // R8UI 417 8, // R8UI
410 64, // RGBA16F 418 64, // RGBA16F
411 64, // RGBA16U 419 64, // RGBA16U
420 64, // RGBA16S
412 64, // RGBA16UI 421 64, // RGBA16UI
413 32, // R11FG11FB10F 422 32, // R11FG11FB10F
414 128, // RGBA32UI 423 128, // RGBA32UI
@@ -443,6 +452,7 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
443 64, // RG32UI 452 64, // RG32UI
444 64, // RGBX16F 453 64, // RGBX16F
445 32, // R32UI 454 32, // R32UI
455 32, // R32I
446 128, // ASTC_2D_8X8 456 128, // ASTC_2D_8X8
447 128, // ASTC_2D_8X5 457 128, // ASTC_2D_8X5
448 128, // ASTC_2D_5X4 458 128, // ASTC_2D_5X4
@@ -512,6 +522,7 @@ constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table
512 SurfaceCompression::None, // R8UI 522 SurfaceCompression::None, // R8UI
513 SurfaceCompression::None, // RGBA16F 523 SurfaceCompression::None, // RGBA16F
514 SurfaceCompression::None, // RGBA16U 524 SurfaceCompression::None, // RGBA16U
525 SurfaceCompression::None, // RGBA16S
515 SurfaceCompression::None, // RGBA16UI 526 SurfaceCompression::None, // RGBA16UI
516 SurfaceCompression::None, // R11FG11FB10F 527 SurfaceCompression::None, // R11FG11FB10F
517 SurfaceCompression::None, // RGBA32UI 528 SurfaceCompression::None, // RGBA32UI
@@ -546,6 +557,7 @@ constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table
546 SurfaceCompression::None, // RG32UI 557 SurfaceCompression::None, // RG32UI
547 SurfaceCompression::None, // RGBX16F 558 SurfaceCompression::None, // RGBX16F
548 SurfaceCompression::None, // R32UI 559 SurfaceCompression::None, // R32UI
560 SurfaceCompression::None, // R32I
549 SurfaceCompression::Converted, // ASTC_2D_8X8 561 SurfaceCompression::Converted, // ASTC_2D_8X8
550 SurfaceCompression::Converted, // ASTC_2D_8X5 562 SurfaceCompression::Converted, // ASTC_2D_8X5
551 SurfaceCompression::Converted, // ASTC_2D_5X4 563 SurfaceCompression::Converted, // ASTC_2D_5X4
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index 81fb9f633..e151c26c4 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -41,7 +41,7 @@ struct Table {
41 ComponentType alpha_component; 41 ComponentType alpha_component;
42 bool is_srgb; 42 bool is_srgb;
43}; 43};
44constexpr std::array<Table, 74> DefinitionTable = {{ 44constexpr std::array<Table, 76> DefinitionTable = {{
45 {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U}, 45 {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U},
46 {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S}, 46 {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S},
47 {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI}, 47 {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI},
@@ -61,6 +61,7 @@ constexpr std::array<Table, 74> DefinitionTable = {{
61 {TextureFormat::G8R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RG8U}, 61 {TextureFormat::G8R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RG8U},
62 {TextureFormat::G8R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RG8S}, 62 {TextureFormat::G8R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RG8S},
63 63
64 {TextureFormat::R16_G16_B16_A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RGBA16S},
64 {TextureFormat::R16_G16_B16_A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RGBA16U}, 65 {TextureFormat::R16_G16_B16_A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RGBA16U},
65 {TextureFormat::R16_G16_B16_A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGBA16F}, 66 {TextureFormat::R16_G16_B16_A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGBA16F},
66 {TextureFormat::R16_G16_B16_A16, C, UINT, UINT, UINT, UINT, PixelFormat::RGBA16UI}, 67 {TextureFormat::R16_G16_B16_A16, C, UINT, UINT, UINT, UINT, PixelFormat::RGBA16UI},
@@ -89,6 +90,7 @@ constexpr std::array<Table, 74> DefinitionTable = {{
89 90
90 {TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32F}, 91 {TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32F},
91 {TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32UI}, 92 {TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32UI},
93 {TextureFormat::R32, C, SINT, SINT, SINT, SINT, PixelFormat::R32I},
92 94
93 {TextureFormat::E5B9G9R9_SHAREDEXP, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9F}, 95 {TextureFormat::E5B9G9R9_SHAREDEXP, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9F},
94 96
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
index 38b3a4ba8..9931c5ef7 100644
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -84,19 +84,16 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta
84 if (entry.IsShadow() && params.type == SurfaceType::ColorTexture) { 84 if (entry.IsShadow() && params.type == SurfaceType::ColorTexture) {
85 switch (params.pixel_format) { 85 switch (params.pixel_format) {
86 case PixelFormat::R16U: 86 case PixelFormat::R16U:
87 case PixelFormat::R16F: { 87 case PixelFormat::R16F:
88 params.pixel_format = PixelFormat::Z16; 88 params.pixel_format = PixelFormat::Z16;
89 break; 89 break;
90 } 90 case PixelFormat::R32F:
91 case PixelFormat::R32F: {
92 params.pixel_format = PixelFormat::Z32F; 91 params.pixel_format = PixelFormat::Z32F;
93 break; 92 break;
94 } 93 default:
95 default: {
96 UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}", 94 UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}",
97 static_cast<u32>(params.pixel_format)); 95 static_cast<u32>(params.pixel_format));
98 } 96 }
99 }
100 params.type = GetFormatType(params.pixel_format); 97 params.type = GetFormatType(params.pixel_format);
101 } 98 }
102 params.type = GetFormatType(params.pixel_format); 99 params.type = GetFormatType(params.pixel_format);
@@ -116,8 +113,10 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta
116 params.height = tic.Height(); 113 params.height = tic.Height();
117 params.depth = tic.Depth(); 114 params.depth = tic.Depth();
118 params.pitch = params.is_tiled ? 0 : tic.Pitch(); 115 params.pitch = params.is_tiled ? 0 : tic.Pitch();
119 if (params.target == SurfaceTarget::TextureCubemap || 116 if (params.target == SurfaceTarget::Texture2D && params.depth > 1) {
120 params.target == SurfaceTarget::TextureCubeArray) { 117 params.depth = 1;
118 } else if (params.target == SurfaceTarget::TextureCubemap ||
119 params.target == SurfaceTarget::TextureCubeArray) {
121 params.depth *= 6; 120 params.depth *= 6;
122 } 121 }
123 params.num_levels = tic.max_mip_level + 1; 122 params.num_levels = tic.max_mip_level + 1;
@@ -168,27 +167,29 @@ SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_tabl
168 return params; 167 return params;
169} 168}
170 169
171SurfaceParams SurfaceParams::CreateForDepthBuffer( 170SurfaceParams SurfaceParams::CreateForDepthBuffer(Core::System& system) {
172 Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format, 171 const auto& regs = system.GPU().Maxwell3D().regs;
173 u32 block_width, u32 block_height, u32 block_depth, 172 regs.zeta_width, regs.zeta_height, regs.zeta.format, regs.zeta.memory_layout.type;
174 Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) {
175 SurfaceParams params; 173 SurfaceParams params;
176 params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; 174 params.is_tiled = regs.zeta.memory_layout.type ==
175 Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
177 params.srgb_conversion = false; 176 params.srgb_conversion = false;
178 params.block_width = std::min(block_width, 5U); 177 params.block_width = std::min(regs.zeta.memory_layout.block_width.Value(), 5U);
179 params.block_height = std::min(block_height, 5U); 178 params.block_height = std::min(regs.zeta.memory_layout.block_height.Value(), 5U);
180 params.block_depth = std::min(block_depth, 5U); 179 params.block_depth = std::min(regs.zeta.memory_layout.block_depth.Value(), 5U);
181 params.tile_width_spacing = 1; 180 params.tile_width_spacing = 1;
182 params.pixel_format = PixelFormatFromDepthFormat(format); 181 params.pixel_format = PixelFormatFromDepthFormat(regs.zeta.format);
183 params.type = GetFormatType(params.pixel_format); 182 params.type = GetFormatType(params.pixel_format);
184 params.width = zeta_width; 183 params.width = regs.zeta_width;
185 params.height = zeta_height; 184 params.height = regs.zeta_height;
186 params.target = SurfaceTarget::Texture2D;
187 params.depth = 1;
188 params.pitch = 0; 185 params.pitch = 0;
189 params.num_levels = 1; 186 params.num_levels = 1;
190 params.emulated_levels = 1; 187 params.emulated_levels = 1;
191 params.is_layered = false; 188
189 const bool is_layered = regs.zeta_layers > 1 && params.block_depth == 0;
190 params.is_layered = is_layered;
191 params.target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D;
192 params.depth = is_layered ? regs.zeta_layers.Value() : 1U;
192 return params; 193 return params;
193} 194}
194 195
@@ -214,11 +215,13 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz
214 params.width = params.pitch / bpp; 215 params.width = params.pitch / bpp;
215 } 216 }
216 params.height = config.height; 217 params.height = config.height;
217 params.depth = 1;
218 params.target = SurfaceTarget::Texture2D;
219 params.num_levels = 1; 218 params.num_levels = 1;
220 params.emulated_levels = 1; 219 params.emulated_levels = 1;
221 params.is_layered = false; 220
221 const bool is_layered = config.layers > 1 && params.block_depth == 0;
222 params.is_layered = is_layered;
223 params.depth = is_layered ? config.layers.Value() : 1;
224 params.target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D;
222 return params; 225 return params;
223} 226}
224 227
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h
index 9256fd6d9..995cc3818 100644
--- a/src/video_core/texture_cache/surface_params.h
+++ b/src/video_core/texture_cache/surface_params.h
@@ -35,10 +35,7 @@ public:
35 const VideoCommon::Shader::Image& entry); 35 const VideoCommon::Shader::Image& entry);
36 36
37 /// Creates SurfaceCachedParams for a depth buffer configuration. 37 /// Creates SurfaceCachedParams for a depth buffer configuration.
38 static SurfaceParams CreateForDepthBuffer( 38 static SurfaceParams CreateForDepthBuffer(Core::System& system);
39 Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format,
40 u32 block_width, u32 block_height, u32 block_depth,
41 Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type);
42 39
43 /// Creates SurfaceCachedParams from a framebuffer configuration. 40 /// Creates SurfaceCachedParams from a framebuffer configuration.
44 static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index); 41 static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index);
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 0d105d386..6cdbe63d0 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -22,6 +22,7 @@
22#include "core/core.h" 22#include "core/core.h"
23#include "core/memory.h" 23#include "core/memory.h"
24#include "core/settings.h" 24#include "core/settings.h"
25#include "video_core/dirty_flags.h"
25#include "video_core/engines/fermi_2d.h" 26#include "video_core/engines/fermi_2d.h"
26#include "video_core/engines/maxwell_3d.h" 27#include "video_core/engines/maxwell_3d.h"
27#include "video_core/gpu.h" 28#include "video_core/gpu.h"
@@ -103,6 +104,11 @@ public:
103 if (!cache_addr) { 104 if (!cache_addr) {
104 return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); 105 return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
105 } 106 }
107
108 if (!IsTypeCompatible(tic.texture_type, entry)) {
109 return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
110 }
111
106 const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; 112 const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)};
107 const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false); 113 const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false);
108 if (guard_samplers) { 114 if (guard_samplers) {
@@ -142,11 +148,10 @@ public:
142 TView GetDepthBufferSurface(bool preserve_contents) { 148 TView GetDepthBufferSurface(bool preserve_contents) {
143 std::lock_guard lock{mutex}; 149 std::lock_guard lock{mutex};
144 auto& maxwell3d = system.GPU().Maxwell3D(); 150 auto& maxwell3d = system.GPU().Maxwell3D();
145 151 if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer]) {
146 if (!maxwell3d.dirty.depth_buffer) {
147 return depth_buffer.view; 152 return depth_buffer.view;
148 } 153 }
149 maxwell3d.dirty.depth_buffer = false; 154 maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer] = false;
150 155
151 const auto& regs{maxwell3d.regs}; 156 const auto& regs{maxwell3d.regs};
152 const auto gpu_addr{regs.zeta.Address()}; 157 const auto gpu_addr{regs.zeta.Address()};
@@ -160,10 +165,7 @@ public:
160 SetEmptyDepthBuffer(); 165 SetEmptyDepthBuffer();
161 return {}; 166 return {};
162 } 167 }
163 const auto depth_params{SurfaceParams::CreateForDepthBuffer( 168 const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)};
164 system, regs.zeta_width, regs.zeta_height, regs.zeta.format,
165 regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height,
166 regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)};
167 auto surface_view = GetSurface(gpu_addr, cache_addr, depth_params, preserve_contents, true); 169 auto surface_view = GetSurface(gpu_addr, cache_addr, depth_params, preserve_contents, true);
168 if (depth_buffer.target) 170 if (depth_buffer.target)
169 depth_buffer.target->MarkAsRenderTarget(false, NO_RT); 171 depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
@@ -178,10 +180,10 @@ public:
178 std::lock_guard lock{mutex}; 180 std::lock_guard lock{mutex};
179 ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); 181 ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
180 auto& maxwell3d = system.GPU().Maxwell3D(); 182 auto& maxwell3d = system.GPU().Maxwell3D();
181 if (!maxwell3d.dirty.render_target[index]) { 183 if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index]) {
182 return render_targets[index].view; 184 return render_targets[index].view;
183 } 185 }
184 maxwell3d.dirty.render_target[index] = false; 186 maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = false;
185 187
186 const auto& regs{maxwell3d.regs}; 188 const auto& regs{maxwell3d.regs};
187 if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || 189 if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 ||
@@ -323,14 +325,14 @@ protected:
323 virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; 325 virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0;
324 326
325 void ManageRenderTargetUnregister(TSurface& surface) { 327 void ManageRenderTargetUnregister(TSurface& surface) {
326 auto& maxwell3d = system.GPU().Maxwell3D(); 328 auto& dirty = system.GPU().Maxwell3D().dirty;
327 const u32 index = surface->GetRenderTarget(); 329 const u32 index = surface->GetRenderTarget();
328 if (index == DEPTH_RT) { 330 if (index == DEPTH_RT) {
329 maxwell3d.dirty.depth_buffer = true; 331 dirty.flags[VideoCommon::Dirty::ZetaBuffer] = true;
330 } else { 332 } else {
331 maxwell3d.dirty.render_target[index] = true; 333 dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = true;
332 } 334 }
333 maxwell3d.dirty.render_settings = true; 335 dirty.flags[VideoCommon::Dirty::RenderTargets] = true;
334 } 336 }
335 337
336 void Register(TSurface surface) { 338 void Register(TSurface surface) {
@@ -917,13 +919,15 @@ private:
917 params.width = 1; 919 params.width = 1;
918 params.height = 1; 920 params.height = 1;
919 params.depth = 1; 921 params.depth = 1;
922 if (target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray) {
923 params.depth = 6;
924 }
920 params.pitch = 4; 925 params.pitch = 4;
921 params.num_levels = 1; 926 params.num_levels = 1;
922 params.emulated_levels = 1; 927 params.emulated_levels = 1;
923 params.pixel_format = VideoCore::Surface::PixelFormat::RGBA16F; 928 params.pixel_format = VideoCore::Surface::PixelFormat::R8U;
924 params.type = VideoCore::Surface::SurfaceType::ColorTexture; 929 params.type = VideoCore::Surface::SurfaceType::ColorTexture;
925 auto surface = CreateSurface(0ULL, params); 930 auto surface = CreateSurface(0ULL, params);
926 invalid_memory.clear();
927 invalid_memory.resize(surface->GetHostSizeInBytes(), 0U); 931 invalid_memory.resize(surface->GetHostSizeInBytes(), 0U);
928 surface->UploadTexture(invalid_memory); 932 surface->UploadTexture(invalid_memory);
929 surface->MarkAsModified(false, Tick()); 933 surface->MarkAsModified(false, Tick());
@@ -1085,6 +1089,36 @@ private:
1085 return siblings_table[static_cast<std::size_t>(format)]; 1089 return siblings_table[static_cast<std::size_t>(format)];
1086 } 1090 }
1087 1091
1092 /// Returns true the shader sampler entry is compatible with the TIC texture type.
1093 static bool IsTypeCompatible(Tegra::Texture::TextureType tic_type,
1094 const VideoCommon::Shader::Sampler& entry) {
1095 const auto shader_type = entry.GetType();
1096 switch (tic_type) {
1097 case Tegra::Texture::TextureType::Texture1D:
1098 case Tegra::Texture::TextureType::Texture1DArray:
1099 return shader_type == Tegra::Shader::TextureType::Texture1D;
1100 case Tegra::Texture::TextureType::Texture1DBuffer:
1101 // TODO(Rodrigo): Assume as valid for now
1102 return true;
1103 case Tegra::Texture::TextureType::Texture2D:
1104 case Tegra::Texture::TextureType::Texture2DNoMipmap:
1105 return shader_type == Tegra::Shader::TextureType::Texture2D;
1106 case Tegra::Texture::TextureType::Texture2DArray:
1107 return shader_type == Tegra::Shader::TextureType::Texture2D ||
1108 shader_type == Tegra::Shader::TextureType::TextureCube;
1109 case Tegra::Texture::TextureType::Texture3D:
1110 return shader_type == Tegra::Shader::TextureType::Texture3D;
1111 case Tegra::Texture::TextureType::TextureCubeArray:
1112 case Tegra::Texture::TextureType::TextureCubemap:
1113 if (shader_type == Tegra::Shader::TextureType::TextureCube) {
1114 return true;
1115 }
1116 return shader_type == Tegra::Shader::TextureType::Texture2D && entry.IsArray();
1117 }
1118 UNREACHABLE();
1119 return true;
1120 }
1121
1088 struct FramebufferTargetInfo { 1122 struct FramebufferTargetInfo {
1089 TSurface target; 1123 TSurface target;
1090 TView view; 1124 TView view;
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index 33bd31865..062b4f252 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -17,26 +17,37 @@
17 17
18#include <algorithm> 18#include <algorithm>
19#include <cassert> 19#include <cassert>
20#include <cstdint>
21#include <cstring> 20#include <cstring>
22#include <vector> 21#include <vector>
23 22
23#include "common/common_types.h"
24
24#include "video_core/textures/astc.h" 25#include "video_core/textures/astc.h"
25 26
27namespace {
28
29/// Count the number of bits set in a number.
30constexpr u32 Popcnt(u32 n) {
31 u32 c = 0;
32 for (; n; c++) {
33 n &= n - 1;
34 }
35 return c;
36}
37
38} // Anonymous namespace
39
26class InputBitStream { 40class InputBitStream {
27public: 41public:
28 explicit InputBitStream(const unsigned char* ptr, int start_offset = 0) 42 explicit InputBitStream(const u8* ptr, std::size_t start_offset = 0)
29 : m_CurByte(ptr), m_NextBit(start_offset % 8) {} 43 : m_CurByte(ptr), m_NextBit(start_offset % 8) {}
30 44
31 ~InputBitStream() = default; 45 std::size_t GetBitsRead() const {
32
33 int GetBitsRead() const {
34 return m_BitsRead; 46 return m_BitsRead;
35 } 47 }
36 48
37 int ReadBit() { 49 u32 ReadBit() {
38 50 u32 bit = *m_CurByte >> m_NextBit++;
39 int bit = *m_CurByte >> m_NextBit++;
40 while (m_NextBit >= 8) { 51 while (m_NextBit >= 8) {
41 m_NextBit -= 8; 52 m_NextBit -= 8;
42 m_CurByte++; 53 m_CurByte++;
@@ -46,57 +57,66 @@ public:
46 return bit & 1; 57 return bit & 1;
47 } 58 }
48 59
49 unsigned int ReadBits(unsigned int nBits) { 60 u32 ReadBits(std::size_t nBits) {
50 unsigned int ret = 0; 61 u32 ret = 0;
51 for (unsigned int i = 0; i < nBits; i++) { 62 for (std::size_t i = 0; i < nBits; ++i) {
63 ret |= (ReadBit() & 1) << i;
64 }
65 return ret;
66 }
67
68 template <std::size_t nBits>
69 u32 ReadBits() {
70 u32 ret = 0;
71 for (std::size_t i = 0; i < nBits; ++i) {
52 ret |= (ReadBit() & 1) << i; 72 ret |= (ReadBit() & 1) << i;
53 } 73 }
54 return ret; 74 return ret;
55 } 75 }
56 76
57private: 77private:
58 const unsigned char* m_CurByte; 78 const u8* m_CurByte;
59 int m_NextBit = 0; 79 std::size_t m_NextBit = 0;
60 int m_BitsRead = 0; 80 std::size_t m_BitsRead = 0;
61}; 81};
62 82
63class OutputBitStream { 83class OutputBitStream {
64public: 84public:
65 explicit OutputBitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0) 85 explicit OutputBitStream(u8* ptr, s32 nBits = 0, s32 start_offset = 0)
66 : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {} 86 : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}
67 87
68 ~OutputBitStream() = default; 88 ~OutputBitStream() = default;
69 89
70 int GetBitsWritten() const { 90 s32 GetBitsWritten() const {
71 return m_BitsWritten; 91 return m_BitsWritten;
72 } 92 }
73 93
74 void WriteBitsR(unsigned int val, unsigned int nBits) { 94 void WriteBitsR(u32 val, u32 nBits) {
75 for (unsigned int i = 0; i < nBits; i++) { 95 for (u32 i = 0; i < nBits; i++) {
76 WriteBit((val >> (nBits - i - 1)) & 1); 96 WriteBit((val >> (nBits - i - 1)) & 1);
77 } 97 }
78 } 98 }
79 99
80 void WriteBits(unsigned int val, unsigned int nBits) { 100 void WriteBits(u32 val, u32 nBits) {
81 for (unsigned int i = 0; i < nBits; i++) { 101 for (u32 i = 0; i < nBits; i++) {
82 WriteBit((val >> i) & 1); 102 WriteBit((val >> i) & 1);
83 } 103 }
84 } 104 }
85 105
86private: 106private:
87 void WriteBit(int b) { 107 void WriteBit(s32 b) {
88 108
89 if (done) 109 if (done)
90 return; 110 return;
91 111
92 const unsigned int mask = 1 << m_NextBit++; 112 const u32 mask = 1 << m_NextBit++;
93 113
94 // clear the bit 114 // clear the bit
95 *m_CurByte &= static_cast<unsigned char>(~mask); 115 *m_CurByte &= static_cast<u8>(~mask);
96 116
97 // Write the bit, if necessary 117 // Write the bit, if necessary
98 if (b) 118 if (b)
99 *m_CurByte |= static_cast<unsigned char>(mask); 119 *m_CurByte |= static_cast<u8>(mask);
100 120
101 // Next byte? 121 // Next byte?
102 if (m_NextBit >= 8) { 122 if (m_NextBit >= 8) {
@@ -107,10 +127,10 @@ private:
107 done = done || ++m_BitsWritten >= m_NumBits; 127 done = done || ++m_BitsWritten >= m_NumBits;
108 } 128 }
109 129
110 int m_BitsWritten = 0; 130 s32 m_BitsWritten = 0;
111 const int m_NumBits; 131 const s32 m_NumBits;
112 unsigned char* m_CurByte; 132 u8* m_CurByte;
113 int m_NextBit = 0; 133 s32 m_NextBit = 0;
114 134
115 bool done = false; 135 bool done = false;
116}; 136};
@@ -123,20 +143,20 @@ public:
123 Bits(const Bits&) = delete; 143 Bits(const Bits&) = delete;
124 Bits& operator=(const Bits&) = delete; 144 Bits& operator=(const Bits&) = delete;
125 145
126 uint8_t operator[](uint32_t bitPos) const { 146 u8 operator[](u32 bitPos) const {
127 return static_cast<uint8_t>((m_Bits >> bitPos) & 1); 147 return static_cast<u8>((m_Bits >> bitPos) & 1);
128 } 148 }
129 149
130 IntType operator()(uint32_t start, uint32_t end) const { 150 IntType operator()(u32 start, u32 end) const {
131 if (start == end) { 151 if (start == end) {
132 return (*this)[start]; 152 return (*this)[start];
133 } else if (start > end) { 153 } else if (start > end) {
134 uint32_t t = start; 154 u32 t = start;
135 start = end; 155 start = end;
136 end = t; 156 end = t;
137 } 157 }
138 158
139 uint64_t mask = (1 << (end - start + 1)) - 1; 159 u64 mask = (1 << (end - start + 1)) - 1;
140 return (m_Bits >> start) & static_cast<IntType>(mask); 160 return (m_Bits >> start) & static_cast<IntType>(mask);
141 } 161 }
142 162
@@ -144,273 +164,236 @@ private:
144 const IntType& m_Bits; 164 const IntType& m_Bits;
145}; 165};
146 166
147enum EIntegerEncoding { eIntegerEncoding_JustBits, eIntegerEncoding_Quint, eIntegerEncoding_Trit }; 167enum class IntegerEncoding { JustBits, Qus32, Trit };
148
149class IntegerEncodedValue {
150private:
151 const EIntegerEncoding m_Encoding;
152 const uint32_t m_NumBits;
153 uint32_t m_BitValue;
154 union {
155 uint32_t m_QuintValue;
156 uint32_t m_TritValue;
157 };
158 168
159public: 169struct IntegerEncodedValue {
160 // Jank, but we're not doing any heavy lifting in this class, so it's 170 constexpr IntegerEncodedValue() = default;
161 // probably OK. It allows us to use these in std::vectors...
162 IntegerEncodedValue& operator=(const IntegerEncodedValue& other) {
163 new (this) IntegerEncodedValue(other);
164 return *this;
165 }
166 171
167 IntegerEncodedValue(EIntegerEncoding encoding, uint32_t numBits) 172 constexpr IntegerEncodedValue(IntegerEncoding encoding_, u32 num_bits_)
168 : m_Encoding(encoding), m_NumBits(numBits) {} 173 : encoding{encoding_}, num_bits{num_bits_} {}
169 174
170 EIntegerEncoding GetEncoding() const { 175 constexpr bool MatchesEncoding(const IntegerEncodedValue& other) const {
171 return m_Encoding; 176 return encoding == other.encoding && num_bits == other.num_bits;
172 }
173 uint32_t BaseBitLength() const {
174 return m_NumBits;
175 }
176
177 uint32_t GetBitValue() const {
178 return m_BitValue;
179 }
180 void SetBitValue(uint32_t val) {
181 m_BitValue = val;
182 }
183
184 uint32_t GetTritValue() const {
185 return m_TritValue;
186 }
187 void SetTritValue(uint32_t val) {
188 m_TritValue = val;
189 }
190
191 uint32_t GetQuintValue() const {
192 return m_QuintValue;
193 }
194 void SetQuintValue(uint32_t val) {
195 m_QuintValue = val;
196 }
197
198 bool MatchesEncoding(const IntegerEncodedValue& other) const {
199 return m_Encoding == other.m_Encoding && m_NumBits == other.m_NumBits;
200 } 177 }
201 178
202 // Returns the number of bits required to encode nVals values. 179 // Returns the number of bits required to encode nVals values.
203 uint32_t GetBitLength(uint32_t nVals) const { 180 u32 GetBitLength(u32 nVals) const {
204 uint32_t totalBits = m_NumBits * nVals; 181 u32 totalBits = num_bits * nVals;
205 if (m_Encoding == eIntegerEncoding_Trit) { 182 if (encoding == IntegerEncoding::Trit) {
206 totalBits += (nVals * 8 + 4) / 5; 183 totalBits += (nVals * 8 + 4) / 5;
207 } else if (m_Encoding == eIntegerEncoding_Quint) { 184 } else if (encoding == IntegerEncoding::Qus32) {
208 totalBits += (nVals * 7 + 2) / 3; 185 totalBits += (nVals * 7 + 2) / 3;
209 } 186 }
210 return totalBits; 187 return totalBits;
211 } 188 }
212 189
213 // Count the number of bits set in a number. 190 IntegerEncoding encoding{};
214 static inline uint32_t Popcnt(uint32_t n) { 191 u32 num_bits = 0;
215 uint32_t c; 192 u32 bit_value = 0;
216 for (c = 0; n; c++) { 193 union {
217 n &= n - 1; 194 u32 qus32_value = 0;
195 u32 trit_value;
196 };
197};
198
199static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,
200 u32 nBitsPerValue) {
201 // Implement the algorithm in section C.2.12
202 u32 m[5];
203 u32 t[5];
204 u32 T;
205
206 // Read the trit encoded block according to
207 // table C.2.14
208 m[0] = bits.ReadBits(nBitsPerValue);
209 T = bits.ReadBits<2>();
210 m[1] = bits.ReadBits(nBitsPerValue);
211 T |= bits.ReadBits<2>() << 2;
212 m[2] = bits.ReadBits(nBitsPerValue);
213 T |= bits.ReadBit() << 4;
214 m[3] = bits.ReadBits(nBitsPerValue);
215 T |= bits.ReadBits<2>() << 5;
216 m[4] = bits.ReadBits(nBitsPerValue);
217 T |= bits.ReadBit() << 7;
218
219 u32 C = 0;
220
221 Bits<u32> Tb(T);
222 if (Tb(2, 4) == 7) {
223 C = (Tb(5, 7) << 2) | Tb(0, 1);
224 t[4] = t[3] = 2;
225 } else {
226 C = Tb(0, 4);
227 if (Tb(5, 6) == 3) {
228 t[4] = 2;
229 t[3] = Tb[7];
230 } else {
231 t[4] = Tb[7];
232 t[3] = Tb(5, 6);
218 } 233 }
219 return c;
220 } 234 }
221 235
222 // Returns a new instance of this struct that corresponds to the 236 Bits<u32> Cb(C);
223 // can take no more than maxval values 237 if (Cb(0, 1) == 3) {
224 static IntegerEncodedValue CreateEncoding(uint32_t maxVal) { 238 t[2] = 2;
225 while (maxVal > 0) { 239 t[1] = Cb[4];
226 uint32_t check = maxVal + 1; 240 t[0] = (Cb[3] << 1) | (Cb[2] & ~Cb[3]);
227 241 } else if (Cb(2, 3) == 3) {
228 // Is maxVal a power of two? 242 t[2] = 2;
229 if (!(check & (check - 1))) { 243 t[1] = 2;
230 return IntegerEncodedValue(eIntegerEncoding_JustBits, Popcnt(maxVal)); 244 t[0] = Cb(0, 1);
231 } 245 } else {
232 246 t[2] = Cb[4];
233 // Is maxVal of the type 3*2^n - 1? 247 t[1] = Cb(2, 3);
234 if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) { 248 t[0] = (Cb[1] << 1) | (Cb[0] & ~Cb[1]);
235 return IntegerEncodedValue(eIntegerEncoding_Trit, Popcnt(check / 3 - 1)); 249 }
236 }
237 250
238 // Is maxVal of the type 5*2^n - 1? 251 for (std::size_t i = 0; i < 5; ++i) {
239 if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) { 252 IntegerEncodedValue& val = result.emplace_back(IntegerEncoding::Trit, nBitsPerValue);
240 return IntegerEncodedValue(eIntegerEncoding_Quint, Popcnt(check / 5 - 1)); 253 val.bit_value = m[i];
241 } 254 val.trit_value = t[i];
255 }
256}
242 257
243 // Apparently it can't be represented with a bounded integer sequence... 258static void DecodeQus32Block(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,
244 // just iterate. 259 u32 nBitsPerValue) {
245 maxVal--; 260 // Implement the algorithm in section C.2.12
261 u32 m[3];
262 u32 q[3];
263 u32 Q;
264
265 // Read the trit encoded block according to
266 // table C.2.15
267 m[0] = bits.ReadBits(nBitsPerValue);
268 Q = bits.ReadBits<3>();
269 m[1] = bits.ReadBits(nBitsPerValue);
270 Q |= bits.ReadBits<2>() << 3;
271 m[2] = bits.ReadBits(nBitsPerValue);
272 Q |= bits.ReadBits<2>() << 5;
273
274 Bits<u32> Qb(Q);
275 if (Qb(1, 2) == 3 && Qb(5, 6) == 0) {
276 q[0] = q[1] = 4;
277 q[2] = (Qb[0] << 2) | ((Qb[4] & ~Qb[0]) << 1) | (Qb[3] & ~Qb[0]);
278 } else {
279 u32 C = 0;
280 if (Qb(1, 2) == 3) {
281 q[2] = 4;
282 C = (Qb(3, 4) << 3) | ((~Qb(5, 6) & 3) << 1) | Qb[0];
283 } else {
284 q[2] = Qb(5, 6);
285 C = Qb(0, 4);
246 } 286 }
247 return IntegerEncodedValue(eIntegerEncoding_JustBits, 0);
248 }
249
250 // Fills result with the values that are encoded in the given
251 // bitstream. We must know beforehand what the maximum possible
252 // value is, and how many values we're decoding.
253 static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result,
254 InputBitStream& bits, uint32_t maxRange, uint32_t nValues) {
255 // Determine encoding parameters
256 IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange);
257
258 // Start decoding
259 uint32_t nValsDecoded = 0;
260 while (nValsDecoded < nValues) {
261 switch (val.GetEncoding()) {
262 case eIntegerEncoding_Quint:
263 DecodeQuintBlock(bits, result, val.BaseBitLength());
264 nValsDecoded += 3;
265 break;
266 287
267 case eIntegerEncoding_Trit: 288 Bits<u32> Cb(C);
268 DecodeTritBlock(bits, result, val.BaseBitLength()); 289 if (Cb(0, 2) == 5) {
269 nValsDecoded += 5; 290 q[1] = 4;
270 break; 291 q[0] = Cb(3, 4);
271 292 } else {
272 case eIntegerEncoding_JustBits: 293 q[1] = Cb(3, 4);
273 val.SetBitValue(bits.ReadBits(val.BaseBitLength())); 294 q[0] = Cb(0, 2);
274 result.push_back(val);
275 nValsDecoded++;
276 break;
277 }
278 } 295 }
279 } 296 }
280 297
281private: 298 for (std::size_t i = 0; i < 3; ++i) {
282 static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, 299 IntegerEncodedValue& val = result.emplace_back(IntegerEncoding::Qus32, nBitsPerValue);
283 uint32_t nBitsPerValue) { 300 val.bit_value = m[i];
284 // Implement the algorithm in section C.2.12 301 val.qus32_value = q[i];
285 uint32_t m[5]; 302 }
286 uint32_t t[5]; 303}
287 uint32_t T; 304
288 305// Returns a new instance of this struct that corresponds to the
289 // Read the trit encoded block according to 306// can take no more than maxval values
290 // table C.2.14 307static constexpr IntegerEncodedValue CreateEncoding(u32 maxVal) {
291 m[0] = bits.ReadBits(nBitsPerValue); 308 while (maxVal > 0) {
292 T = bits.ReadBits(2); 309 u32 check = maxVal + 1;
293 m[1] = bits.ReadBits(nBitsPerValue); 310
294 T |= bits.ReadBits(2) << 2; 311 // Is maxVal a power of two?
295 m[2] = bits.ReadBits(nBitsPerValue); 312 if (!(check & (check - 1))) {
296 T |= bits.ReadBit() << 4; 313 return IntegerEncodedValue(IntegerEncoding::JustBits, Popcnt(maxVal));
297 m[3] = bits.ReadBits(nBitsPerValue);
298 T |= bits.ReadBits(2) << 5;
299 m[4] = bits.ReadBits(nBitsPerValue);
300 T |= bits.ReadBit() << 7;
301
302 uint32_t C = 0;
303
304 Bits<uint32_t> Tb(T);
305 if (Tb(2, 4) == 7) {
306 C = (Tb(5, 7) << 2) | Tb(0, 1);
307 t[4] = t[3] = 2;
308 } else {
309 C = Tb(0, 4);
310 if (Tb(5, 6) == 3) {
311 t[4] = 2;
312 t[3] = Tb[7];
313 } else {
314 t[4] = Tb[7];
315 t[3] = Tb(5, 6);
316 }
317 } 314 }
318 315
319 Bits<uint32_t> Cb(C); 316 // Is maxVal of the type 3*2^n - 1?
320 if (Cb(0, 1) == 3) { 317 if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) {
321 t[2] = 2; 318 return IntegerEncodedValue(IntegerEncoding::Trit, Popcnt(check / 3 - 1));
322 t[1] = Cb[4];
323 t[0] = (Cb[3] << 1) | (Cb[2] & ~Cb[3]);
324 } else if (Cb(2, 3) == 3) {
325 t[2] = 2;
326 t[1] = 2;
327 t[0] = Cb(0, 1);
328 } else {
329 t[2] = Cb[4];
330 t[1] = Cb(2, 3);
331 t[0] = (Cb[1] << 1) | (Cb[0] & ~Cb[1]);
332 } 319 }
333 320
334 for (uint32_t i = 0; i < 5; i++) { 321 // Is maxVal of the type 5*2^n - 1?
335 IntegerEncodedValue val(eIntegerEncoding_Trit, nBitsPerValue); 322 if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) {
336 val.SetBitValue(m[i]); 323 return IntegerEncodedValue(IntegerEncoding::Qus32, Popcnt(check / 5 - 1));
337 val.SetTritValue(t[i]);
338 result.push_back(val);
339 } 324 }
325
326 // Apparently it can't be represented with a bounded integer sequence...
327 // just iterate.
328 maxVal--;
340 } 329 }
330 return IntegerEncodedValue(IntegerEncoding::JustBits, 0);
331}
341 332
342 static void DecodeQuintBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, 333static constexpr std::array<IntegerEncodedValue, 256> MakeEncodedValues() {
343 uint32_t nBitsPerValue) { 334 std::array<IntegerEncodedValue, 256> encodings{};
344 // Implement the algorithm in section C.2.12 335 for (std::size_t i = 0; i < encodings.size(); ++i) {
345 uint32_t m[3]; 336 encodings[i] = CreateEncoding(static_cast<u32>(i));
346 uint32_t q[3]; 337 }
347 uint32_t Q; 338 return encodings;
348 339}
349 // Read the trit encoded block according to
350 // table C.2.15
351 m[0] = bits.ReadBits(nBitsPerValue);
352 Q = bits.ReadBits(3);
353 m[1] = bits.ReadBits(nBitsPerValue);
354 Q |= bits.ReadBits(2) << 3;
355 m[2] = bits.ReadBits(nBitsPerValue);
356 Q |= bits.ReadBits(2) << 5;
357
358 Bits<uint32_t> Qb(Q);
359 if (Qb(1, 2) == 3 && Qb(5, 6) == 0) {
360 q[0] = q[1] = 4;
361 q[2] = (Qb[0] << 2) | ((Qb[4] & ~Qb[0]) << 1) | (Qb[3] & ~Qb[0]);
362 } else {
363 uint32_t C = 0;
364 if (Qb(1, 2) == 3) {
365 q[2] = 4;
366 C = (Qb(3, 4) << 3) | ((~Qb(5, 6) & 3) << 1) | Qb[0];
367 } else {
368 q[2] = Qb(5, 6);
369 C = Qb(0, 4);
370 }
371 340
372 Bits<uint32_t> Cb(C); 341static constexpr std::array EncodingsValues = MakeEncodedValues();
373 if (Cb(0, 2) == 5) { 342
374 q[1] = 4; 343// Fills result with the values that are encoded in the given
375 q[0] = Cb(3, 4); 344// bitstream. We must know beforehand what the maximum possible
376 } else { 345// value is, and how many values we're decoding.
377 q[1] = Cb(3, 4); 346static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, InputBitStream& bits,
378 q[0] = Cb(0, 2); 347 u32 maxRange, u32 nValues) {
379 } 348 // Determine encoding parameters
380 } 349 IntegerEncodedValue val = EncodingsValues[maxRange];
350
351 // Start decoding
352 u32 nValsDecoded = 0;
353 while (nValsDecoded < nValues) {
354 switch (val.encoding) {
355 case IntegerEncoding::Qus32:
356 DecodeQus32Block(bits, result, val.num_bits);
357 nValsDecoded += 3;
358 break;
359
360 case IntegerEncoding::Trit:
361 DecodeTritBlock(bits, result, val.num_bits);
362 nValsDecoded += 5;
363 break;
381 364
382 for (uint32_t i = 0; i < 3; i++) { 365 case IntegerEncoding::JustBits:
383 IntegerEncodedValue val(eIntegerEncoding_Quint, nBitsPerValue); 366 val.bit_value = bits.ReadBits(val.num_bits);
384 val.m_BitValue = m[i];
385 val.m_QuintValue = q[i];
386 result.push_back(val); 367 result.push_back(val);
368 nValsDecoded++;
369 break;
387 } 370 }
388 } 371 }
389}; 372}
390 373
391namespace ASTCC { 374namespace ASTCC {
392 375
393struct TexelWeightParams { 376struct TexelWeightParams {
394 uint32_t m_Width = 0; 377 u32 m_Width = 0;
395 uint32_t m_Height = 0; 378 u32 m_Height = 0;
396 bool m_bDualPlane = false; 379 bool m_bDualPlane = false;
397 uint32_t m_MaxWeight = 0; 380 u32 m_MaxWeight = 0;
398 bool m_bError = false; 381 bool m_bError = false;
399 bool m_bVoidExtentLDR = false; 382 bool m_bVoidExtentLDR = false;
400 bool m_bVoidExtentHDR = false; 383 bool m_bVoidExtentHDR = false;
401 384
402 uint32_t GetPackedBitSize() const { 385 u32 GetPackedBitSize() const {
403 // How many indices do we have? 386 // How many indices do we have?
404 uint32_t nIdxs = m_Height * m_Width; 387 u32 nIdxs = m_Height * m_Width;
405 if (m_bDualPlane) { 388 if (m_bDualPlane) {
406 nIdxs *= 2; 389 nIdxs *= 2;
407 } 390 }
408 391
409 return IntegerEncodedValue::CreateEncoding(m_MaxWeight).GetBitLength(nIdxs); 392 return EncodingsValues[m_MaxWeight].GetBitLength(nIdxs);
410 } 393 }
411 394
412 uint32_t GetNumWeightValues() const { 395 u32 GetNumWeightValues() const {
413 uint32_t ret = m_Width * m_Height; 396 u32 ret = m_Width * m_Height;
414 if (m_bDualPlane) { 397 if (m_bDualPlane) {
415 ret *= 2; 398 ret *= 2;
416 } 399 }
@@ -422,7 +405,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
422 TexelWeightParams params; 405 TexelWeightParams params;
423 406
424 // Read the entire block mode all at once 407 // Read the entire block mode all at once
425 uint16_t modeBits = static_cast<uint16_t>(strm.ReadBits(11)); 408 u16 modeBits = static_cast<u16>(strm.ReadBits<11>());
426 409
427 // Does this match the void extent block mode? 410 // Does this match the void extent block mode?
428 if ((modeBits & 0x01FF) == 0x1FC) { 411 if ((modeBits & 0x01FF) == 0x1FC) {
@@ -457,7 +440,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
457 // of the block mode. Layout is determined by a number 440 // of the block mode. Layout is determined by a number
458 // between 0 and 9 corresponding to table C.2.8 of the 441 // between 0 and 9 corresponding to table C.2.8 of the
459 // ASTC spec. 442 // ASTC spec.
460 uint32_t layout = 0; 443 u32 layout = 0;
461 444
462 if ((modeBits & 0x1) || (modeBits & 0x2)) { 445 if ((modeBits & 0x1) || (modeBits & 0x2)) {
463 // layout is in [0-4] 446 // layout is in [0-4]
@@ -509,7 +492,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
509 assert(layout < 10); 492 assert(layout < 10);
510 493
511 // Determine R 494 // Determine R
512 uint32_t R = !!(modeBits & 0x10); 495 u32 R = !!(modeBits & 0x10);
513 if (layout < 5) { 496 if (layout < 5) {
514 R |= (modeBits & 0x3) << 1; 497 R |= (modeBits & 0x3) << 1;
515 } else { 498 } else {
@@ -520,54 +503,54 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
520 // Determine width & height 503 // Determine width & height
521 switch (layout) { 504 switch (layout) {
522 case 0: { 505 case 0: {
523 uint32_t A = (modeBits >> 5) & 0x3; 506 u32 A = (modeBits >> 5) & 0x3;
524 uint32_t B = (modeBits >> 7) & 0x3; 507 u32 B = (modeBits >> 7) & 0x3;
525 params.m_Width = B + 4; 508 params.m_Width = B + 4;
526 params.m_Height = A + 2; 509 params.m_Height = A + 2;
527 break; 510 break;
528 } 511 }
529 512
530 case 1: { 513 case 1: {
531 uint32_t A = (modeBits >> 5) & 0x3; 514 u32 A = (modeBits >> 5) & 0x3;
532 uint32_t B = (modeBits >> 7) & 0x3; 515 u32 B = (modeBits >> 7) & 0x3;
533 params.m_Width = B + 8; 516 params.m_Width = B + 8;
534 params.m_Height = A + 2; 517 params.m_Height = A + 2;
535 break; 518 break;
536 } 519 }
537 520
538 case 2: { 521 case 2: {
539 uint32_t A = (modeBits >> 5) & 0x3; 522 u32 A = (modeBits >> 5) & 0x3;
540 uint32_t B = (modeBits >> 7) & 0x3; 523 u32 B = (modeBits >> 7) & 0x3;
541 params.m_Width = A + 2; 524 params.m_Width = A + 2;
542 params.m_Height = B + 8; 525 params.m_Height = B + 8;
543 break; 526 break;
544 } 527 }
545 528
546 case 3: { 529 case 3: {
547 uint32_t A = (modeBits >> 5) & 0x3; 530 u32 A = (modeBits >> 5) & 0x3;
548 uint32_t B = (modeBits >> 7) & 0x1; 531 u32 B = (modeBits >> 7) & 0x1;
549 params.m_Width = A + 2; 532 params.m_Width = A + 2;
550 params.m_Height = B + 6; 533 params.m_Height = B + 6;
551 break; 534 break;
552 } 535 }
553 536
554 case 4: { 537 case 4: {
555 uint32_t A = (modeBits >> 5) & 0x3; 538 u32 A = (modeBits >> 5) & 0x3;
556 uint32_t B = (modeBits >> 7) & 0x1; 539 u32 B = (modeBits >> 7) & 0x1;
557 params.m_Width = B + 2; 540 params.m_Width = B + 2;
558 params.m_Height = A + 2; 541 params.m_Height = A + 2;
559 break; 542 break;
560 } 543 }
561 544
562 case 5: { 545 case 5: {
563 uint32_t A = (modeBits >> 5) & 0x3; 546 u32 A = (modeBits >> 5) & 0x3;
564 params.m_Width = 12; 547 params.m_Width = 12;
565 params.m_Height = A + 2; 548 params.m_Height = A + 2;
566 break; 549 break;
567 } 550 }
568 551
569 case 6: { 552 case 6: {
570 uint32_t A = (modeBits >> 5) & 0x3; 553 u32 A = (modeBits >> 5) & 0x3;
571 params.m_Width = A + 2; 554 params.m_Width = A + 2;
572 params.m_Height = 12; 555 params.m_Height = 12;
573 break; 556 break;
@@ -586,15 +569,15 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
586 } 569 }
587 570
588 case 9: { 571 case 9: {
589 uint32_t A = (modeBits >> 5) & 0x3; 572 u32 A = (modeBits >> 5) & 0x3;
590 uint32_t B = (modeBits >> 9) & 0x3; 573 u32 B = (modeBits >> 9) & 0x3;
591 params.m_Width = A + 6; 574 params.m_Width = A + 6;
592 params.m_Height = B + 6; 575 params.m_Height = B + 6;
593 break; 576 break;
594 } 577 }
595 578
596 default: 579 default:
597 assert(!"Don't know this layout..."); 580 assert(false && "Don't know this layout...");
598 params.m_bError = true; 581 params.m_bError = true;
599 break; 582 break;
600 } 583 }
@@ -605,10 +588,10 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
605 bool H = (layout != 9) && (modeBits & 0x200); 588 bool H = (layout != 9) && (modeBits & 0x200);
606 589
607 if (H) { 590 if (H) {
608 const uint32_t maxWeights[6] = {9, 11, 15, 19, 23, 31}; 591 const u32 maxWeights[6] = {9, 11, 15, 19, 23, 31};
609 params.m_MaxWeight = maxWeights[R - 2]; 592 params.m_MaxWeight = maxWeights[R - 2];
610 } else { 593 } else {
611 const uint32_t maxWeights[6] = {1, 2, 3, 4, 5, 7}; 594 const u32 maxWeights[6] = {1, 2, 3, 4, 5, 7};
612 params.m_MaxWeight = maxWeights[R - 2]; 595 params.m_MaxWeight = maxWeights[R - 2];
613 } 596 }
614 597
@@ -617,32 +600,32 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
617 return params; 600 return params;
618} 601}
619 602
620static void FillVoidExtentLDR(InputBitStream& strm, uint32_t* const outBuf, uint32_t blockWidth, 603static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 blockWidth,
621 uint32_t blockHeight) { 604 u32 blockHeight) {
622 // Don't actually care about the void extent, just read the bits... 605 // Don't actually care about the void extent, just read the bits...
623 for (int i = 0; i < 4; ++i) { 606 for (s32 i = 0; i < 4; ++i) {
624 strm.ReadBits(13); 607 strm.ReadBits<13>();
625 } 608 }
626 609
627 // Decode the RGBA components and renormalize them to the range [0, 255] 610 // Decode the RGBA components and renormalize them to the range [0, 255]
628 uint16_t r = static_cast<uint16_t>(strm.ReadBits(16)); 611 u16 r = static_cast<u16>(strm.ReadBits<16>());
629 uint16_t g = static_cast<uint16_t>(strm.ReadBits(16)); 612 u16 g = static_cast<u16>(strm.ReadBits<16>());
630 uint16_t b = static_cast<uint16_t>(strm.ReadBits(16)); 613 u16 b = static_cast<u16>(strm.ReadBits<16>());
631 uint16_t a = static_cast<uint16_t>(strm.ReadBits(16)); 614 u16 a = static_cast<u16>(strm.ReadBits<16>());
632 615
633 uint32_t rgba = (r >> 8) | (g & 0xFF00) | (static_cast<uint32_t>(b) & 0xFF00) << 8 | 616 u32 rgba = (r >> 8) | (g & 0xFF00) | (static_cast<u32>(b) & 0xFF00) << 8 |
634 (static_cast<uint32_t>(a) & 0xFF00) << 16; 617 (static_cast<u32>(a) & 0xFF00) << 16;
635 618
636 for (uint32_t j = 0; j < blockHeight; j++) { 619 for (u32 j = 0; j < blockHeight; j++) {
637 for (uint32_t i = 0; i < blockWidth; i++) { 620 for (u32 i = 0; i < blockWidth; i++) {
638 outBuf[j * blockWidth + i] = rgba; 621 outBuf[j * blockWidth + i] = rgba;
639 } 622 }
640 } 623 }
641} 624}
642 625
643static void FillError(uint32_t* outBuf, uint32_t blockWidth, uint32_t blockHeight) { 626static void FillError(u32* outBuf, u32 blockWidth, u32 blockHeight) {
644 for (uint32_t j = 0; j < blockHeight; j++) { 627 for (u32 j = 0; j < blockHeight; j++) {
645 for (uint32_t i = 0; i < blockWidth; i++) { 628 for (u32 i = 0; i < blockWidth; i++) {
646 outBuf[j * blockWidth + i] = 0xFFFF00FF; 629 outBuf[j * blockWidth + i] = 0xFFFF00FF;
647 } 630 }
648 } 631 }
@@ -651,18 +634,18 @@ static void FillError(uint32_t* outBuf, uint32_t blockWidth, uint32_t blockHeigh
651// Replicates low numBits such that [(toBit - 1):(toBit - 1 - fromBit)] 634// Replicates low numBits such that [(toBit - 1):(toBit - 1 - fromBit)]
652// is the same as [(numBits - 1):0] and repeats all the way down. 635// is the same as [(numBits - 1):0] and repeats all the way down.
653template <typename IntType> 636template <typename IntType>
654static IntType Replicate(const IntType& val, uint32_t numBits, uint32_t toBit) { 637static IntType Replicate(IntType val, u32 numBits, u32 toBit) {
655 if (numBits == 0) 638 if (numBits == 0)
656 return 0; 639 return 0;
657 if (toBit == 0) 640 if (toBit == 0)
658 return 0; 641 return 0;
659 IntType v = val & static_cast<IntType>((1 << numBits) - 1); 642 IntType v = val & static_cast<IntType>((1 << numBits) - 1);
660 IntType res = v; 643 IntType res = v;
661 uint32_t reslen = numBits; 644 u32 reslen = numBits;
662 while (reslen < toBit) { 645 while (reslen < toBit) {
663 uint32_t comp = 0; 646 u32 comp = 0;
664 if (numBits > toBit - reslen) { 647 if (numBits > toBit - reslen) {
665 uint32_t newshift = toBit - reslen; 648 u32 newshift = toBit - reslen;
666 comp = numBits - newshift; 649 comp = numBits - newshift;
667 numBits = newshift; 650 numBits = newshift;
668 } 651 }
@@ -675,14 +658,14 @@ static IntType Replicate(const IntType& val, uint32_t numBits, uint32_t toBit) {
675 658
676class Pixel { 659class Pixel {
677protected: 660protected:
678 using ChannelType = int16_t; 661 using ChannelType = s16;
679 uint8_t m_BitDepth[4] = {8, 8, 8, 8}; 662 u8 m_BitDepth[4] = {8, 8, 8, 8};
680 int16_t color[4] = {}; 663 s16 color[4] = {};
681 664
682public: 665public:
683 Pixel() = default; 666 Pixel() = default;
684 Pixel(uint32_t a, uint32_t r, uint32_t g, uint32_t b, unsigned bitDepth = 8) 667 Pixel(u32 a, u32 r, u32 g, u32 b, u32 bitDepth = 8)
685 : m_BitDepth{uint8_t(bitDepth), uint8_t(bitDepth), uint8_t(bitDepth), uint8_t(bitDepth)}, 668 : m_BitDepth{u8(bitDepth), u8(bitDepth), u8(bitDepth), u8(bitDepth)},
686 color{static_cast<ChannelType>(a), static_cast<ChannelType>(r), 669 color{static_cast<ChannelType>(a), static_cast<ChannelType>(r),
687 static_cast<ChannelType>(g), static_cast<ChannelType>(b)} {} 670 static_cast<ChannelType>(g), static_cast<ChannelType>(b)} {}
688 671
@@ -691,22 +674,22 @@ public:
691 // significant bits when going from larger to smaller bit depth 674 // significant bits when going from larger to smaller bit depth
692 // or by repeating the most significant bits when going from 675 // or by repeating the most significant bits when going from
693 // smaller to larger bit depths. 676 // smaller to larger bit depths.
694 void ChangeBitDepth(const uint8_t (&depth)[4]) { 677 void ChangeBitDepth(const u8 (&depth)[4]) {
695 for (uint32_t i = 0; i < 4; i++) { 678 for (u32 i = 0; i < 4; i++) {
696 Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i], depth[i]); 679 Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i], depth[i]);
697 m_BitDepth[i] = depth[i]; 680 m_BitDepth[i] = depth[i];
698 } 681 }
699 } 682 }
700 683
701 template <typename IntType> 684 template <typename IntType>
702 static float ConvertChannelToFloat(IntType channel, uint8_t bitDepth) { 685 static float ConvertChannelToFloat(IntType channel, u8 bitDepth) {
703 float denominator = static_cast<float>((1 << bitDepth) - 1); 686 float denominator = static_cast<float>((1 << bitDepth) - 1);
704 return static_cast<float>(channel) / denominator; 687 return static_cast<float>(channel) / denominator;
705 } 688 }
706 689
707 // Changes the bit depth of a single component. See the comment 690 // Changes the bit depth of a single component. See the comment
708 // above for how we do this. 691 // above for how we do this.
709 static ChannelType ChangeBitDepth(Pixel::ChannelType val, uint8_t oldDepth, uint8_t newDepth) { 692 static ChannelType ChangeBitDepth(Pixel::ChannelType val, u8 oldDepth, u8 newDepth) {
710 assert(newDepth <= 8); 693 assert(newDepth <= 8);
711 assert(oldDepth <= 8); 694 assert(oldDepth <= 8);
712 695
@@ -722,16 +705,15 @@ public:
722 if (newDepth == 0) { 705 if (newDepth == 0) {
723 return 0xFF; 706 return 0xFF;
724 } else { 707 } else {
725 uint8_t bitsWasted = static_cast<uint8_t>(oldDepth - newDepth); 708 u8 bitsWasted = static_cast<u8>(oldDepth - newDepth);
726 uint16_t v = static_cast<uint16_t>(val); 709 u16 v = static_cast<u16>(val);
727 v = static_cast<uint16_t>((v + (1 << (bitsWasted - 1))) >> bitsWasted); 710 v = static_cast<u16>((v + (1 << (bitsWasted - 1))) >> bitsWasted);
728 v = ::std::min<uint16_t>(::std::max<uint16_t>(0, v), 711 v = ::std::min<u16>(::std::max<u16>(0, v), static_cast<u16>((1 << newDepth) - 1));
729 static_cast<uint16_t>((1 << newDepth) - 1)); 712 return static_cast<u8>(v);
730 return static_cast<uint8_t>(v);
731 } 713 }
732 } 714 }
733 715
734 assert(!"We shouldn't get here."); 716 assert(false && "We shouldn't get here.");
735 return 0; 717 return 0;
736 } 718 }
737 719
@@ -759,15 +741,15 @@ public:
759 ChannelType& B() { 741 ChannelType& B() {
760 return color[3]; 742 return color[3];
761 } 743 }
762 const ChannelType& Component(uint32_t idx) const { 744 const ChannelType& Component(u32 idx) const {
763 return color[idx]; 745 return color[idx];
764 } 746 }
765 ChannelType& Component(uint32_t idx) { 747 ChannelType& Component(u32 idx) {
766 return color[idx]; 748 return color[idx];
767 } 749 }
768 750
769 void GetBitDepth(uint8_t (&outDepth)[4]) const { 751 void GetBitDepth(u8 (&outDepth)[4]) const {
770 for (int i = 0; i < 4; i++) { 752 for (s32 i = 0; i < 4; i++) {
771 outDepth[i] = m_BitDepth[i]; 753 outDepth[i] = m_BitDepth[i];
772 } 754 }
773 } 755 }
@@ -776,12 +758,12 @@ public:
776 // and then pack each channel into an R8G8B8A8 32-bit integer. We assume 758 // and then pack each channel into an R8G8B8A8 32-bit integer. We assume
777 // that the architecture is little-endian, so the alpha channel will end 759 // that the architecture is little-endian, so the alpha channel will end
778 // up in the most-significant byte. 760 // up in the most-significant byte.
779 uint32_t Pack() const { 761 u32 Pack() const {
780 Pixel eightBit(*this); 762 Pixel eightBit(*this);
781 const uint8_t eightBitDepth[4] = {8, 8, 8, 8}; 763 const u8 eightBitDepth[4] = {8, 8, 8, 8};
782 eightBit.ChangeBitDepth(eightBitDepth); 764 eightBit.ChangeBitDepth(eightBitDepth);
783 765
784 uint32_t r = 0; 766 u32 r = 0;
785 r |= eightBit.A(); 767 r |= eightBit.A();
786 r <<= 8; 768 r <<= 8;
787 r |= eightBit.B(); 769 r |= eightBit.B();
@@ -794,7 +776,7 @@ public:
794 776
795 // Clamps the pixel to the range [0,255] 777 // Clamps the pixel to the range [0,255]
796 void ClampByte() { 778 void ClampByte() {
797 for (uint32_t i = 0; i < 4; i++) { 779 for (u32 i = 0; i < 4; i++) {
798 color[i] = (color[i] < 0) ? 0 : ((color[i] > 255) ? 255 : color[i]); 780 color[i] = (color[i] < 0) ? 0 : ((color[i] > 255) ? 255 : color[i]);
799 } 781 }
800 } 782 }
@@ -804,24 +786,24 @@ public:
804 } 786 }
805}; 787};
806 788
807static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* modes, 789static void DecodeColorValues(u32* out, u8* data, const u32* modes, const u32 nPartitions,
808 const uint32_t nPartitions, const uint32_t nBitsForColorData) { 790 const u32 nBitsForColorData) {
809 // First figure out how many color values we have 791 // First figure out how many color values we have
810 uint32_t nValues = 0; 792 u32 nValues = 0;
811 for (uint32_t i = 0; i < nPartitions; i++) { 793 for (u32 i = 0; i < nPartitions; i++) {
812 nValues += ((modes[i] >> 2) + 1) << 1; 794 nValues += ((modes[i] >> 2) + 1) << 1;
813 } 795 }
814 796
815 // Then based on the number of values and the remaining number of bits, 797 // Then based on the number of values and the remaining number of bits,
816 // figure out the max value for each of them... 798 // figure out the max value for each of them...
817 uint32_t range = 256; 799 u32 range = 256;
818 while (--range > 0) { 800 while (--range > 0) {
819 IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(range); 801 IntegerEncodedValue val = EncodingsValues[range];
820 uint32_t bitLength = val.GetBitLength(nValues); 802 u32 bitLength = val.GetBitLength(nValues);
821 if (bitLength <= nBitsForColorData) { 803 if (bitLength <= nBitsForColorData) {
822 // Find the smallest possible range that matches the given encoding 804 // Find the smallest possible range that matches the given encoding
823 while (--range > 0) { 805 while (--range > 0) {
824 IntegerEncodedValue newval = IntegerEncodedValue::CreateEncoding(range); 806 IntegerEncodedValue newval = EncodingsValues[range];
825 if (!newval.MatchesEncoding(val)) { 807 if (!newval.MatchesEncoding(val)) {
826 break; 808 break;
827 } 809 }
@@ -835,12 +817,14 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode
835 817
836 // We now have enough to decode our integer sequence. 818 // We now have enough to decode our integer sequence.
837 std::vector<IntegerEncodedValue> decodedColorValues; 819 std::vector<IntegerEncodedValue> decodedColorValues;
820 decodedColorValues.reserve(32);
821
838 InputBitStream colorStream(data); 822 InputBitStream colorStream(data);
839 IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); 823 DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues);
840 824
841 // Once we have the decoded values, we need to dequantize them to the 0-255 range 825 // Once we have the decoded values, we need to dequantize them to the 0-255 range
842 // This procedure is outlined in ASTC spec C.2.13 826 // This procedure is outlined in ASTC spec C.2.13
843 uint32_t outIdx = 0; 827 u32 outIdx = 0;
844 for (auto itr = decodedColorValues.begin(); itr != decodedColorValues.end(); ++itr) { 828 for (auto itr = decodedColorValues.begin(); itr != decodedColorValues.end(); ++itr) {
845 // Have we already decoded all that we need? 829 // Have we already decoded all that we need?
846 if (outIdx >= nValues) { 830 if (outIdx >= nValues) {
@@ -848,25 +832,25 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode
848 } 832 }
849 833
850 const IntegerEncodedValue& val = *itr; 834 const IntegerEncodedValue& val = *itr;
851 uint32_t bitlen = val.BaseBitLength(); 835 u32 bitlen = val.num_bits;
852 uint32_t bitval = val.GetBitValue(); 836 u32 bitval = val.bit_value;
853 837
854 assert(bitlen >= 1); 838 assert(bitlen >= 1);
855 839
856 uint32_t A = 0, B = 0, C = 0, D = 0; 840 u32 A = 0, B = 0, C = 0, D = 0;
857 // A is just the lsb replicated 9 times. 841 // A is just the lsb replicated 9 times.
858 A = Replicate(bitval & 1, 1, 9); 842 A = Replicate(bitval & 1, 1, 9);
859 843
860 switch (val.GetEncoding()) { 844 switch (val.encoding) {
861 // Replicate bits 845 // Replicate bits
862 case eIntegerEncoding_JustBits: 846 case IntegerEncoding::JustBits:
863 out[outIdx++] = Replicate(bitval, bitlen, 8); 847 out[outIdx++] = Replicate(bitval, bitlen, 8);
864 break; 848 break;
865 849
866 // Use algorithm in C.2.13 850 // Use algorithm in C.2.13
867 case eIntegerEncoding_Trit: { 851 case IntegerEncoding::Trit: {
868 852
869 D = val.GetTritValue(); 853 D = val.trit_value;
870 854
871 switch (bitlen) { 855 switch (bitlen) {
872 case 1: { 856 case 1: {
@@ -876,48 +860,48 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode
876 case 2: { 860 case 2: {
877 C = 93; 861 C = 93;
878 // B = b000b0bb0 862 // B = b000b0bb0
879 uint32_t b = (bitval >> 1) & 1; 863 u32 b = (bitval >> 1) & 1;
880 B = (b << 8) | (b << 4) | (b << 2) | (b << 1); 864 B = (b << 8) | (b << 4) | (b << 2) | (b << 1);
881 } break; 865 } break;
882 866
883 case 3: { 867 case 3: {
884 C = 44; 868 C = 44;
885 // B = cb000cbcb 869 // B = cb000cbcb
886 uint32_t cb = (bitval >> 1) & 3; 870 u32 cb = (bitval >> 1) & 3;
887 B = (cb << 7) | (cb << 2) | cb; 871 B = (cb << 7) | (cb << 2) | cb;
888 } break; 872 } break;
889 873
890 case 4: { 874 case 4: {
891 C = 22; 875 C = 22;
892 // B = dcb000dcb 876 // B = dcb000dcb
893 uint32_t dcb = (bitval >> 1) & 7; 877 u32 dcb = (bitval >> 1) & 7;
894 B = (dcb << 6) | dcb; 878 B = (dcb << 6) | dcb;
895 } break; 879 } break;
896 880
897 case 5: { 881 case 5: {
898 C = 11; 882 C = 11;
899 // B = edcb000ed 883 // B = edcb000ed
900 uint32_t edcb = (bitval >> 1) & 0xF; 884 u32 edcb = (bitval >> 1) & 0xF;
901 B = (edcb << 5) | (edcb >> 2); 885 B = (edcb << 5) | (edcb >> 2);
902 } break; 886 } break;
903 887
904 case 6: { 888 case 6: {
905 C = 5; 889 C = 5;
906 // B = fedcb000f 890 // B = fedcb000f
907 uint32_t fedcb = (bitval >> 1) & 0x1F; 891 u32 fedcb = (bitval >> 1) & 0x1F;
908 B = (fedcb << 4) | (fedcb >> 4); 892 B = (fedcb << 4) | (fedcb >> 4);
909 } break; 893 } break;
910 894
911 default: 895 default:
912 assert(!"Unsupported trit encoding for color values!"); 896 assert(false && "Unsupported trit encoding for color values!");
913 break; 897 break;
914 } // switch(bitlen) 898 } // switch(bitlen)
915 } // case eIntegerEncoding_Trit 899 } // case IntegerEncoding::Trit
916 break; 900 break;
917 901
918 case eIntegerEncoding_Quint: { 902 case IntegerEncoding::Qus32: {
919 903
920 D = val.GetQuintValue(); 904 D = val.qus32_value;
921 905
922 switch (bitlen) { 906 switch (bitlen) {
923 case 1: { 907 case 1: {
@@ -927,41 +911,41 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode
927 case 2: { 911 case 2: {
928 C = 54; 912 C = 54;
929 // B = b0000bb00 913 // B = b0000bb00
930 uint32_t b = (bitval >> 1) & 1; 914 u32 b = (bitval >> 1) & 1;
931 B = (b << 8) | (b << 3) | (b << 2); 915 B = (b << 8) | (b << 3) | (b << 2);
932 } break; 916 } break;
933 917
934 case 3: { 918 case 3: {
935 C = 26; 919 C = 26;
936 // B = cb0000cbc 920 // B = cb0000cbc
937 uint32_t cb = (bitval >> 1) & 3; 921 u32 cb = (bitval >> 1) & 3;
938 B = (cb << 7) | (cb << 1) | (cb >> 1); 922 B = (cb << 7) | (cb << 1) | (cb >> 1);
939 } break; 923 } break;
940 924
941 case 4: { 925 case 4: {
942 C = 13; 926 C = 13;
943 // B = dcb0000dc 927 // B = dcb0000dc
944 uint32_t dcb = (bitval >> 1) & 7; 928 u32 dcb = (bitval >> 1) & 7;
945 B = (dcb << 6) | (dcb >> 1); 929 B = (dcb << 6) | (dcb >> 1);
946 } break; 930 } break;
947 931
948 case 5: { 932 case 5: {
949 C = 6; 933 C = 6;
950 // B = edcb0000e 934 // B = edcb0000e
951 uint32_t edcb = (bitval >> 1) & 0xF; 935 u32 edcb = (bitval >> 1) & 0xF;
952 B = (edcb << 5) | (edcb >> 3); 936 B = (edcb << 5) | (edcb >> 3);
953 } break; 937 } break;
954 938
955 default: 939 default:
956 assert(!"Unsupported quint encoding for color values!"); 940 assert(false && "Unsupported quint encoding for color values!");
957 break; 941 break;
958 } // switch(bitlen) 942 } // switch(bitlen)
959 } // case eIntegerEncoding_Quint 943 } // case IntegerEncoding::Qus32
960 break; 944 break;
961 } // switch(val.GetEncoding()) 945 } // switch(val.encoding)
962 946
963 if (val.GetEncoding() != eIntegerEncoding_JustBits) { 947 if (val.encoding != IntegerEncoding::JustBits) {
964 uint32_t T = D * C + B; 948 u32 T = D * C + B;
965 T ^= A; 949 T ^= A;
966 T = (A & 0x80) | (T >> 2); 950 T = (A & 0x80) | (T >> 2);
967 out[outIdx++] = T; 951 out[outIdx++] = T;
@@ -969,31 +953,31 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode
969 } 953 }
970 954
971 // Make sure that each of our values is in the proper range... 955 // Make sure that each of our values is in the proper range...
972 for (uint32_t i = 0; i < nValues; i++) { 956 for (u32 i = 0; i < nValues; i++) {
973 assert(out[i] <= 255); 957 assert(out[i] <= 255);
974 } 958 }
975} 959}
976 960
977static uint32_t UnquantizeTexelWeight(const IntegerEncodedValue& val) { 961static u32 UnquantizeTexelWeight(const IntegerEncodedValue& val) {
978 uint32_t bitval = val.GetBitValue(); 962 u32 bitval = val.bit_value;
979 uint32_t bitlen = val.BaseBitLength(); 963 u32 bitlen = val.num_bits;
980 964
981 uint32_t A = Replicate(bitval & 1, 1, 7); 965 u32 A = Replicate(bitval & 1, 1, 7);
982 uint32_t B = 0, C = 0, D = 0; 966 u32 B = 0, C = 0, D = 0;
983 967
984 uint32_t result = 0; 968 u32 result = 0;
985 switch (val.GetEncoding()) { 969 switch (val.encoding) {
986 case eIntegerEncoding_JustBits: 970 case IntegerEncoding::JustBits:
987 result = Replicate(bitval, bitlen, 6); 971 result = Replicate(bitval, bitlen, 6);
988 break; 972 break;
989 973
990 case eIntegerEncoding_Trit: { 974 case IntegerEncoding::Trit: {
991 D = val.GetTritValue(); 975 D = val.trit_value;
992 assert(D < 3); 976 assert(D < 3);
993 977
994 switch (bitlen) { 978 switch (bitlen) {
995 case 0: { 979 case 0: {
996 uint32_t results[3] = {0, 32, 63}; 980 u32 results[3] = {0, 32, 63};
997 result = results[D]; 981 result = results[D];
998 } break; 982 } break;
999 983
@@ -1003,29 +987,29 @@ static uint32_t UnquantizeTexelWeight(const IntegerEncodedValue& val) {
1003 987
1004 case 2: { 988 case 2: {
1005 C = 23; 989 C = 23;
1006 uint32_t b = (bitval >> 1) & 1; 990 u32 b = (bitval >> 1) & 1;
1007 B = (b << 6) | (b << 2) | b; 991 B = (b << 6) | (b << 2) | b;
1008 } break; 992 } break;
1009 993
1010 case 3: { 994 case 3: {
1011 C = 11; 995 C = 11;
1012 uint32_t cb = (bitval >> 1) & 3; 996 u32 cb = (bitval >> 1) & 3;
1013 B = (cb << 5) | cb; 997 B = (cb << 5) | cb;
1014 } break; 998 } break;
1015 999
1016 default: 1000 default:
1017 assert(!"Invalid trit encoding for texel weight"); 1001 assert(false && "Invalid trit encoding for texel weight");
1018 break; 1002 break;
1019 } 1003 }
1020 } break; 1004 } break;
1021 1005
1022 case eIntegerEncoding_Quint: { 1006 case IntegerEncoding::Qus32: {
1023 D = val.GetQuintValue(); 1007 D = val.qus32_value;
1024 assert(D < 5); 1008 assert(D < 5);
1025 1009
1026 switch (bitlen) { 1010 switch (bitlen) {
1027 case 0: { 1011 case 0: {
1028 uint32_t results[5] = {0, 16, 32, 47, 63}; 1012 u32 results[5] = {0, 16, 32, 47, 63};
1029 result = results[D]; 1013 result = results[D];
1030 } break; 1014 } break;
1031 1015
@@ -1035,18 +1019,18 @@ static uint32_t UnquantizeTexelWeight(const IntegerEncodedValue& val) {
1035 1019
1036 case 2: { 1020 case 2: {
1037 C = 13; 1021 C = 13;
1038 uint32_t b = (bitval >> 1) & 1; 1022 u32 b = (bitval >> 1) & 1;
1039 B = (b << 6) | (b << 1); 1023 B = (b << 6) | (b << 1);
1040 } break; 1024 } break;
1041 1025
1042 default: 1026 default:
1043 assert(!"Invalid quint encoding for texel weight"); 1027 assert(false && "Invalid quint encoding for texel weight");
1044 break; 1028 break;
1045 } 1029 }
1046 } break; 1030 } break;
1047 } 1031 }
1048 1032
1049 if (val.GetEncoding() != eIntegerEncoding_JustBits && bitlen > 0) { 1033 if (val.encoding != IntegerEncoding::JustBits && bitlen > 0) {
1050 // Decode the value... 1034 // Decode the value...
1051 result = D * C + B; 1035 result = D * C + B;
1052 result ^= A; 1036 result ^= A;
@@ -1063,12 +1047,11 @@ static uint32_t UnquantizeTexelWeight(const IntegerEncodedValue& val) {
1063 return result; 1047 return result;
1064} 1048}
1065 1049
1066static void UnquantizeTexelWeights(uint32_t out[2][144], 1050static void UnquantizeTexelWeights(u32 out[2][144], const std::vector<IntegerEncodedValue>& weights,
1067 const std::vector<IntegerEncodedValue>& weights, 1051 const TexelWeightParams& params, const u32 blockWidth,
1068 const TexelWeightParams& params, const uint32_t blockWidth, 1052 const u32 blockHeight) {
1069 const uint32_t blockHeight) { 1053 u32 weightIdx = 0;
1070 uint32_t weightIdx = 0; 1054 u32 unquantized[2][144];
1071 uint32_t unquantized[2][144];
1072 1055
1073 for (auto itr = weights.begin(); itr != weights.end(); ++itr) { 1056 for (auto itr = weights.begin(); itr != weights.end(); ++itr) {
1074 unquantized[0][weightIdx] = UnquantizeTexelWeight(*itr); 1057 unquantized[0][weightIdx] = UnquantizeTexelWeight(*itr);
@@ -1086,34 +1069,34 @@ static void UnquantizeTexelWeights(uint32_t out[2][144],
1086 } 1069 }
1087 1070
1088 // Do infill if necessary (Section C.2.18) ... 1071 // Do infill if necessary (Section C.2.18) ...
1089 uint32_t Ds = (1024 + (blockWidth / 2)) / (blockWidth - 1); 1072 u32 Ds = (1024 + (blockWidth / 2)) / (blockWidth - 1);
1090 uint32_t Dt = (1024 + (blockHeight / 2)) / (blockHeight - 1); 1073 u32 Dt = (1024 + (blockHeight / 2)) / (blockHeight - 1);
1091 1074
1092 const uint32_t kPlaneScale = params.m_bDualPlane ? 2U : 1U; 1075 const u32 kPlaneScale = params.m_bDualPlane ? 2U : 1U;
1093 for (uint32_t plane = 0; plane < kPlaneScale; plane++) 1076 for (u32 plane = 0; plane < kPlaneScale; plane++)
1094 for (uint32_t t = 0; t < blockHeight; t++) 1077 for (u32 t = 0; t < blockHeight; t++)
1095 for (uint32_t s = 0; s < blockWidth; s++) { 1078 for (u32 s = 0; s < blockWidth; s++) {
1096 uint32_t cs = Ds * s; 1079 u32 cs = Ds * s;
1097 uint32_t ct = Dt * t; 1080 u32 ct = Dt * t;
1098 1081
1099 uint32_t gs = (cs * (params.m_Width - 1) + 32) >> 6; 1082 u32 gs = (cs * (params.m_Width - 1) + 32) >> 6;
1100 uint32_t gt = (ct * (params.m_Height - 1) + 32) >> 6; 1083 u32 gt = (ct * (params.m_Height - 1) + 32) >> 6;
1101 1084
1102 uint32_t js = gs >> 4; 1085 u32 js = gs >> 4;
1103 uint32_t fs = gs & 0xF; 1086 u32 fs = gs & 0xF;
1104 1087
1105 uint32_t jt = gt >> 4; 1088 u32 jt = gt >> 4;
1106 uint32_t ft = gt & 0x0F; 1089 u32 ft = gt & 0x0F;
1107 1090
1108 uint32_t w11 = (fs * ft + 8) >> 4; 1091 u32 w11 = (fs * ft + 8) >> 4;
1109 uint32_t w10 = ft - w11; 1092 u32 w10 = ft - w11;
1110 uint32_t w01 = fs - w11; 1093 u32 w01 = fs - w11;
1111 uint32_t w00 = 16 - fs - ft + w11; 1094 u32 w00 = 16 - fs - ft + w11;
1112 1095
1113 uint32_t v0 = js + jt * params.m_Width; 1096 u32 v0 = js + jt * params.m_Width;
1114 1097
1115#define FIND_TEXEL(tidx, bidx) \ 1098#define FIND_TEXEL(tidx, bidx) \
1116 uint32_t p##bidx = 0; \ 1099 u32 p##bidx = 0; \
1117 do { \ 1100 do { \
1118 if ((tidx) < (params.m_Width * params.m_Height)) { \ 1101 if ((tidx) < (params.m_Width * params.m_Height)) { \
1119 p##bidx = unquantized[plane][(tidx)]; \ 1102 p##bidx = unquantized[plane][(tidx)]; \
@@ -1133,7 +1116,7 @@ static void UnquantizeTexelWeights(uint32_t out[2][144],
1133} 1116}
1134 1117
1135// Transfers a bit as described in C.2.14 1118// Transfers a bit as described in C.2.14
1136static inline void BitTransferSigned(int32_t& a, int32_t& b) { 1119static inline void BitTransferSigned(s32& a, s32& b) {
1137 b >>= 1; 1120 b >>= 1;
1138 b |= a & 0x80; 1121 b |= a & 0x80;
1139 a >>= 1; 1122 a >>= 1;
@@ -1144,14 +1127,14 @@ static inline void BitTransferSigned(int32_t& a, int32_t& b) {
1144 1127
1145// Adds more precision to the blue channel as described 1128// Adds more precision to the blue channel as described
1146// in C.2.14 1129// in C.2.14
1147static inline Pixel BlueContract(int32_t a, int32_t r, int32_t g, int32_t b) { 1130static inline Pixel BlueContract(s32 a, s32 r, s32 g, s32 b) {
1148 return Pixel(static_cast<int16_t>(a), static_cast<int16_t>((r + b) >> 1), 1131 return Pixel(static_cast<s16>(a), static_cast<s16>((r + b) >> 1),
1149 static_cast<int16_t>((g + b) >> 1), static_cast<int16_t>(b)); 1132 static_cast<s16>((g + b) >> 1), static_cast<s16>(b));
1150} 1133}
1151 1134
1152// Partition selection functions as specified in 1135// Partition selection functions as specified in
1153// C.2.21 1136// C.2.21
1154static inline uint32_t hash52(uint32_t p) { 1137static inline u32 hash52(u32 p) {
1155 p ^= p >> 15; 1138 p ^= p >> 15;
1156 p -= p << 17; 1139 p -= p << 17;
1157 p += p << 7; 1140 p += p << 7;
@@ -1165,8 +1148,7 @@ static inline uint32_t hash52(uint32_t p) {
1165 return p; 1148 return p;
1166} 1149}
1167 1150
1168static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z, 1151static u32 SelectPartition(s32 seed, s32 x, s32 y, s32 z, s32 partitionCount, s32 smallBlock) {
1169 int32_t partitionCount, int32_t smallBlock) {
1170 if (1 == partitionCount) 1152 if (1 == partitionCount)
1171 return 0; 1153 return 0;
1172 1154
@@ -1178,34 +1160,34 @@ static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z,
1178 1160
1179 seed += (partitionCount - 1) * 1024; 1161 seed += (partitionCount - 1) * 1024;
1180 1162
1181 uint32_t rnum = hash52(static_cast<uint32_t>(seed)); 1163 u32 rnum = hash52(static_cast<u32>(seed));
1182 uint8_t seed1 = static_cast<uint8_t>(rnum & 0xF); 1164 u8 seed1 = static_cast<u8>(rnum & 0xF);
1183 uint8_t seed2 = static_cast<uint8_t>((rnum >> 4) & 0xF); 1165 u8 seed2 = static_cast<u8>((rnum >> 4) & 0xF);
1184 uint8_t seed3 = static_cast<uint8_t>((rnum >> 8) & 0xF); 1166 u8 seed3 = static_cast<u8>((rnum >> 8) & 0xF);
1185 uint8_t seed4 = static_cast<uint8_t>((rnum >> 12) & 0xF); 1167 u8 seed4 = static_cast<u8>((rnum >> 12) & 0xF);
1186 uint8_t seed5 = static_cast<uint8_t>((rnum >> 16) & 0xF); 1168 u8 seed5 = static_cast<u8>((rnum >> 16) & 0xF);
1187 uint8_t seed6 = static_cast<uint8_t>((rnum >> 20) & 0xF); 1169 u8 seed6 = static_cast<u8>((rnum >> 20) & 0xF);
1188 uint8_t seed7 = static_cast<uint8_t>((rnum >> 24) & 0xF); 1170 u8 seed7 = static_cast<u8>((rnum >> 24) & 0xF);
1189 uint8_t seed8 = static_cast<uint8_t>((rnum >> 28) & 0xF); 1171 u8 seed8 = static_cast<u8>((rnum >> 28) & 0xF);
1190 uint8_t seed9 = static_cast<uint8_t>((rnum >> 18) & 0xF); 1172 u8 seed9 = static_cast<u8>((rnum >> 18) & 0xF);
1191 uint8_t seed10 = static_cast<uint8_t>((rnum >> 22) & 0xF); 1173 u8 seed10 = static_cast<u8>((rnum >> 22) & 0xF);
1192 uint8_t seed11 = static_cast<uint8_t>((rnum >> 26) & 0xF); 1174 u8 seed11 = static_cast<u8>((rnum >> 26) & 0xF);
1193 uint8_t seed12 = static_cast<uint8_t>(((rnum >> 30) | (rnum << 2)) & 0xF); 1175 u8 seed12 = static_cast<u8>(((rnum >> 30) | (rnum << 2)) & 0xF);
1194 1176
1195 seed1 = static_cast<uint8_t>(seed1 * seed1); 1177 seed1 = static_cast<u8>(seed1 * seed1);
1196 seed2 = static_cast<uint8_t>(seed2 * seed2); 1178 seed2 = static_cast<u8>(seed2 * seed2);
1197 seed3 = static_cast<uint8_t>(seed3 * seed3); 1179 seed3 = static_cast<u8>(seed3 * seed3);
1198 seed4 = static_cast<uint8_t>(seed4 * seed4); 1180 seed4 = static_cast<u8>(seed4 * seed4);
1199 seed5 = static_cast<uint8_t>(seed5 * seed5); 1181 seed5 = static_cast<u8>(seed5 * seed5);
1200 seed6 = static_cast<uint8_t>(seed6 * seed6); 1182 seed6 = static_cast<u8>(seed6 * seed6);
1201 seed7 = static_cast<uint8_t>(seed7 * seed7); 1183 seed7 = static_cast<u8>(seed7 * seed7);
1202 seed8 = static_cast<uint8_t>(seed8 * seed8); 1184 seed8 = static_cast<u8>(seed8 * seed8);
1203 seed9 = static_cast<uint8_t>(seed9 * seed9); 1185 seed9 = static_cast<u8>(seed9 * seed9);
1204 seed10 = static_cast<uint8_t>(seed10 * seed10); 1186 seed10 = static_cast<u8>(seed10 * seed10);
1205 seed11 = static_cast<uint8_t>(seed11 * seed11); 1187 seed11 = static_cast<u8>(seed11 * seed11);
1206 seed12 = static_cast<uint8_t>(seed12 * seed12); 1188 seed12 = static_cast<u8>(seed12 * seed12);
1207 1189
1208 int32_t sh1, sh2, sh3; 1190 s32 sh1, sh2, sh3;
1209 if (seed & 1) { 1191 if (seed & 1) {
1210 sh1 = (seed & 2) ? 4 : 5; 1192 sh1 = (seed & 2) ? 4 : 5;
1211 sh2 = (partitionCount == 3) ? 6 : 5; 1193 sh2 = (partitionCount == 3) ? 6 : 5;
@@ -1215,23 +1197,23 @@ static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z,
1215 } 1197 }
1216 sh3 = (seed & 0x10) ? sh1 : sh2; 1198 sh3 = (seed & 0x10) ? sh1 : sh2;
1217 1199
1218 seed1 = static_cast<uint8_t>(seed1 >> sh1); 1200 seed1 = static_cast<u8>(seed1 >> sh1);
1219 seed2 = static_cast<uint8_t>(seed2 >> sh2); 1201 seed2 = static_cast<u8>(seed2 >> sh2);
1220 seed3 = static_cast<uint8_t>(seed3 >> sh1); 1202 seed3 = static_cast<u8>(seed3 >> sh1);
1221 seed4 = static_cast<uint8_t>(seed4 >> sh2); 1203 seed4 = static_cast<u8>(seed4 >> sh2);
1222 seed5 = static_cast<uint8_t>(seed5 >> sh1); 1204 seed5 = static_cast<u8>(seed5 >> sh1);
1223 seed6 = static_cast<uint8_t>(seed6 >> sh2); 1205 seed6 = static_cast<u8>(seed6 >> sh2);
1224 seed7 = static_cast<uint8_t>(seed7 >> sh1); 1206 seed7 = static_cast<u8>(seed7 >> sh1);
1225 seed8 = static_cast<uint8_t>(seed8 >> sh2); 1207 seed8 = static_cast<u8>(seed8 >> sh2);
1226 seed9 = static_cast<uint8_t>(seed9 >> sh3); 1208 seed9 = static_cast<u8>(seed9 >> sh3);
1227 seed10 = static_cast<uint8_t>(seed10 >> sh3); 1209 seed10 = static_cast<u8>(seed10 >> sh3);
1228 seed11 = static_cast<uint8_t>(seed11 >> sh3); 1210 seed11 = static_cast<u8>(seed11 >> sh3);
1229 seed12 = static_cast<uint8_t>(seed12 >> sh3); 1211 seed12 = static_cast<u8>(seed12 >> sh3);
1230 1212
1231 int32_t a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14); 1213 s32 a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
1232 int32_t b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10); 1214 s32 b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
1233 int32_t c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6); 1215 s32 c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
1234 int32_t d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2); 1216 s32 d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);
1235 1217
1236 a &= 0x3F; 1218 a &= 0x3F;
1237 b &= 0x3F; 1219 b &= 0x3F;
@@ -1252,27 +1234,26 @@ static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z,
1252 return 3; 1234 return 3;
1253} 1235}
1254 1236
1255static inline uint32_t Select2DPartition(int32_t seed, int32_t x, int32_t y, int32_t partitionCount, 1237static inline u32 Select2DPartition(s32 seed, s32 x, s32 y, s32 partitionCount, s32 smallBlock) {
1256 int32_t smallBlock) {
1257 return SelectPartition(seed, x, y, 0, partitionCount, smallBlock); 1238 return SelectPartition(seed, x, y, 0, partitionCount, smallBlock);
1258} 1239}
1259 1240
1260// Section C.2.14 1241// Section C.2.14
1261static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValues, 1242static void ComputeEndpos32s(Pixel& ep1, Pixel& ep2, const u32*& colorValues,
1262 uint32_t colorEndpointMode) { 1243 u32 colorEndpos32Mode) {
1263#define READ_UINT_VALUES(N) \ 1244#define READ_UINT_VALUES(N) \
1264 uint32_t v[N]; \ 1245 u32 v[N]; \
1265 for (uint32_t i = 0; i < N; i++) { \ 1246 for (u32 i = 0; i < N; i++) { \
1266 v[i] = *(colorValues++); \ 1247 v[i] = *(colorValues++); \
1267 } 1248 }
1268 1249
1269#define READ_INT_VALUES(N) \ 1250#define READ_INT_VALUES(N) \
1270 int32_t v[N]; \ 1251 s32 v[N]; \
1271 for (uint32_t i = 0; i < N; i++) { \ 1252 for (u32 i = 0; i < N; i++) { \
1272 v[i] = static_cast<int32_t>(*(colorValues++)); \ 1253 v[i] = static_cast<s32>(*(colorValues++)); \
1273 } 1254 }
1274 1255
1275 switch (colorEndpointMode) { 1256 switch (colorEndpos32Mode) {
1276 case 0: { 1257 case 0: {
1277 READ_UINT_VALUES(2) 1258 READ_UINT_VALUES(2)
1278 ep1 = Pixel(0xFF, v[0], v[0], v[0]); 1259 ep1 = Pixel(0xFF, v[0], v[0], v[0]);
@@ -1281,8 +1262,8 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue
1281 1262
1282 case 1: { 1263 case 1: {
1283 READ_UINT_VALUES(2) 1264 READ_UINT_VALUES(2)
1284 uint32_t L0 = (v[0] >> 2) | (v[1] & 0xC0); 1265 u32 L0 = (v[0] >> 2) | (v[1] & 0xC0);
1285 uint32_t L1 = std::max(L0 + (v[1] & 0x3F), 0xFFU); 1266 u32 L1 = std::max(L0 + (v[1] & 0x3F), 0xFFU);
1286 ep1 = Pixel(0xFF, L0, L0, L0); 1267 ep1 = Pixel(0xFF, L0, L0, L0);
1287 ep2 = Pixel(0xFF, L1, L1, L1); 1268 ep2 = Pixel(0xFF, L1, L1, L1);
1288 } break; 1269 } break;
@@ -1371,7 +1352,7 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue
1371 } break; 1352 } break;
1372 1353
1373 default: 1354 default:
1374 assert(!"Unsupported color endpoint mode (is it HDR?)"); 1355 assert(false && "Unsupported color endpoint mode (is it HDR?)");
1375 break; 1356 break;
1376 } 1357 }
1377 1358
@@ -1379,14 +1360,14 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue
1379#undef READ_INT_VALUES 1360#undef READ_INT_VALUES
1380} 1361}
1381 1362
1382static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, 1363static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 blockHeight,
1383 const uint32_t blockHeight, uint32_t* outBuf) { 1364 u32* outBuf) {
1384 InputBitStream strm(inBuf); 1365 InputBitStream strm(inBuf);
1385 TexelWeightParams weightParams = DecodeBlockInfo(strm); 1366 TexelWeightParams weightParams = DecodeBlockInfo(strm);
1386 1367
1387 // Was there an error? 1368 // Was there an error?
1388 if (weightParams.m_bError) { 1369 if (weightParams.m_bError) {
1389 assert(!"Invalid block mode"); 1370 assert(false && "Invalid block mode");
1390 FillError(outBuf, blockWidth, blockHeight); 1371 FillError(outBuf, blockWidth, blockHeight);
1391 return; 1372 return;
1392 } 1373 }
@@ -1397,63 +1378,63 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth,
1397 } 1378 }
1398 1379
1399 if (weightParams.m_bVoidExtentHDR) { 1380 if (weightParams.m_bVoidExtentHDR) {
1400 assert(!"HDR void extent blocks are unsupported!"); 1381 assert(false && "HDR void extent blocks are unsupported!");
1401 FillError(outBuf, blockWidth, blockHeight); 1382 FillError(outBuf, blockWidth, blockHeight);
1402 return; 1383 return;
1403 } 1384 }
1404 1385
1405 if (weightParams.m_Width > blockWidth) { 1386 if (weightParams.m_Width > blockWidth) {
1406 assert(!"Texel weight grid width should be smaller than block width"); 1387 assert(false && "Texel weight grid width should be smaller than block width");
1407 FillError(outBuf, blockWidth, blockHeight); 1388 FillError(outBuf, blockWidth, blockHeight);
1408 return; 1389 return;
1409 } 1390 }
1410 1391
1411 if (weightParams.m_Height > blockHeight) { 1392 if (weightParams.m_Height > blockHeight) {
1412 assert(!"Texel weight grid height should be smaller than block height"); 1393 assert(false && "Texel weight grid height should be smaller than block height");
1413 FillError(outBuf, blockWidth, blockHeight); 1394 FillError(outBuf, blockWidth, blockHeight);
1414 return; 1395 return;
1415 } 1396 }
1416 1397
1417 // Read num partitions 1398 // Read num partitions
1418 uint32_t nPartitions = strm.ReadBits(2) + 1; 1399 u32 nPartitions = strm.ReadBits<2>() + 1;
1419 assert(nPartitions <= 4); 1400 assert(nPartitions <= 4);
1420 1401
1421 if (nPartitions == 4 && weightParams.m_bDualPlane) { 1402 if (nPartitions == 4 && weightParams.m_bDualPlane) {
1422 assert(!"Dual plane mode is incompatible with four partition blocks"); 1403 assert(false && "Dual plane mode is incompatible with four partition blocks");
1423 FillError(outBuf, blockWidth, blockHeight); 1404 FillError(outBuf, blockWidth, blockHeight);
1424 return; 1405 return;
1425 } 1406 }
1426 1407
1427 // Based on the number of partitions, read the color endpoint mode for 1408 // Based on the number of partitions, read the color endpos32 mode for
1428 // each partition. 1409 // each partition.
1429 1410
1430 // Determine partitions, partition index, and color endpoint modes 1411 // Determine partitions, partition index, and color endpos32 modes
1431 int32_t planeIdx = -1; 1412 s32 planeIdx = -1;
1432 uint32_t partitionIndex; 1413 u32 partitionIndex;
1433 uint32_t colorEndpointMode[4] = {0, 0, 0, 0}; 1414 u32 colorEndpos32Mode[4] = {0, 0, 0, 0};
1434 1415
1435 // Define color data. 1416 // Define color data.
1436 uint8_t colorEndpointData[16]; 1417 u8 colorEndpos32Data[16];
1437 memset(colorEndpointData, 0, sizeof(colorEndpointData)); 1418 memset(colorEndpos32Data, 0, sizeof(colorEndpos32Data));
1438 OutputBitStream colorEndpointStream(colorEndpointData, 16 * 8, 0); 1419 OutputBitStream colorEndpos32Stream(colorEndpos32Data, 16 * 8, 0);
1439 1420
1440 // Read extra config data... 1421 // Read extra config data...
1441 uint32_t baseCEM = 0; 1422 u32 baseCEM = 0;
1442 if (nPartitions == 1) { 1423 if (nPartitions == 1) {
1443 colorEndpointMode[0] = strm.ReadBits(4); 1424 colorEndpos32Mode[0] = strm.ReadBits<4>();
1444 partitionIndex = 0; 1425 partitionIndex = 0;
1445 } else { 1426 } else {
1446 partitionIndex = strm.ReadBits(10); 1427 partitionIndex = strm.ReadBits<10>();
1447 baseCEM = strm.ReadBits(6); 1428 baseCEM = strm.ReadBits<6>();
1448 } 1429 }
1449 uint32_t baseMode = (baseCEM & 3); 1430 u32 baseMode = (baseCEM & 3);
1450 1431
1451 // Remaining bits are color endpoint data... 1432 // Remaining bits are color endpos32 data...
1452 uint32_t nWeightBits = weightParams.GetPackedBitSize(); 1433 u32 nWeightBits = weightParams.GetPackedBitSize();
1453 int32_t remainingBits = 128 - nWeightBits - strm.GetBitsRead(); 1434 s32 remainingBits = 128 - nWeightBits - static_cast<s32>(strm.GetBitsRead());
1454 1435
1455 // Consider extra bits prior to texel data... 1436 // Consider extra bits prior to texel data...
1456 uint32_t extraCEMbits = 0; 1437 u32 extraCEMbits = 0;
1457 if (baseMode) { 1438 if (baseMode) {
1458 switch (nPartitions) { 1439 switch (nPartitions) {
1459 case 2: 1440 case 2:
@@ -1473,18 +1454,18 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth,
1473 remainingBits -= extraCEMbits; 1454 remainingBits -= extraCEMbits;
1474 1455
1475 // Do we have a dual plane situation? 1456 // Do we have a dual plane situation?
1476 uint32_t planeSelectorBits = 0; 1457 u32 planeSelectorBits = 0;
1477 if (weightParams.m_bDualPlane) { 1458 if (weightParams.m_bDualPlane) {
1478 planeSelectorBits = 2; 1459 planeSelectorBits = 2;
1479 } 1460 }
1480 remainingBits -= planeSelectorBits; 1461 remainingBits -= planeSelectorBits;
1481 1462
1482 // Read color data... 1463 // Read color data...
1483 uint32_t colorDataBits = remainingBits; 1464 u32 colorDataBits = remainingBits;
1484 while (remainingBits > 0) { 1465 while (remainingBits > 0) {
1485 uint32_t nb = std::min(remainingBits, 8); 1466 u32 nb = std::min(remainingBits, 8);
1486 uint32_t b = strm.ReadBits(nb); 1467 u32 b = strm.ReadBits(nb);
1487 colorEndpointStream.WriteBits(b, nb); 1468 colorEndpos32Stream.WriteBits(b, nb);
1488 remainingBits -= 8; 1469 remainingBits -= 8;
1489 } 1470 }
1490 1471
@@ -1493,64 +1474,64 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth,
1493 1474
1494 // Read the rest of the CEM 1475 // Read the rest of the CEM
1495 if (baseMode) { 1476 if (baseMode) {
1496 uint32_t extraCEM = strm.ReadBits(extraCEMbits); 1477 u32 extraCEM = strm.ReadBits(extraCEMbits);
1497 uint32_t CEM = (extraCEM << 6) | baseCEM; 1478 u32 CEM = (extraCEM << 6) | baseCEM;
1498 CEM >>= 2; 1479 CEM >>= 2;
1499 1480
1500 bool C[4] = {0}; 1481 bool C[4] = {0};
1501 for (uint32_t i = 0; i < nPartitions; i++) { 1482 for (u32 i = 0; i < nPartitions; i++) {
1502 C[i] = CEM & 1; 1483 C[i] = CEM & 1;
1503 CEM >>= 1; 1484 CEM >>= 1;
1504 } 1485 }
1505 1486
1506 uint8_t M[4] = {0}; 1487 u8 M[4] = {0};
1507 for (uint32_t i = 0; i < nPartitions; i++) { 1488 for (u32 i = 0; i < nPartitions; i++) {
1508 M[i] = CEM & 3; 1489 M[i] = CEM & 3;
1509 CEM >>= 2; 1490 CEM >>= 2;
1510 assert(M[i] <= 3); 1491 assert(M[i] <= 3);
1511 } 1492 }
1512 1493
1513 for (uint32_t i = 0; i < nPartitions; i++) { 1494 for (u32 i = 0; i < nPartitions; i++) {
1514 colorEndpointMode[i] = baseMode; 1495 colorEndpos32Mode[i] = baseMode;
1515 if (!(C[i])) 1496 if (!(C[i]))
1516 colorEndpointMode[i] -= 1; 1497 colorEndpos32Mode[i] -= 1;
1517 colorEndpointMode[i] <<= 2; 1498 colorEndpos32Mode[i] <<= 2;
1518 colorEndpointMode[i] |= M[i]; 1499 colorEndpos32Mode[i] |= M[i];
1519 } 1500 }
1520 } else if (nPartitions > 1) { 1501 } else if (nPartitions > 1) {
1521 uint32_t CEM = baseCEM >> 2; 1502 u32 CEM = baseCEM >> 2;
1522 for (uint32_t i = 0; i < nPartitions; i++) { 1503 for (u32 i = 0; i < nPartitions; i++) {
1523 colorEndpointMode[i] = CEM; 1504 colorEndpos32Mode[i] = CEM;
1524 } 1505 }
1525 } 1506 }
1526 1507
1527 // Make sure everything up till here is sane. 1508 // Make sure everything up till here is sane.
1528 for (uint32_t i = 0; i < nPartitions; i++) { 1509 for (u32 i = 0; i < nPartitions; i++) {
1529 assert(colorEndpointMode[i] < 16); 1510 assert(colorEndpos32Mode[i] < 16);
1530 } 1511 }
1531 assert(strm.GetBitsRead() + weightParams.GetPackedBitSize() == 128); 1512 assert(strm.GetBitsRead() + weightParams.GetPackedBitSize() == 128);
1532 1513
1533 // Decode both color data and texel weight data 1514 // Decode both color data and texel weight data
1534 uint32_t colorValues[32]; // Four values, two endpoints, four maximum paritions 1515 u32 colorValues[32]; // Four values, two endpos32s, four maximum paritions
1535 DecodeColorValues(colorValues, colorEndpointData, colorEndpointMode, nPartitions, 1516 DecodeColorValues(colorValues, colorEndpos32Data, colorEndpos32Mode, nPartitions,
1536 colorDataBits); 1517 colorDataBits);
1537 1518
1538 Pixel endpoints[4][2]; 1519 Pixel endpos32s[4][2];
1539 const uint32_t* colorValuesPtr = colorValues; 1520 const u32* colorValuesPtr = colorValues;
1540 for (uint32_t i = 0; i < nPartitions; i++) { 1521 for (u32 i = 0; i < nPartitions; i++) {
1541 ComputeEndpoints(endpoints[i][0], endpoints[i][1], colorValuesPtr, colorEndpointMode[i]); 1522 ComputeEndpos32s(endpos32s[i][0], endpos32s[i][1], colorValuesPtr, colorEndpos32Mode[i]);
1542 } 1523 }
1543 1524
1544 // Read the texel weight data.. 1525 // Read the texel weight data..
1545 uint8_t texelWeightData[16]; 1526 u8 texelWeightData[16];
1546 memcpy(texelWeightData, inBuf, sizeof(texelWeightData)); 1527 memcpy(texelWeightData, inBuf, sizeof(texelWeightData));
1547 1528
1548 // Reverse everything 1529 // Reverse everything
1549 for (uint32_t i = 0; i < 8; i++) { 1530 for (u32 i = 0; i < 8; i++) {
1550// Taken from http://graphics.stanford.edu/~seander/bithacks.html#ReverseByteWith64Bits 1531// Taken from http://graphics.stanford.edu/~seander/bithacks.html#ReverseByteWith64Bits
1551#define REVERSE_BYTE(b) (((b)*0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32 1532#define REVERSE_BYTE(b) (((b)*0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32
1552 unsigned char a = static_cast<unsigned char>(REVERSE_BYTE(texelWeightData[i])); 1533 u8 a = static_cast<u8>(REVERSE_BYTE(texelWeightData[i]));
1553 unsigned char b = static_cast<unsigned char>(REVERSE_BYTE(texelWeightData[15 - i])); 1534 u8 b = static_cast<u8>(REVERSE_BYTE(texelWeightData[15 - i]));
1554#undef REVERSE_BYTE 1535#undef REVERSE_BYTE
1555 1536
1556 texelWeightData[i] = b; 1537 texelWeightData[i] = b;
@@ -1558,50 +1539,51 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth,
1558 } 1539 }
1559 1540
1560 // Make sure that higher non-texel bits are set to zero 1541 // Make sure that higher non-texel bits are set to zero
1561 const uint32_t clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1; 1542 const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1;
1562 texelWeightData[clearByteStart - 1] = 1543 texelWeightData[clearByteStart - 1] =
1563 texelWeightData[clearByteStart - 1] & 1544 texelWeightData[clearByteStart - 1] &
1564 static_cast<uint8_t>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); 1545 static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1);
1565 memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); 1546 memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart);
1566 1547
1567 std::vector<IntegerEncodedValue> texelWeightValues; 1548 std::vector<IntegerEncodedValue> texelWeightValues;
1549 texelWeightValues.reserve(64);
1550
1568 InputBitStream weightStream(texelWeightData); 1551 InputBitStream weightStream(texelWeightData);
1569 1552
1570 IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream, 1553 DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight,
1571 weightParams.m_MaxWeight, 1554 weightParams.GetNumWeightValues());
1572 weightParams.GetNumWeightValues());
1573 1555
1574 // Blocks can be at most 12x12, so we can have as many as 144 weights 1556 // Blocks can be at most 12x12, so we can have as many as 144 weights
1575 uint32_t weights[2][144]; 1557 u32 weights[2][144];
1576 UnquantizeTexelWeights(weights, texelWeightValues, weightParams, blockWidth, blockHeight); 1558 UnquantizeTexelWeights(weights, texelWeightValues, weightParams, blockWidth, blockHeight);
1577 1559
1578 // Now that we have endpoints and weights, we can interpolate and generate 1560 // Now that we have endpos32s and weights, we can s32erpolate and generate
1579 // the proper decoding... 1561 // the proper decoding...
1580 for (uint32_t j = 0; j < blockHeight; j++) 1562 for (u32 j = 0; j < blockHeight; j++)
1581 for (uint32_t i = 0; i < blockWidth; i++) { 1563 for (u32 i = 0; i < blockWidth; i++) {
1582 uint32_t partition = Select2DPartition(partitionIndex, i, j, nPartitions, 1564 u32 partition = Select2DPartition(partitionIndex, i, j, nPartitions,
1583 (blockHeight * blockWidth) < 32); 1565 (blockHeight * blockWidth) < 32);
1584 assert(partition < nPartitions); 1566 assert(partition < nPartitions);
1585 1567
1586 Pixel p; 1568 Pixel p;
1587 for (uint32_t c = 0; c < 4; c++) { 1569 for (u32 c = 0; c < 4; c++) {
1588 uint32_t C0 = endpoints[partition][0].Component(c); 1570 u32 C0 = endpos32s[partition][0].Component(c);
1589 C0 = Replicate(C0, 8, 16); 1571 C0 = Replicate(C0, 8, 16);
1590 uint32_t C1 = endpoints[partition][1].Component(c); 1572 u32 C1 = endpos32s[partition][1].Component(c);
1591 C1 = Replicate(C1, 8, 16); 1573 C1 = Replicate(C1, 8, 16);
1592 1574
1593 uint32_t plane = 0; 1575 u32 plane = 0;
1594 if (weightParams.m_bDualPlane && (((planeIdx + 1) & 3) == c)) { 1576 if (weightParams.m_bDualPlane && (((planeIdx + 1) & 3) == c)) {
1595 plane = 1; 1577 plane = 1;
1596 } 1578 }
1597 1579
1598 uint32_t weight = weights[plane][j * blockWidth + i]; 1580 u32 weight = weights[plane][j * blockWidth + i];
1599 uint32_t C = (C0 * (64 - weight) + C1 * weight + 32) / 64; 1581 u32 C = (C0 * (64 - weight) + C1 * weight + 32) / 64;
1600 if (C == 65535) { 1582 if (C == 65535) {
1601 p.Component(c) = 255; 1583 p.Component(c) = 255;
1602 } else { 1584 } else {
1603 double Cf = static_cast<double>(C); 1585 double Cf = static_cast<double>(C);
1604 p.Component(c) = static_cast<uint16_t>(255.0 * (Cf / 65536.0) + 0.5); 1586 p.Component(c) = static_cast<u16>(255.0 * (Cf / 65536.0) + 0.5);
1605 } 1587 }
1606 } 1588 }
1607 1589
@@ -1613,26 +1595,26 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth,
1613 1595
1614namespace Tegra::Texture::ASTC { 1596namespace Tegra::Texture::ASTC {
1615 1597
1616std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height, 1598std::vector<u8> Decompress(const u8* data, u32 width, u32 height, u32 depth, u32 block_width,
1617 uint32_t depth, uint32_t block_width, uint32_t block_height) { 1599 u32 block_height) {
1618 uint32_t blockIdx = 0; 1600 u32 blockIdx = 0;
1619 std::size_t depth_offset = 0; 1601 std::size_t depth_offset = 0;
1620 std::vector<uint8_t> outData(height * width * depth * 4); 1602 std::vector<u8> outData(height * width * depth * 4);
1621 for (uint32_t k = 0; k < depth; k++) { 1603 for (u32 k = 0; k < depth; k++) {
1622 for (uint32_t j = 0; j < height; j += block_height) { 1604 for (u32 j = 0; j < height; j += block_height) {
1623 for (uint32_t i = 0; i < width; i += block_width) { 1605 for (u32 i = 0; i < width; i += block_width) {
1624 1606
1625 const uint8_t* blockPtr = data + blockIdx * 16; 1607 const u8* blockPtr = data + blockIdx * 16;
1626 1608
1627 // Blocks can be at most 12x12 1609 // Blocks can be at most 12x12
1628 uint32_t uncompData[144]; 1610 u32 uncompData[144];
1629 ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData); 1611 ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData);
1630 1612
1631 uint32_t decompWidth = std::min(block_width, width - i); 1613 u32 decompWidth = std::min(block_width, width - i);
1632 uint32_t decompHeight = std::min(block_height, height - j); 1614 u32 decompHeight = std::min(block_height, height - j);
1633 1615
1634 uint8_t* outRow = depth_offset + outData.data() + (j * width + i) * 4; 1616 u8* outRow = depth_offset + outData.data() + (j * width + i) * 4;
1635 for (uint32_t jj = 0; jj < decompHeight; jj++) { 1617 for (u32 jj = 0; jj < decompHeight; jj++) {
1636 memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4); 1618 memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4);
1637 } 1619 }
1638 1620
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index 8e82c6748..7edc4abe1 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -8,6 +8,7 @@
8#include "common/assert.h" 8#include "common/assert.h"
9#include "common/bit_field.h" 9#include "common/bit_field.h"
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "core/settings.h"
11 12
12namespace Tegra::Texture { 13namespace Tegra::Texture {
13 14
@@ -294,6 +295,14 @@ enum class TextureMipmapFilter : u32 {
294 Linear = 3, 295 Linear = 3,
295}; 296};
296 297
298enum class Anisotropy {
299 Default,
300 Filter2x,
301 Filter4x,
302 Filter8x,
303 Filter16x,
304};
305
297struct TSCEntry { 306struct TSCEntry {
298 union { 307 union {
299 struct { 308 struct {
@@ -328,7 +337,22 @@ struct TSCEntry {
328 }; 337 };
329 338
330 float GetMaxAnisotropy() const { 339 float GetMaxAnisotropy() const {
331 return static_cast<float>(1U << max_anisotropy); 340 const u32 min_value = [] {
341 switch (static_cast<Anisotropy>(Settings::values.max_anisotropy)) {
342 default:
343 case Anisotropy::Default:
344 return 1U;
345 case Anisotropy::Filter2x:
346 return 2U;
347 case Anisotropy::Filter4x:
348 return 4U;
349 case Anisotropy::Filter8x:
350 return 8U;
351 case Anisotropy::Filter16x:
352 return 16U;
353 }
354 }();
355 return static_cast<float>(std::max(1U << max_anisotropy, min_value));
332 } 356 }
333 357
334 float GetMinLod() const { 358 float GetMinLod() const {
diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt
index b841e63fa..d34b47b3f 100644
--- a/src/yuzu/CMakeLists.txt
+++ b/src/yuzu/CMakeLists.txt
@@ -42,6 +42,9 @@ add_executable(yuzu
42 configuration/configure_graphics.cpp 42 configuration/configure_graphics.cpp
43 configuration/configure_graphics.h 43 configuration/configure_graphics.h
44 configuration/configure_graphics.ui 44 configuration/configure_graphics.ui
45 configuration/configure_graphics_advanced.cpp
46 configuration/configure_graphics_advanced.h
47 configuration/configure_graphics_advanced.ui
45 configuration/configure_hotkeys.cpp 48 configuration/configure_hotkeys.cpp
46 configuration/configure_hotkeys.h 49 configuration/configure_hotkeys.h
47 configuration/configure_hotkeys.ui 50 configuration/configure_hotkeys.ui
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index 55a37fffa..c3dbb1a88 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -9,6 +9,9 @@
9#include <QKeyEvent> 9#include <QKeyEvent>
10#include <QMessageBox> 10#include <QMessageBox>
11#include <QOffscreenSurface> 11#include <QOffscreenSurface>
12#include <QOpenGLContext>
13#include <QOpenGLFunctions>
14#include <QOpenGLFunctions_4_3_Core>
12#include <QOpenGLWindow> 15#include <QOpenGLWindow>
13#include <QPainter> 16#include <QPainter>
14#include <QScreen> 17#include <QScreen>
@@ -23,9 +26,10 @@
23#include "common/assert.h" 26#include "common/assert.h"
24#include "common/microprofile.h" 27#include "common/microprofile.h"
25#include "common/scm_rev.h" 28#include "common/scm_rev.h"
29#include "common/scope_exit.h"
26#include "core/core.h" 30#include "core/core.h"
27#include "core/frontend/framebuffer_layout.h" 31#include "core/frontend/framebuffer_layout.h"
28#include "core/frontend/scope_acquire_window_context.h" 32#include "core/frontend/scope_acquire_context.h"
29#include "core/settings.h" 33#include "core/settings.h"
30#include "input_common/keyboard.h" 34#include "input_common/keyboard.h"
31#include "input_common/main.h" 35#include "input_common/main.h"
@@ -35,15 +39,27 @@
35#include "yuzu/bootmanager.h" 39#include "yuzu/bootmanager.h"
36#include "yuzu/main.h" 40#include "yuzu/main.h"
37 41
38EmuThread::EmuThread(GRenderWindow* render_window) : render_window(render_window) {} 42EmuThread::EmuThread(GRenderWindow& window)
43 : shared_context{window.CreateSharedContext()},
44 context{(Settings::values.use_asynchronous_gpu_emulation && shared_context) ? *shared_context
45 : window} {}
39 46
40EmuThread::~EmuThread() = default; 47EmuThread::~EmuThread() = default;
41 48
42void EmuThread::run() { 49static GMainWindow* GetMainWindow() {
43 render_window->MakeCurrent(); 50 for (QWidget* w : qApp->topLevelWidgets()) {
51 if (GMainWindow* main = qobject_cast<GMainWindow*>(w)) {
52 return main;
53 }
54 }
55 return nullptr;
56}
44 57
58void EmuThread::run() {
45 MicroProfileOnThreadCreate("EmuThread"); 59 MicroProfileOnThreadCreate("EmuThread");
46 60
61 Core::Frontend::ScopeAcquireContext acquire_context{context};
62
47 emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0); 63 emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0);
48 64
49 Core::System::GetInstance().Renderer().Rasterizer().LoadDiskResources( 65 Core::System::GetInstance().Renderer().Rasterizer().LoadDiskResources(
@@ -53,11 +69,6 @@ void EmuThread::run() {
53 69
54 emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0); 70 emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0);
55 71
56 if (Settings::values.use_asynchronous_gpu_emulation) {
57 // Release OpenGL context for the GPU thread
58 render_window->DoneCurrent();
59 }
60
61 // Holds whether the cpu was running during the last iteration, 72 // Holds whether the cpu was running during the last iteration,
62 // so that the DebugModeLeft signal can be emitted before the 73 // so that the DebugModeLeft signal can be emitted before the
63 // next execution step 74 // next execution step
@@ -98,190 +109,202 @@ void EmuThread::run() {
98#if MICROPROFILE_ENABLED 109#if MICROPROFILE_ENABLED
99 MicroProfileOnThreadExit(); 110 MicroProfileOnThreadExit();
100#endif 111#endif
101
102 render_window->moveContext();
103} 112}
104 113
105class GGLContext : public Core::Frontend::GraphicsContext { 114class GGLContext : public Core::Frontend::GraphicsContext {
106public: 115public:
107 explicit GGLContext(QOpenGLContext* shared_context) : shared_context{shared_context} { 116 explicit GGLContext(QOpenGLContext* shared_context)
108 context.setFormat(shared_context->format()); 117 : context(new QOpenGLContext(shared_context->parent())),
109 context.setShareContext(shared_context); 118 surface(new QOffscreenSurface(nullptr)) {
110 context.create(); 119
120 // disable vsync for any shared contexts
121 auto format = shared_context->format();
122 format.setSwapInterval(0);
123
124 context->setShareContext(shared_context);
125 context->setFormat(format);
126 context->create();
127 surface->setParent(shared_context->parent());
128 surface->setFormat(format);
129 surface->create();
111 } 130 }
112 131
113 void MakeCurrent() override { 132 void MakeCurrent() override {
114 context.makeCurrent(shared_context->surface()); 133 context->makeCurrent(surface);
115 } 134 }
116 135
117 void DoneCurrent() override { 136 void DoneCurrent() override {
118 context.doneCurrent(); 137 context->doneCurrent();
119 } 138 }
120 139
121 void SwapBuffers() override {}
122
123private: 140private:
124 QOpenGLContext* shared_context; 141 QOpenGLContext* context;
125 QOpenGLContext context; 142 QOffscreenSurface* surface;
126}; 143};
127 144
128class GWidgetInternal : public QWindow { 145class ChildRenderWindow : public QWindow {
129public: 146public:
130 GWidgetInternal(GRenderWindow* parent) : parent(parent) {} 147 ChildRenderWindow(QWindow* parent, QWidget* event_handler)
131 virtual ~GWidgetInternal() = default; 148 : QWindow{parent}, event_handler{event_handler} {}
132 149
133 void resizeEvent(QResizeEvent* ev) override { 150 virtual ~ChildRenderWindow() = default;
134 parent->OnClientAreaResized(ev->size().width(), ev->size().height());
135 parent->OnFramebufferSizeChanged();
136 }
137 151
138 void keyPressEvent(QKeyEvent* event) override { 152 virtual void Present() = 0;
139 InputCommon::GetKeyboard()->PressKey(event->key());
140 }
141 153
142 void keyReleaseEvent(QKeyEvent* event) override { 154protected:
143 InputCommon::GetKeyboard()->ReleaseKey(event->key()); 155 bool event(QEvent* event) override {
156 switch (event->type()) {
157 case QEvent::UpdateRequest:
158 Present();
159 return true;
160 case QEvent::MouseButtonPress:
161 case QEvent::MouseButtonRelease:
162 case QEvent::MouseButtonDblClick:
163 case QEvent::MouseMove:
164 case QEvent::KeyPress:
165 case QEvent::KeyRelease:
166 case QEvent::FocusIn:
167 case QEvent::FocusOut:
168 case QEvent::FocusAboutToChange:
169 case QEvent::Enter:
170 case QEvent::Leave:
171 case QEvent::Wheel:
172 case QEvent::TabletMove:
173 case QEvent::TabletPress:
174 case QEvent::TabletRelease:
175 case QEvent::TabletEnterProximity:
176 case QEvent::TabletLeaveProximity:
177 case QEvent::TouchBegin:
178 case QEvent::TouchUpdate:
179 case QEvent::TouchEnd:
180 case QEvent::InputMethodQuery:
181 case QEvent::TouchCancel:
182 return QCoreApplication::sendEvent(event_handler, event);
183 case QEvent::Drop:
184 GetMainWindow()->DropAction(static_cast<QDropEvent*>(event));
185 return true;
186 case QEvent::DragResponse:
187 case QEvent::DragEnter:
188 case QEvent::DragLeave:
189 case QEvent::DragMove:
190 GetMainWindow()->AcceptDropEvent(static_cast<QDropEvent*>(event));
191 return true;
192 default:
193 return QWindow::event(event);
194 }
144 } 195 }
145 196
146 void mousePressEvent(QMouseEvent* event) override { 197 void exposeEvent(QExposeEvent* event) override {
147 if (event->source() == Qt::MouseEventSynthesizedBySystem) 198 QWindow::requestUpdate();
148 return; // touch input is handled in TouchBeginEvent 199 QWindow::exposeEvent(event);
149
150 const auto pos{event->pos()};
151 if (event->button() == Qt::LeftButton) {
152 const auto [x, y] = parent->ScaleTouch(pos);
153 parent->TouchPressed(x, y);
154 } else if (event->button() == Qt::RightButton) {
155 InputCommon::GetMotionEmu()->BeginTilt(pos.x(), pos.y());
156 }
157 } 200 }
158 201
159 void mouseMoveEvent(QMouseEvent* event) override { 202private:
160 if (event->source() == Qt::MouseEventSynthesizedBySystem) 203 QWidget* event_handler{};
161 return; // touch input is handled in TouchUpdateEvent 204};
162 205
163 const auto pos{event->pos()}; 206class OpenGLWindow final : public ChildRenderWindow {
164 const auto [x, y] = parent->ScaleTouch(pos); 207public:
165 parent->TouchMoved(x, y); 208 OpenGLWindow(QWindow* parent, QWidget* event_handler, QOpenGLContext* shared_context)
166 InputCommon::GetMotionEmu()->Tilt(pos.x(), pos.y()); 209 : ChildRenderWindow{parent, event_handler},
167 } 210 context(new QOpenGLContext(shared_context->parent())) {
168 211
169 void mouseReleaseEvent(QMouseEvent* event) override { 212 // disable vsync for any shared contexts
170 if (event->source() == Qt::MouseEventSynthesizedBySystem) 213 auto format = shared_context->format();
171 return; // touch input is handled in TouchEndEvent 214 format.setSwapInterval(Settings::values.use_vsync ? 1 : 0);
215 this->setFormat(format);
172 216
173 if (event->button() == Qt::LeftButton) 217 context->setShareContext(shared_context);
174 parent->TouchReleased(); 218 context->setScreen(this->screen());
175 else if (event->button() == Qt::RightButton) 219 context->setFormat(format);
176 InputCommon::GetMotionEmu()->EndTilt(); 220 context->create();
177 }
178 221
179 void DisablePainting() { 222 setSurfaceType(QWindow::OpenGLSurface);
180 do_painting = false;
181 }
182 223
183 void EnablePainting() { 224 // TODO: One of these flags might be interesting: WA_OpaquePaintEvent, WA_NoBackground,
184 do_painting = true; 225 // WA_DontShowOnScreen, WA_DeleteOnClose
185 } 226 }
186 227
187 std::pair<unsigned, unsigned> GetSize() const { 228 ~OpenGLWindow() override {
188 return std::make_pair(width(), height()); 229 context->doneCurrent();
189 } 230 }
190 231
191protected: 232 void Present() override {
192 bool IsPaintingEnabled() const { 233 if (!isExposed()) {
193 return do_painting; 234 return;
235 }
236
237 context->makeCurrent(this);
238 Core::System::GetInstance().Renderer().TryPresent(100);
239 context->swapBuffers(this);
240 auto f = context->versionFunctions<QOpenGLFunctions_4_3_Core>();
241 f->glFinish();
242 QWindow::requestUpdate();
194 } 243 }
195 244
196private: 245private:
197 GRenderWindow* parent; 246 QOpenGLContext* context{};
198 bool do_painting = false;
199};
200
201// This class overrides paintEvent and resizeEvent to prevent the GUI thread from stealing GL
202// context.
203// The corresponding functionality is handled in EmuThread instead
204class GGLWidgetInternal final : public GWidgetInternal, public QOpenGLWindow {
205public:
206 GGLWidgetInternal(GRenderWindow* parent, QOpenGLContext* shared_context)
207 : GWidgetInternal(parent), QOpenGLWindow(shared_context) {}
208 ~GGLWidgetInternal() override = default;
209
210 void paintEvent(QPaintEvent* ev) override {
211 if (IsPaintingEnabled()) {
212 QPainter painter(this);
213 }
214 }
215}; 247};
216 248
217#ifdef HAS_VULKAN 249#ifdef HAS_VULKAN
218class GVKWidgetInternal final : public GWidgetInternal { 250class VulkanWindow final : public ChildRenderWindow {
219public: 251public:
220 GVKWidgetInternal(GRenderWindow* parent, QVulkanInstance* instance) : GWidgetInternal(parent) { 252 VulkanWindow(QWindow* parent, QWidget* event_handler, QVulkanInstance* instance)
253 : ChildRenderWindow{parent, event_handler} {
221 setSurfaceType(QSurface::SurfaceType::VulkanSurface); 254 setSurfaceType(QSurface::SurfaceType::VulkanSurface);
222 setVulkanInstance(instance); 255 setVulkanInstance(instance);
223 } 256 }
224 ~GVKWidgetInternal() override = default; 257
258 ~VulkanWindow() override = default;
259
260 void Present() override {
261 // TODO(bunnei): ImplementMe
262 }
263
264private:
265 QWidget* event_handler{};
225}; 266};
226#endif 267#endif
227 268
228GRenderWindow::GRenderWindow(GMainWindow* parent, EmuThread* emu_thread) 269GRenderWindow::GRenderWindow(QWidget* parent_, EmuThread* emu_thread)
229 : QWidget(parent), emu_thread(emu_thread) { 270 : QWidget(parent_), emu_thread(emu_thread) {
230 setWindowTitle(QStringLiteral("yuzu %1 | %2-%3") 271 setWindowTitle(QStringLiteral("yuzu %1 | %2-%3")
231 .arg(QString::fromUtf8(Common::g_build_name), 272 .arg(QString::fromUtf8(Common::g_build_name),
232 QString::fromUtf8(Common::g_scm_branch), 273 QString::fromUtf8(Common::g_scm_branch),
233 QString::fromUtf8(Common::g_scm_desc))); 274 QString::fromUtf8(Common::g_scm_desc)));
234 setAttribute(Qt::WA_AcceptTouchEvents); 275 setAttribute(Qt::WA_AcceptTouchEvents);
235 276 auto layout = new QHBoxLayout(this);
277 layout->setMargin(0);
278 setLayout(layout);
236 InputCommon::Init(); 279 InputCommon::Init();
280
281 GMainWindow* parent = GetMainWindow();
237 connect(this, &GRenderWindow::FirstFrameDisplayed, parent, &GMainWindow::OnLoadComplete); 282 connect(this, &GRenderWindow::FirstFrameDisplayed, parent, &GMainWindow::OnLoadComplete);
238} 283}
239 284
240GRenderWindow::~GRenderWindow() { 285GRenderWindow::~GRenderWindow() {
241 InputCommon::Shutdown(); 286 InputCommon::Shutdown();
242
243 // Avoid an unordered destruction that generates a segfault
244 delete child;
245} 287}
246 288
247void GRenderWindow::moveContext() { 289void GRenderWindow::MakeCurrent() {
248 if (!context) { 290 if (core_context) {
249 return; 291 core_context->MakeCurrent();
250 } 292 }
251 DoneCurrent();
252
253 // If the thread started running, move the GL Context to the new thread. Otherwise, move it
254 // back.
255 auto thread = (QThread::currentThread() == qApp->thread() && emu_thread != nullptr)
256 ? emu_thread
257 : qApp->thread();
258 context->moveToThread(thread);
259} 293}
260 294
261void GRenderWindow::SwapBuffers() { 295void GRenderWindow::DoneCurrent() {
262 if (context) { 296 if (core_context) {
263 context->swapBuffers(child); 297 core_context->DoneCurrent();
264 } 298 }
299}
300
301void GRenderWindow::PollEvents() {
265 if (!first_frame) { 302 if (!first_frame) {
266 first_frame = true; 303 first_frame = true;
267 emit FirstFrameDisplayed(); 304 emit FirstFrameDisplayed();
268 } 305 }
269} 306}
270 307
271void GRenderWindow::MakeCurrent() {
272 if (context) {
273 context->makeCurrent(child);
274 }
275}
276
277void GRenderWindow::DoneCurrent() {
278 if (context) {
279 context->doneCurrent();
280 }
281}
282
283void GRenderWindow::PollEvents() {}
284
285bool GRenderWindow::IsShown() const { 308bool GRenderWindow::IsShown() const {
286 return !isMinimized(); 309 return !isMinimized();
287} 310}
@@ -291,7 +314,7 @@ void GRenderWindow::RetrieveVulkanHandlers(void* get_instance_proc_addr, void* i
291#ifdef HAS_VULKAN 314#ifdef HAS_VULKAN
292 const auto instance_proc_addr = vk_instance->getInstanceProcAddr("vkGetInstanceProcAddr"); 315 const auto instance_proc_addr = vk_instance->getInstanceProcAddr("vkGetInstanceProcAddr");
293 const VkInstance instance_copy = vk_instance->vkInstance(); 316 const VkInstance instance_copy = vk_instance->vkInstance();
294 const VkSurfaceKHR surface_copy = vk_instance->surfaceForWindow(child); 317 const VkSurfaceKHR surface_copy = vk_instance->surfaceForWindow(child_window);
295 318
296 std::memcpy(get_instance_proc_addr, &instance_proc_addr, sizeof(instance_proc_addr)); 319 std::memcpy(get_instance_proc_addr, &instance_proc_addr, sizeof(instance_proc_addr));
297 std::memcpy(instance, &instance_copy, sizeof(instance_copy)); 320 std::memcpy(instance, &instance_copy, sizeof(instance_copy));
@@ -309,21 +332,10 @@ void GRenderWindow::RetrieveVulkanHandlers(void* get_instance_proc_addr, void* i
309void GRenderWindow::OnFramebufferSizeChanged() { 332void GRenderWindow::OnFramebufferSizeChanged() {
310 // Screen changes potentially incur a change in screen DPI, hence we should update the 333 // Screen changes potentially incur a change in screen DPI, hence we should update the
311 // framebuffer size 334 // framebuffer size
312 const qreal pixelRatio{GetWindowPixelRatio()}; 335 const qreal pixel_ratio = windowPixelRatio();
313 const auto size{child->GetSize()}; 336 const u32 width = this->width() * pixel_ratio;
314 UpdateCurrentFramebufferLayout(size.first * pixelRatio, size.second * pixelRatio); 337 const u32 height = this->height() * pixel_ratio;
315} 338 UpdateCurrentFramebufferLayout(width, height);
316
317void GRenderWindow::ForwardKeyPressEvent(QKeyEvent* event) {
318 if (child) {
319 child->keyPressEvent(event);
320 }
321}
322
323void GRenderWindow::ForwardKeyReleaseEvent(QKeyEvent* event) {
324 if (child) {
325 child->keyReleaseEvent(event);
326 }
327} 339}
328 340
329void GRenderWindow::BackupGeometry() { 341void GRenderWindow::BackupGeometry() {
@@ -351,13 +363,12 @@ QByteArray GRenderWindow::saveGeometry() {
351 return geometry; 363 return geometry;
352} 364}
353 365
354qreal GRenderWindow::GetWindowPixelRatio() const { 366qreal GRenderWindow::windowPixelRatio() const {
355 // windowHandle() might not be accessible until the window is displayed to screen. 367 return devicePixelRatio();
356 return windowHandle() ? windowHandle()->screen()->devicePixelRatio() : 1.0f;
357} 368}
358 369
359std::pair<u32, u32> GRenderWindow::ScaleTouch(const QPointF pos) const { 370std::pair<u32, u32> GRenderWindow::ScaleTouch(const QPointF pos) const {
360 const qreal pixel_ratio{GetWindowPixelRatio()}; 371 const qreal pixel_ratio = windowPixelRatio();
361 return {static_cast<u32>(std::max(std::round(pos.x() * pixel_ratio), qreal{0.0})), 372 return {static_cast<u32>(std::max(std::round(pos.x() * pixel_ratio), qreal{0.0})),
362 static_cast<u32>(std::max(std::round(pos.y() * pixel_ratio), qreal{0.0}))}; 373 static_cast<u32>(std::max(std::round(pos.y() * pixel_ratio), qreal{0.0}))};
363} 374}
@@ -367,6 +378,47 @@ void GRenderWindow::closeEvent(QCloseEvent* event) {
367 QWidget::closeEvent(event); 378 QWidget::closeEvent(event);
368} 379}
369 380
381void GRenderWindow::keyPressEvent(QKeyEvent* event) {
382 InputCommon::GetKeyboard()->PressKey(event->key());
383}
384
385void GRenderWindow::keyReleaseEvent(QKeyEvent* event) {
386 InputCommon::GetKeyboard()->ReleaseKey(event->key());
387}
388
389void GRenderWindow::mousePressEvent(QMouseEvent* event) {
390 if (event->source() == Qt::MouseEventSynthesizedBySystem)
391 return; // touch input is handled in TouchBeginEvent
392
393 auto pos = event->pos();
394 if (event->button() == Qt::LeftButton) {
395 const auto [x, y] = ScaleTouch(pos);
396 this->TouchPressed(x, y);
397 } else if (event->button() == Qt::RightButton) {
398 InputCommon::GetMotionEmu()->BeginTilt(pos.x(), pos.y());
399 }
400}
401
402void GRenderWindow::mouseMoveEvent(QMouseEvent* event) {
403 if (event->source() == Qt::MouseEventSynthesizedBySystem)
404 return; // touch input is handled in TouchUpdateEvent
405
406 auto pos = event->pos();
407 const auto [x, y] = ScaleTouch(pos);
408 this->TouchMoved(x, y);
409 InputCommon::GetMotionEmu()->Tilt(pos.x(), pos.y());
410}
411
412void GRenderWindow::mouseReleaseEvent(QMouseEvent* event) {
413 if (event->source() == Qt::MouseEventSynthesizedBySystem)
414 return; // touch input is handled in TouchEndEvent
415
416 if (event->button() == Qt::LeftButton)
417 this->TouchReleased();
418 else if (event->button() == Qt::RightButton)
419 InputCommon::GetMotionEmu()->EndTilt();
420}
421
370void GRenderWindow::TouchBeginEvent(const QTouchEvent* event) { 422void GRenderWindow::TouchBeginEvent(const QTouchEvent* event) {
371 // TouchBegin always has exactly one touch point, so take the .first() 423 // TouchBegin always has exactly one touch point, so take the .first()
372 const auto [x, y] = ScaleTouch(event->touchPoints().first().pos()); 424 const auto [x, y] = ScaleTouch(event->touchPoints().first().pos());
@@ -415,26 +467,20 @@ void GRenderWindow::focusOutEvent(QFocusEvent* event) {
415 InputCommon::GetKeyboard()->ReleaseAllKeys(); 467 InputCommon::GetKeyboard()->ReleaseAllKeys();
416} 468}
417 469
418void GRenderWindow::OnClientAreaResized(u32 width, u32 height) { 470void GRenderWindow::resizeEvent(QResizeEvent* event) {
419 NotifyClientAreaSizeChanged(std::make_pair(width, height)); 471 QWidget::resizeEvent(event);
472 OnFramebufferSizeChanged();
420} 473}
421 474
422std::unique_ptr<Core::Frontend::GraphicsContext> GRenderWindow::CreateSharedContext() const { 475std::unique_ptr<Core::Frontend::GraphicsContext> GRenderWindow::CreateSharedContext() const {
423 return std::make_unique<GGLContext>(context.get()); 476 if (Settings::values.renderer_backend == Settings::RendererBackend::OpenGL) {
477 return std::make_unique<GGLContext>(QOpenGLContext::globalShareContext());
478 }
479 return {};
424} 480}
425 481
426bool GRenderWindow::InitRenderTarget() { 482bool GRenderWindow::InitRenderTarget() {
427 shared_context.reset(); 483 ReleaseRenderTarget();
428 context.reset();
429 if (child) {
430 delete child;
431 }
432 if (container) {
433 delete container;
434 }
435 if (layout()) {
436 delete layout();
437 }
438 484
439 first_frame = false; 485 first_frame = false;
440 486
@@ -451,13 +497,6 @@ bool GRenderWindow::InitRenderTarget() {
451 break; 497 break;
452 } 498 }
453 499
454 container = QWidget::createWindowContainer(child, this);
455 QBoxLayout* layout = new QHBoxLayout(this);
456
457 layout->addWidget(container);
458 layout->setMargin(0);
459 setLayout(layout);
460
461 // Reset minimum required size to avoid resizing issues on the main window after restarting. 500 // Reset minimum required size to avoid resizing issues on the main window after restarting.
462 setMinimumSize(1, 1); 501 setMinimumSize(1, 1);
463 502
@@ -467,14 +506,9 @@ bool GRenderWindow::InitRenderTarget() {
467 hide(); 506 hide();
468 507
469 resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height); 508 resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height);
470 child->resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height);
471 container->resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height);
472 509
473 OnMinimalClientAreaChangeRequest(GetActiveConfig().min_client_area_size); 510 OnMinimalClientAreaChangeRequest(GetActiveConfig().min_client_area_size);
474
475 OnFramebufferSizeChanged(); 511 OnFramebufferSizeChanged();
476 NotifyClientAreaSizeChanged(child->GetSize());
477
478 BackupGeometry(); 512 BackupGeometry();
479 513
480 if (Settings::values.renderer_backend == Settings::RendererBackend::OpenGL) { 514 if (Settings::values.renderer_backend == Settings::RendererBackend::OpenGL) {
@@ -486,6 +520,14 @@ bool GRenderWindow::InitRenderTarget() {
486 return true; 520 return true;
487} 521}
488 522
523void GRenderWindow::ReleaseRenderTarget() {
524 if (child_widget) {
525 layout()->removeWidget(child_widget);
526 delete child_widget;
527 child_widget = nullptr;
528 }
529}
530
489void GRenderWindow::CaptureScreenshot(u32 res_scale, const QString& screenshot_path) { 531void GRenderWindow::CaptureScreenshot(u32 res_scale, const QString& screenshot_path) {
490 auto& renderer = Core::System::GetInstance().Renderer(); 532 auto& renderer = Core::System::GetInstance().Renderer();
491 533
@@ -521,16 +563,19 @@ bool GRenderWindow::InitializeOpenGL() {
521 fmt.setOption(QSurfaceFormat::FormatOption::DeprecatedFunctions); 563 fmt.setOption(QSurfaceFormat::FormatOption::DeprecatedFunctions);
522 // TODO: expose a setting for buffer value (ie default/single/double/triple) 564 // TODO: expose a setting for buffer value (ie default/single/double/triple)
523 fmt.setSwapBehavior(QSurfaceFormat::DefaultSwapBehavior); 565 fmt.setSwapBehavior(QSurfaceFormat::DefaultSwapBehavior);
524 shared_context = std::make_unique<QOpenGLContext>(); 566 fmt.setSwapInterval(0);
525 shared_context->setFormat(fmt); 567 QSurfaceFormat::setDefaultFormat(fmt);
526 shared_context->create(); 568
527 context = std::make_unique<QOpenGLContext>(); 569 GMainWindow* parent = GetMainWindow();
528 context->setShareContext(shared_context.get()); 570 QWindow* parent_win_handle = parent ? parent->windowHandle() : nullptr;
529 context->setFormat(fmt); 571 child_window = new OpenGLWindow(parent_win_handle, this, QOpenGLContext::globalShareContext());
530 context->create(); 572 child_window->create();
531 fmt.setSwapInterval(false); 573 child_widget = createWindowContainer(child_window, this);
532 574 child_widget->resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height);
533 child = new GGLWidgetInternal(this, shared_context.get()); 575 layout()->addWidget(child_widget);
576
577 core_context = CreateSharedContext();
578
534 return true; 579 return true;
535} 580}
536 581
@@ -559,7 +604,14 @@ bool GRenderWindow::InitializeVulkan() {
559 return false; 604 return false;
560 } 605 }
561 606
562 child = new GVKWidgetInternal(this, vk_instance.get()); 607 GMainWindow* parent = GetMainWindow();
608 QWindow* parent_win_handle = parent ? parent->windowHandle() : nullptr;
609 child_window = new VulkanWindow(parent_win_handle, this, vk_instance.get());
610 child_window->create();
611 child_widget = createWindowContainer(child_window, this);
612 child_widget->resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height);
613 layout()->addWidget(child_widget);
614
563 return true; 615 return true;
564#else 616#else
565 QMessageBox::critical(this, tr("Vulkan not available!"), 617 QMessageBox::critical(this, tr("Vulkan not available!"),
@@ -569,7 +621,7 @@ bool GRenderWindow::InitializeVulkan() {
569} 621}
570 622
571bool GRenderWindow::LoadOpenGL() { 623bool GRenderWindow::LoadOpenGL() {
572 Core::Frontend::ScopeAcquireWindowContext acquire_context{*this}; 624 Core::Frontend::ScopeAcquireContext acquire_context{*this};
573 if (!gladLoadGL()) { 625 if (!gladLoadGL()) {
574 QMessageBox::critical(this, tr("Error while initializing OpenGL 4.3!"), 626 QMessageBox::critical(this, tr("Error while initializing OpenGL 4.3!"),
575 tr("Your GPU may not support OpenGL 4.3, or you do not have the " 627 tr("Your GPU may not support OpenGL 4.3, or you do not have the "
@@ -621,12 +673,10 @@ QStringList GRenderWindow::GetUnsupportedGLExtensions() const {
621 673
622void GRenderWindow::OnEmulationStarting(EmuThread* emu_thread) { 674void GRenderWindow::OnEmulationStarting(EmuThread* emu_thread) {
623 this->emu_thread = emu_thread; 675 this->emu_thread = emu_thread;
624 child->DisablePainting();
625} 676}
626 677
627void GRenderWindow::OnEmulationStopping() { 678void GRenderWindow::OnEmulationStopping() {
628 emu_thread = nullptr; 679 emu_thread = nullptr;
629 child->EnablePainting();
630} 680}
631 681
632void GRenderWindow::showEvent(QShowEvent* event) { 682void GRenderWindow::showEvent(QShowEvent* event) {
diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h
index 71a2fa321..79b030304 100644
--- a/src/yuzu/bootmanager.h
+++ b/src/yuzu/bootmanager.h
@@ -11,11 +11,13 @@
11#include <QImage> 11#include <QImage>
12#include <QThread> 12#include <QThread>
13#include <QWidget> 13#include <QWidget>
14#include <QWindow>
14 15
15#include "common/thread.h" 16#include "common/thread.h"
16#include "core/core.h" 17#include "core/core.h"
17#include "core/frontend/emu_window.h" 18#include "core/frontend/emu_window.h"
18 19
20class GRenderWindow;
19class QKeyEvent; 21class QKeyEvent;
20class QScreen; 22class QScreen;
21class QTouchEvent; 23class QTouchEvent;
@@ -26,14 +28,6 @@ class QOpenGLContext;
26class QVulkanInstance; 28class QVulkanInstance;
27#endif 29#endif
28 30
29class GWidgetInternal;
30class GGLWidgetInternal;
31class GVKWidgetInternal;
32class GMainWindow;
33class GRenderWindow;
34class QSurface;
35class QOpenGLContext;
36
37namespace VideoCore { 31namespace VideoCore {
38enum class LoadCallbackStage; 32enum class LoadCallbackStage;
39} 33}
@@ -42,7 +36,7 @@ class EmuThread final : public QThread {
42 Q_OBJECT 36 Q_OBJECT
43 37
44public: 38public:
45 explicit EmuThread(GRenderWindow* render_window); 39 explicit EmuThread(GRenderWindow& window);
46 ~EmuThread() override; 40 ~EmuThread() override;
47 41
48 /** 42 /**
@@ -96,7 +90,11 @@ private:
96 std::mutex running_mutex; 90 std::mutex running_mutex;
97 std::condition_variable running_cv; 91 std::condition_variable running_cv;
98 92
99 GRenderWindow* render_window; 93 /// Only used in asynchronous GPU mode
94 std::unique_ptr<Core::Frontend::GraphicsContext> shared_context;
95
96 /// This is shared_context in asynchronous GPU mode, core_context in synchronous GPU mode
97 Core::Frontend::GraphicsContext& context;
100 98
101signals: 99signals:
102 /** 100 /**
@@ -126,11 +124,10 @@ class GRenderWindow : public QWidget, public Core::Frontend::EmuWindow {
126 Q_OBJECT 124 Q_OBJECT
127 125
128public: 126public:
129 GRenderWindow(GMainWindow* parent, EmuThread* emu_thread); 127 GRenderWindow(QWidget* parent, EmuThread* emu_thread);
130 ~GRenderWindow() override; 128 ~GRenderWindow() override;
131 129
132 // EmuWindow implementation 130 // EmuWindow implementation.
133 void SwapBuffers() override;
134 void MakeCurrent() override; 131 void MakeCurrent() override;
135 void DoneCurrent() override; 132 void DoneCurrent() override;
136 void PollEvents() override; 133 void PollEvents() override;
@@ -139,30 +136,36 @@ public:
139 void* surface) const override; 136 void* surface) const override;
140 std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; 137 std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override;
141 138
142 void ForwardKeyPressEvent(QKeyEvent* event);
143 void ForwardKeyReleaseEvent(QKeyEvent* event);
144
145 void BackupGeometry(); 139 void BackupGeometry();
146 void RestoreGeometry(); 140 void RestoreGeometry();
147 void restoreGeometry(const QByteArray& geometry); // overridden 141 void restoreGeometry(const QByteArray& geometry); // overridden
148 QByteArray saveGeometry(); // overridden 142 QByteArray saveGeometry(); // overridden
149 143
150 qreal GetWindowPixelRatio() const; 144 qreal windowPixelRatio() const;
151 std::pair<u32, u32> ScaleTouch(QPointF pos) const;
152 145
153 void closeEvent(QCloseEvent* event) override; 146 void closeEvent(QCloseEvent* event) override;
147
148 void resizeEvent(QResizeEvent* event) override;
149
150 void keyPressEvent(QKeyEvent* event) override;
151 void keyReleaseEvent(QKeyEvent* event) override;
152
153 void mousePressEvent(QMouseEvent* event) override;
154 void mouseMoveEvent(QMouseEvent* event) override;
155 void mouseReleaseEvent(QMouseEvent* event) override;
156
154 bool event(QEvent* event) override; 157 bool event(QEvent* event) override;
155 void focusOutEvent(QFocusEvent* event) override;
156 158
157 void OnClientAreaResized(u32 width, u32 height); 159 void focusOutEvent(QFocusEvent* event) override;
158 160
159 bool InitRenderTarget(); 161 bool InitRenderTarget();
160 162
163 /// Destroy the previous run's child_widget which should also destroy the child_window
164 void ReleaseRenderTarget();
165
161 void CaptureScreenshot(u32 res_scale, const QString& screenshot_path); 166 void CaptureScreenshot(u32 res_scale, const QString& screenshot_path);
162 167
163public slots: 168public slots:
164 void moveContext(); // overridden
165
166 void OnEmulationStarting(EmuThread* emu_thread); 169 void OnEmulationStarting(EmuThread* emu_thread);
167 void OnEmulationStopping(); 170 void OnEmulationStopping();
168 void OnFramebufferSizeChanged(); 171 void OnFramebufferSizeChanged();
@@ -173,6 +176,7 @@ signals:
173 void FirstFrameDisplayed(); 176 void FirstFrameDisplayed();
174 177
175private: 178private:
179 std::pair<u32, u32> ScaleTouch(QPointF pos) const;
176 void TouchBeginEvent(const QTouchEvent* event); 180 void TouchBeginEvent(const QTouchEvent* event);
177 void TouchUpdateEvent(const QTouchEvent* event); 181 void TouchUpdateEvent(const QTouchEvent* event);
178 void TouchEndEvent(); 182 void TouchEndEvent();
@@ -184,15 +188,9 @@ private:
184 bool LoadOpenGL(); 188 bool LoadOpenGL();
185 QStringList GetUnsupportedGLExtensions() const; 189 QStringList GetUnsupportedGLExtensions() const;
186 190
187 QWidget* container = nullptr;
188 GWidgetInternal* child = nullptr;
189
190 EmuThread* emu_thread; 191 EmuThread* emu_thread;
191 // Context that backs the GGLWidgetInternal (and will be used by core to render) 192
192 std::unique_ptr<QOpenGLContext> context; 193 std::unique_ptr<GraphicsContext> core_context;
193 // Context that will be shared between all newly created contexts. This should never be made
194 // current
195 std::unique_ptr<QOpenGLContext> shared_context;
196 194
197#ifdef HAS_VULKAN 195#ifdef HAS_VULKAN
198 std::unique_ptr<QVulkanInstance> vk_instance; 196 std::unique_ptr<QVulkanInstance> vk_instance;
@@ -202,6 +200,15 @@ private:
202 QImage screenshot_image; 200 QImage screenshot_image;
203 201
204 QByteArray geometry; 202 QByteArray geometry;
203
204 /// Native window handle that backs this presentation widget
205 QWindow* child_window = nullptr;
206
207 /// In order to embed the window into GRenderWindow, you need to use createWindowContainer to
208 /// put the child_window into a widget then add it to the layout. This child_widget can be
209 /// parented to GRenderWindow and use Qt's lifetime system
210 QWidget* child_widget = nullptr;
211
205 bool first_frame = false; 212 bool first_frame = false;
206 213
207protected: 214protected:
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index 6209fff75..3b9ab38dd 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -539,7 +539,7 @@ void Config::ReadDebuggingValues() {
539void Config::ReadServiceValues() { 539void Config::ReadServiceValues() {
540 qt_config->beginGroup(QStringLiteral("Services")); 540 qt_config->beginGroup(QStringLiteral("Services"));
541 Settings::values.bcat_backend = 541 Settings::values.bcat_backend =
542 ReadSetting(QStringLiteral("bcat_backend"), QStringLiteral("boxcat")) 542 ReadSetting(QStringLiteral("bcat_backend"), QStringLiteral("null"))
543 .toString() 543 .toString()
544 .toStdString(); 544 .toStdString();
545 Settings::values.bcat_boxcat_local = 545 Settings::values.bcat_boxcat_local =
@@ -631,6 +631,7 @@ void Config::ReadRendererValues() {
631 Settings::values.resolution_factor = 631 Settings::values.resolution_factor =
632 ReadSetting(QStringLiteral("resolution_factor"), 1.0).toFloat(); 632 ReadSetting(QStringLiteral("resolution_factor"), 1.0).toFloat();
633 Settings::values.aspect_ratio = ReadSetting(QStringLiteral("aspect_ratio"), 0).toInt(); 633 Settings::values.aspect_ratio = ReadSetting(QStringLiteral("aspect_ratio"), 0).toInt();
634 Settings::values.max_anisotropy = ReadSetting(QStringLiteral("max_anisotropy"), 0).toInt();
634 Settings::values.use_frame_limit = 635 Settings::values.use_frame_limit =
635 ReadSetting(QStringLiteral("use_frame_limit"), true).toBool(); 636 ReadSetting(QStringLiteral("use_frame_limit"), true).toBool();
636 Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt(); 637 Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt();
@@ -640,6 +641,7 @@ void Config::ReadRendererValues() {
640 ReadSetting(QStringLiteral("use_accurate_gpu_emulation"), false).toBool(); 641 ReadSetting(QStringLiteral("use_accurate_gpu_emulation"), false).toBool();
641 Settings::values.use_asynchronous_gpu_emulation = 642 Settings::values.use_asynchronous_gpu_emulation =
642 ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool(); 643 ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool();
644 Settings::values.use_vsync = ReadSetting(QStringLiteral("use_vsync"), true).toBool();
643 Settings::values.force_30fps_mode = 645 Settings::values.force_30fps_mode =
644 ReadSetting(QStringLiteral("force_30fps_mode"), false).toBool(); 646 ReadSetting(QStringLiteral("force_30fps_mode"), false).toBool();
645 647
@@ -680,6 +682,8 @@ void Config::ReadSystemValues() {
680 682
681 Settings::values.language_index = ReadSetting(QStringLiteral("language_index"), 1).toInt(); 683 Settings::values.language_index = ReadSetting(QStringLiteral("language_index"), 1).toInt();
682 684
685 Settings::values.region_index = ReadSetting(QStringLiteral("region_index"), 1).toInt();
686
683 const auto rng_seed_enabled = ReadSetting(QStringLiteral("rng_seed_enabled"), false).toBool(); 687 const auto rng_seed_enabled = ReadSetting(QStringLiteral("rng_seed_enabled"), false).toBool();
684 if (rng_seed_enabled) { 688 if (rng_seed_enabled) {
685 Settings::values.rng_seed = ReadSetting(QStringLiteral("rng_seed"), 0).toULongLong(); 689 Settings::values.rng_seed = ReadSetting(QStringLiteral("rng_seed"), 0).toULongLong();
@@ -696,6 +700,8 @@ void Config::ReadSystemValues() {
696 Settings::values.custom_rtc = std::nullopt; 700 Settings::values.custom_rtc = std::nullopt;
697 } 701 }
698 702
703 Settings::values.sound_index = ReadSetting(QStringLiteral("sound_index"), 1).toInt();
704
699 qt_config->endGroup(); 705 qt_config->endGroup();
700} 706}
701 707
@@ -1066,6 +1072,7 @@ void Config::SaveRendererValues() {
1066 WriteSetting(QStringLiteral("resolution_factor"), 1072 WriteSetting(QStringLiteral("resolution_factor"),
1067 static_cast<double>(Settings::values.resolution_factor), 1.0); 1073 static_cast<double>(Settings::values.resolution_factor), 1.0);
1068 WriteSetting(QStringLiteral("aspect_ratio"), Settings::values.aspect_ratio, 0); 1074 WriteSetting(QStringLiteral("aspect_ratio"), Settings::values.aspect_ratio, 0);
1075 WriteSetting(QStringLiteral("max_anisotropy"), Settings::values.max_anisotropy, 0);
1069 WriteSetting(QStringLiteral("use_frame_limit"), Settings::values.use_frame_limit, true); 1076 WriteSetting(QStringLiteral("use_frame_limit"), Settings::values.use_frame_limit, true);
1070 WriteSetting(QStringLiteral("frame_limit"), Settings::values.frame_limit, 100); 1077 WriteSetting(QStringLiteral("frame_limit"), Settings::values.frame_limit, 100);
1071 WriteSetting(QStringLiteral("use_disk_shader_cache"), Settings::values.use_disk_shader_cache, 1078 WriteSetting(QStringLiteral("use_disk_shader_cache"), Settings::values.use_disk_shader_cache,
@@ -1074,6 +1081,7 @@ void Config::SaveRendererValues() {
1074 Settings::values.use_accurate_gpu_emulation, false); 1081 Settings::values.use_accurate_gpu_emulation, false);
1075 WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"), 1082 WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"),
1076 Settings::values.use_asynchronous_gpu_emulation, false); 1083 Settings::values.use_asynchronous_gpu_emulation, false);
1084 WriteSetting(QStringLiteral("use_vsync"), Settings::values.use_vsync, true);
1077 WriteSetting(QStringLiteral("force_30fps_mode"), Settings::values.force_30fps_mode, false); 1085 WriteSetting(QStringLiteral("force_30fps_mode"), Settings::values.force_30fps_mode, false);
1078 1086
1079 // Cast to double because Qt's written float values are not human-readable 1087 // Cast to double because Qt's written float values are not human-readable
@@ -1110,6 +1118,7 @@ void Config::SaveSystemValues() {
1110 WriteSetting(QStringLiteral("use_docked_mode"), Settings::values.use_docked_mode, false); 1118 WriteSetting(QStringLiteral("use_docked_mode"), Settings::values.use_docked_mode, false);
1111 WriteSetting(QStringLiteral("current_user"), Settings::values.current_user, 0); 1119 WriteSetting(QStringLiteral("current_user"), Settings::values.current_user, 0);
1112 WriteSetting(QStringLiteral("language_index"), Settings::values.language_index, 1); 1120 WriteSetting(QStringLiteral("language_index"), Settings::values.language_index, 1);
1121 WriteSetting(QStringLiteral("region_index"), Settings::values.region_index, 1);
1113 1122
1114 WriteSetting(QStringLiteral("rng_seed_enabled"), Settings::values.rng_seed.has_value(), false); 1123 WriteSetting(QStringLiteral("rng_seed_enabled"), Settings::values.rng_seed.has_value(), false);
1115 WriteSetting(QStringLiteral("rng_seed"), Settings::values.rng_seed.value_or(0), 0); 1124 WriteSetting(QStringLiteral("rng_seed"), Settings::values.rng_seed.value_or(0), 0);
@@ -1121,6 +1130,8 @@ void Config::SaveSystemValues() {
1121 Settings::values.custom_rtc.value_or(std::chrono::seconds{}).count()), 1130 Settings::values.custom_rtc.value_or(std::chrono::seconds{}).count()),
1122 0); 1131 0);
1123 1132
1133 WriteSetting(QStringLiteral("sound_index"), Settings::values.sound_index, 1);
1134
1124 qt_config->endGroup(); 1135 qt_config->endGroup();
1125} 1136}
1126 1137
diff --git a/src/yuzu/configuration/configure.ui b/src/yuzu/configuration/configure.ui
index 67b990f1a..9aec1bd09 100644
--- a/src/yuzu/configuration/configure.ui
+++ b/src/yuzu/configuration/configure.ui
@@ -83,6 +83,11 @@
83 <string>Graphics</string> 83 <string>Graphics</string>
84 </attribute> 84 </attribute>
85 </widget> 85 </widget>
86 <widget class="ConfigureGraphicsAdvanced" name="graphicsAdvancedTab">
87 <attribute name="title">
88 <string>GraphicsAdvanced</string>
89 </attribute>
90 </widget>
86 <widget class="ConfigureAudio" name="audioTab"> 91 <widget class="ConfigureAudio" name="audioTab">
87 <attribute name="title"> 92 <attribute name="title">
88 <string>Audio</string> 93 <string>Audio</string>
@@ -160,6 +165,12 @@
160 <container>1</container> 165 <container>1</container>
161 </customwidget> 166 </customwidget>
162 <customwidget> 167 <customwidget>
168 <class>ConfigureGraphicsAdvanced</class>
169 <extends>QWidget</extends>
170 <header>configuration/configure_graphics_advanced.h</header>
171 <container>1</container>
172 </customwidget>
173 <customwidget>
163 <class>ConfigureWeb</class> 174 <class>ConfigureWeb</class>
164 <extends>QWidget</extends> 175 <extends>QWidget</extends>
165 <header>configuration/configure_web.h</header> 176 <header>configuration/configure_web.h</header>
diff --git a/src/yuzu/configuration/configure_dialog.cpp b/src/yuzu/configuration/configure_dialog.cpp
index db3b19352..df4473b46 100644
--- a/src/yuzu/configuration/configure_dialog.cpp
+++ b/src/yuzu/configuration/configure_dialog.cpp
@@ -41,6 +41,7 @@ void ConfigureDialog::ApplyConfiguration() {
41 ui->inputTab->ApplyConfiguration(); 41 ui->inputTab->ApplyConfiguration();
42 ui->hotkeysTab->ApplyConfiguration(registry); 42 ui->hotkeysTab->ApplyConfiguration(registry);
43 ui->graphicsTab->ApplyConfiguration(); 43 ui->graphicsTab->ApplyConfiguration();
44 ui->graphicsAdvancedTab->ApplyConfiguration();
44 ui->audioTab->ApplyConfiguration(); 45 ui->audioTab->ApplyConfiguration();
45 ui->debugTab->ApplyConfiguration(); 46 ui->debugTab->ApplyConfiguration();
46 ui->webTab->ApplyConfiguration(); 47 ui->webTab->ApplyConfiguration();
@@ -76,7 +77,7 @@ void ConfigureDialog::PopulateSelectionList() {
76 const std::array<std::pair<QString, QList<QWidget*>>, 5> items{ 77 const std::array<std::pair<QString, QList<QWidget*>>, 5> items{
77 {{tr("General"), {ui->generalTab, ui->webTab, ui->debugTab, ui->uiTab}}, 78 {{tr("General"), {ui->generalTab, ui->webTab, ui->debugTab, ui->uiTab}},
78 {tr("System"), {ui->systemTab, ui->profileManagerTab, ui->serviceTab, ui->filesystemTab}}, 79 {tr("System"), {ui->systemTab, ui->profileManagerTab, ui->serviceTab, ui->filesystemTab}},
79 {tr("Graphics"), {ui->graphicsTab}}, 80 {tr("Graphics"), {ui->graphicsTab, ui->graphicsAdvancedTab}},
80 {tr("Audio"), {ui->audioTab}}, 81 {tr("Audio"), {ui->audioTab}},
81 {tr("Controls"), {ui->inputTab, ui->hotkeysTab}}}, 82 {tr("Controls"), {ui->inputTab, ui->hotkeysTab}}},
82 }; 83 };
@@ -105,6 +106,7 @@ void ConfigureDialog::UpdateVisibleTabs() {
105 {ui->inputTab, tr("Input")}, 106 {ui->inputTab, tr("Input")},
106 {ui->hotkeysTab, tr("Hotkeys")}, 107 {ui->hotkeysTab, tr("Hotkeys")},
107 {ui->graphicsTab, tr("Graphics")}, 108 {ui->graphicsTab, tr("Graphics")},
109 {ui->graphicsAdvancedTab, tr("Advanced")},
108 {ui->audioTab, tr("Audio")}, 110 {ui->audioTab, tr("Audio")},
109 {ui->debugTab, tr("Debug")}, 111 {ui->debugTab, tr("Debug")},
110 {ui->webTab, tr("Web")}, 112 {ui->webTab, tr("Web")},
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index ea899c080..a821c7b3c 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -100,11 +100,8 @@ void ConfigureGraphics::SetConfiguration() {
100 ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio); 100 ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio);
101 ui->use_disk_shader_cache->setEnabled(runtime_lock); 101 ui->use_disk_shader_cache->setEnabled(runtime_lock);
102 ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache); 102 ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache);
103 ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation);
104 ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock); 103 ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock);
105 ui->use_asynchronous_gpu_emulation->setChecked(Settings::values.use_asynchronous_gpu_emulation); 104 ui->use_asynchronous_gpu_emulation->setChecked(Settings::values.use_asynchronous_gpu_emulation);
106 ui->force_30fps_mode->setEnabled(runtime_lock);
107 ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode);
108 UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green, 105 UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green,
109 Settings::values.bg_blue)); 106 Settings::values.bg_blue));
110 UpdateDeviceComboBox(); 107 UpdateDeviceComboBox();
@@ -117,10 +114,8 @@ void ConfigureGraphics::ApplyConfiguration() {
117 ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex())); 114 ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex()));
118 Settings::values.aspect_ratio = ui->aspect_ratio_combobox->currentIndex(); 115 Settings::values.aspect_ratio = ui->aspect_ratio_combobox->currentIndex();
119 Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked(); 116 Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked();
120 Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked();
121 Settings::values.use_asynchronous_gpu_emulation = 117 Settings::values.use_asynchronous_gpu_emulation =
122 ui->use_asynchronous_gpu_emulation->isChecked(); 118 ui->use_asynchronous_gpu_emulation->isChecked();
123 Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked();
124 Settings::values.bg_red = static_cast<float>(bg_color.redF()); 119 Settings::values.bg_red = static_cast<float>(bg_color.redF());
125 Settings::values.bg_green = static_cast<float>(bg_color.greenF()); 120 Settings::values.bg_green = static_cast<float>(bg_color.greenF());
126 Settings::values.bg_blue = static_cast<float>(bg_color.blueF()); 121 Settings::values.bg_blue = static_cast<float>(bg_color.blueF());
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui
index db60426ab..c816d6108 100644
--- a/src/yuzu/configuration/configure_graphics.ui
+++ b/src/yuzu/configuration/configure_graphics.ui
@@ -85,20 +85,6 @@
85 </widget> 85 </widget>
86 </item> 86 </item>
87 <item> 87 <item>
88 <widget class="QCheckBox" name="use_accurate_gpu_emulation">
89 <property name="text">
90 <string>Use accurate GPU emulation (slow)</string>
91 </property>
92 </widget>
93 </item>
94 <item>
95 <widget class="QCheckBox" name="force_30fps_mode">
96 <property name="text">
97 <string>Force 30 FPS mode</string>
98 </property>
99 </widget>
100 </item>
101 <item>
102 <layout class="QHBoxLayout" name="horizontalLayout_2"> 88 <layout class="QHBoxLayout" name="horizontalLayout_2">
103 <item> 89 <item>
104 <widget class="QLabel" name="label"> 90 <widget class="QLabel" name="label">
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp
new file mode 100644
index 000000000..b9f429f84
--- /dev/null
+++ b/src/yuzu/configuration/configure_graphics_advanced.cpp
@@ -0,0 +1,48 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/core.h"
6#include "core/settings.h"
7#include "ui_configure_graphics_advanced.h"
8#include "yuzu/configuration/configure_graphics_advanced.h"
9
10ConfigureGraphicsAdvanced::ConfigureGraphicsAdvanced(QWidget* parent)
11 : QWidget(parent), ui(new Ui::ConfigureGraphicsAdvanced) {
12
13 ui->setupUi(this);
14
15 SetConfiguration();
16}
17
18ConfigureGraphicsAdvanced::~ConfigureGraphicsAdvanced() = default;
19
20void ConfigureGraphicsAdvanced::SetConfiguration() {
21 const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn();
22 ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation);
23 ui->use_vsync->setEnabled(runtime_lock);
24 ui->use_vsync->setChecked(Settings::values.use_vsync);
25 ui->force_30fps_mode->setEnabled(runtime_lock);
26 ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode);
27 ui->anisotropic_filtering_combobox->setEnabled(runtime_lock);
28 ui->anisotropic_filtering_combobox->setCurrentIndex(Settings::values.max_anisotropy);
29}
30
31void ConfigureGraphicsAdvanced::ApplyConfiguration() {
32 Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked();
33 Settings::values.use_vsync = ui->use_vsync->isChecked();
34 Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked();
35 Settings::values.max_anisotropy = ui->anisotropic_filtering_combobox->currentIndex();
36}
37
38void ConfigureGraphicsAdvanced::changeEvent(QEvent* event) {
39 if (event->type() == QEvent::LanguageChange) {
40 RetranslateUI();
41 }
42
43 QWidget::changeEvent(event);
44}
45
46void ConfigureGraphicsAdvanced::RetranslateUI() {
47 ui->retranslateUi(this);
48}
diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h
new file mode 100644
index 000000000..bbc9d4355
--- /dev/null
+++ b/src/yuzu/configuration/configure_graphics_advanced.h
@@ -0,0 +1,30 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <QWidget>
9
10namespace Ui {
11class ConfigureGraphicsAdvanced;
12}
13
14class ConfigureGraphicsAdvanced : public QWidget {
15 Q_OBJECT
16
17public:
18 explicit ConfigureGraphicsAdvanced(QWidget* parent = nullptr);
19 ~ConfigureGraphicsAdvanced() override;
20
21 void ApplyConfiguration();
22
23private:
24 void changeEvent(QEvent* event) override;
25 void RetranslateUI();
26
27 void SetConfiguration();
28
29 std::unique_ptr<Ui::ConfigureGraphicsAdvanced> ui;
30};
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui
new file mode 100644
index 000000000..42eec278e
--- /dev/null
+++ b/src/yuzu/configuration/configure_graphics_advanced.ui
@@ -0,0 +1,111 @@
1<?xml version="1.0" encoding="UTF-8"?>
2<ui version="4.0">
3 <class>ConfigureGraphicsAdvanced</class>
4 <widget class="QWidget" name="ConfigureGraphicsAdvanced">
5 <property name="geometry">
6 <rect>
7 <x>0</x>
8 <y>0</y>
9 <width>400</width>
10 <height>321</height>
11 </rect>
12 </property>
13 <property name="windowTitle">
14 <string>Form</string>
15 </property>
16 <layout class="QVBoxLayout" name="verticalLayout_1">
17 <item>
18 <layout class="QVBoxLayout" name="verticalLayout_2">
19 <item>
20 <widget class="QGroupBox" name="groupBox_1">
21 <property name="title">
22 <string>Advanced Graphics Settings</string>
23 </property>
24 <layout class="QVBoxLayout" name="verticalLayout_3">
25 <item>
26 <widget class="QCheckBox" name="use_accurate_gpu_emulation">
27 <property name="text">
28 <string>Use accurate GPU emulation (slow)</string>
29 </property>
30 </widget>
31 </item>
32 <item>
33 <widget class="QCheckBox" name="use_vsync">
34 <property name="toolTip">
35 <string>VSync prevents the screen from tearing, but some graphics cards have lower performance with VSync enabled. Keep it enabled if you don't notice a performance difference.</string>
36 </property>
37 <property name="text">
38 <string>Use VSync (OpenGL only)</string>
39 </property>
40 </widget>
41 </item>
42 <item>
43 <widget class="QCheckBox" name="force_30fps_mode">
44 <property name="text">
45 <string>Force 30 FPS mode</string>
46 </property>
47 </widget>
48 </item>
49 <item>
50 <layout class="QHBoxLayout" name="horizontalLayout_1">
51 <item>
52 <widget class="QLabel" name="af_label">
53 <property name="text">
54 <string>Anisotropic Filtering:</string>
55 </property>
56 </widget>
57 </item>
58 <item>
59 <widget class="QComboBox" name="anisotropic_filtering_combobox">
60 <item>
61 <property name="text">
62 <string>Default</string>
63 </property>
64 </item>
65 <item>
66 <property name="text">
67 <string>2x</string>
68 </property>
69 </item>
70 <item>
71 <property name="text">
72 <string>4x</string>
73 </property>
74 </item>
75 <item>
76 <property name="text">
77 <string>8x</string>
78 </property>
79 </item>
80 <item>
81 <property name="text">
82 <string>16x</string>
83 </property>
84 </item>
85 </widget>
86 </item>
87 </layout>
88 </item>
89 </layout>
90 </widget>
91 </item>
92 </layout>
93 </item>
94 <item>
95 <spacer name="verticalSpacer">
96 <property name="orientation">
97 <enum>Qt::Vertical</enum>
98 </property>
99 <property name="sizeHint" stdset="0">
100 <size>
101 <width>20</width>
102 <height>40</height>
103 </size>
104 </property>
105 </spacer>
106 </item>
107 </layout>
108 </widget>
109 <resources/>
110 <connections/>
111</ui>
diff --git a/src/yuzu/configuration/configure_system.cpp b/src/yuzu/configuration/configure_system.cpp
index e1b52f8d9..f49cd4c8f 100644
--- a/src/yuzu/configuration/configure_system.cpp
+++ b/src/yuzu/configuration/configure_system.cpp
@@ -56,6 +56,8 @@ void ConfigureSystem::SetConfiguration() {
56 enabled = !Core::System::GetInstance().IsPoweredOn(); 56 enabled = !Core::System::GetInstance().IsPoweredOn();
57 57
58 ui->combo_language->setCurrentIndex(Settings::values.language_index); 58 ui->combo_language->setCurrentIndex(Settings::values.language_index);
59 ui->combo_region->setCurrentIndex(Settings::values.region_index);
60 ui->combo_sound->setCurrentIndex(Settings::values.sound_index);
59 61
60 ui->rng_seed_checkbox->setChecked(Settings::values.rng_seed.has_value()); 62 ui->rng_seed_checkbox->setChecked(Settings::values.rng_seed.has_value());
61 ui->rng_seed_edit->setEnabled(Settings::values.rng_seed.has_value()); 63 ui->rng_seed_edit->setEnabled(Settings::values.rng_seed.has_value());
@@ -81,6 +83,8 @@ void ConfigureSystem::ApplyConfiguration() {
81 } 83 }
82 84
83 Settings::values.language_index = ui->combo_language->currentIndex(); 85 Settings::values.language_index = ui->combo_language->currentIndex();
86 Settings::values.region_index = ui->combo_region->currentIndex();
87 Settings::values.sound_index = ui->combo_sound->currentIndex();
84 88
85 if (ui->rng_seed_checkbox->isChecked()) { 89 if (ui->rng_seed_checkbox->isChecked()) {
86 Settings::values.rng_seed = ui->rng_seed_edit->text().toULongLong(nullptr, 16); 90 Settings::values.rng_seed = ui->rng_seed_edit->text().toULongLong(nullptr, 16);
diff --git a/src/yuzu/configuration/configure_system.h b/src/yuzu/configuration/configure_system.h
index 1eab3781d..d8fa2d2cc 100644
--- a/src/yuzu/configuration/configure_system.h
+++ b/src/yuzu/configuration/configure_system.h
@@ -36,5 +36,6 @@ private:
36 bool enabled = false; 36 bool enabled = false;
37 37
38 int language_index = 0; 38 int language_index = 0;
39 int region_index = 0;
39 int sound_index = 0; 40 int sound_index = 0;
40}; 41};
diff --git a/src/yuzu/configuration/configure_system.ui b/src/yuzu/configuration/configure_system.ui
index 65745a2f8..4e2c7e76e 100644
--- a/src/yuzu/configuration/configure_system.ui
+++ b/src/yuzu/configuration/configure_system.ui
@@ -22,14 +22,14 @@
22 <string>System Settings</string> 22 <string>System Settings</string>
23 </property> 23 </property>
24 <layout class="QGridLayout" name="gridLayout"> 24 <layout class="QGridLayout" name="gridLayout">
25 <item row="1" column="0"> 25 <item row="2" column="0">
26 <widget class="QLabel" name="label_sound"> 26 <widget class="QLabel" name="label_sound">
27 <property name="text"> 27 <property name="text">
28 <string>Sound output mode</string> 28 <string>Sound output mode</string>
29 </property> 29 </property>
30 </widget> 30 </widget>
31 </item> 31 </item>
32 <item row="2" column="0"> 32 <item row="3" column="0">
33 <widget class="QLabel" name="label_console_id"> 33 <widget class="QLabel" name="label_console_id">
34 <property name="text"> 34 <property name="text">
35 <string>Console ID:</string> 35 <string>Console ID:</string>
@@ -128,14 +128,60 @@
128 </item> 128 </item>
129 </widget> 129 </widget>
130 </item> 130 </item>
131 <item row="4" column="0"> 131 <item row="1" column="0">
132 <widget class="QLabel" name="label_region">
133 <property name="text">
134 <string>Region:</string>
135 </property>
136 </widget>
137 </item>
138 <item row="1" column="1">
139 <widget class="QComboBox" name="combo_region">
140 <item>
141 <property name="text">
142 <string>Japan</string>
143 </property>
144 </item>
145 <item>
146 <property name="text">
147 <string>USA</string>
148 </property>
149 </item>
150 <item>
151 <property name="text">
152 <string>Europe</string>
153 </property>
154 </item>
155 <item>
156 <property name="text">
157 <string>Australia</string>
158 </property>
159 </item>
160 <item>
161 <property name="text">
162 <string>China</string>
163 </property>
164 </item>
165 <item>
166 <property name="text">
167 <string>Korea</string>
168 </property>
169 </item>
170 <item>
171 <property name="text">
172 <string>Taiwan</string>
173 </property>
174 </item>
175 </widget>
176 </item>
177 <item row="5" column="0">
132 <widget class="QCheckBox" name="rng_seed_checkbox"> 178 <widget class="QCheckBox" name="rng_seed_checkbox">
133 <property name="text"> 179 <property name="text">
134 <string>RNG Seed</string> 180 <string>RNG Seed</string>
135 </property> 181 </property>
136 </widget> 182 </widget>
137 </item> 183 </item>
138 <item row="1" column="1"> 184 <item row="2" column="1">
139 <widget class="QComboBox" name="combo_sound"> 185 <widget class="QComboBox" name="combo_sound">
140 <item> 186 <item>
141 <property name="text"> 187 <property name="text">
@@ -161,7 +207,7 @@
161 </property> 207 </property>
162 </widget> 208 </widget>
163 </item> 209 </item>
164 <item row="2" column="1"> 210 <item row="3" column="1">
165 <widget class="QPushButton" name="button_regenerate_console_id"> 211 <widget class="QPushButton" name="button_regenerate_console_id">
166 <property name="sizePolicy"> 212 <property name="sizePolicy">
167 <sizepolicy hsizetype="Fixed" vsizetype="Fixed"> 213 <sizepolicy hsizetype="Fixed" vsizetype="Fixed">
@@ -177,14 +223,14 @@
177 </property> 223 </property>
178 </widget> 224 </widget>
179 </item> 225 </item>
180 <item row="3" column="0"> 226 <item row="4" column="0">
181 <widget class="QCheckBox" name="custom_rtc_checkbox"> 227 <widget class="QCheckBox" name="custom_rtc_checkbox">
182 <property name="text"> 228 <property name="text">
183 <string>Custom RTC</string> 229 <string>Custom RTC</string>
184 </property> 230 </property>
185 </widget> 231 </widget>
186 </item> 232 </item>
187 <item row="3" column="1"> 233 <item row="4" column="1">
188 <widget class="QDateTimeEdit" name="custom_rtc_edit"> 234 <widget class="QDateTimeEdit" name="custom_rtc_edit">
189 <property name="minimumDate"> 235 <property name="minimumDate">
190 <date> 236 <date>
@@ -198,7 +244,7 @@
198 </property> 244 </property>
199 </widget> 245 </widget>
200 </item> 246 </item>
201 <item row="4" column="1"> 247 <item row="5" column="1">
202 <widget class="QLineEdit" name="rng_seed_edit"> 248 <widget class="QLineEdit" name="rng_seed_edit">
203 <property name="sizePolicy"> 249 <property name="sizePolicy">
204 <sizepolicy hsizetype="Minimum" vsizetype="Fixed"> 250 <sizepolicy hsizetype="Minimum" vsizetype="Fixed">
diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp
index 3f1a94627..c1ea25fb8 100644
--- a/src/yuzu/debugger/wait_tree.cpp
+++ b/src/yuzu/debugger/wait_tree.cpp
@@ -116,7 +116,7 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeCallstack::GetChildren() cons
116 116
117 constexpr std::size_t BaseRegister = 29; 117 constexpr std::size_t BaseRegister = 29;
118 auto& memory = Core::System::GetInstance().Memory(); 118 auto& memory = Core::System::GetInstance().Memory();
119 u64 base_pointer = thread.GetContext().cpu_registers[BaseRegister]; 119 u64 base_pointer = thread.GetContext64().cpu_registers[BaseRegister];
120 120
121 while (base_pointer != 0) { 121 while (base_pointer != 0) {
122 const u64 lr = memory.Read64(base_pointer + sizeof(u64)); 122 const u64 lr = memory.Read64(base_pointer + sizeof(u64));
@@ -240,7 +240,7 @@ QString WaitTreeThread::GetText() const {
240 break; 240 break;
241 } 241 }
242 242
243 const auto& context = thread.GetContext(); 243 const auto& context = thread.GetContext64();
244 const QString pc_info = tr(" PC = 0x%1 LR = 0x%2") 244 const QString pc_info = tr(" PC = 0x%1 LR = 0x%2")
245 .arg(context.pc, 8, 16, QLatin1Char{'0'}) 245 .arg(context.pc, 8, 16, QLatin1Char{'0'})
246 .arg(context.cpu_registers[30], 8, 16, QLatin1Char{'0'}); 246 .arg(context.cpu_registers[30], 8, 16, QLatin1Char{'0'});
diff --git a/src/yuzu/loading_screen.cpp b/src/yuzu/loading_screen.cpp
index 4f2bfab48..2a6483370 100644
--- a/src/yuzu/loading_screen.cpp
+++ b/src/yuzu/loading_screen.cpp
@@ -34,18 +34,6 @@ constexpr char PROGRESSBAR_STYLE_PREPARE[] = R"(
34QProgressBar {} 34QProgressBar {}
35QProgressBar::chunk {})"; 35QProgressBar::chunk {})";
36 36
37constexpr char PROGRESSBAR_STYLE_DECOMPILE[] = R"(
38QProgressBar {
39 background-color: black;
40 border: 2px solid white;
41 border-radius: 4px;
42 padding: 2px;
43}
44QProgressBar::chunk {
45 background-color: #0ab9e6;
46 width: 1px;
47})";
48
49constexpr char PROGRESSBAR_STYLE_BUILD[] = R"( 37constexpr char PROGRESSBAR_STYLE_BUILD[] = R"(
50QProgressBar { 38QProgressBar {
51 background-color: black; 39 background-color: black;
@@ -100,13 +88,11 @@ LoadingScreen::LoadingScreen(QWidget* parent)
100 88
101 stage_translations = { 89 stage_translations = {
102 {VideoCore::LoadCallbackStage::Prepare, tr("Loading...")}, 90 {VideoCore::LoadCallbackStage::Prepare, tr("Loading...")},
103 {VideoCore::LoadCallbackStage::Decompile, tr("Preparing Shaders %1 / %2")},
104 {VideoCore::LoadCallbackStage::Build, tr("Loading Shaders %1 / %2")}, 91 {VideoCore::LoadCallbackStage::Build, tr("Loading Shaders %1 / %2")},
105 {VideoCore::LoadCallbackStage::Complete, tr("Launching...")}, 92 {VideoCore::LoadCallbackStage::Complete, tr("Launching...")},
106 }; 93 };
107 progressbar_style = { 94 progressbar_style = {
108 {VideoCore::LoadCallbackStage::Prepare, PROGRESSBAR_STYLE_PREPARE}, 95 {VideoCore::LoadCallbackStage::Prepare, PROGRESSBAR_STYLE_PREPARE},
109 {VideoCore::LoadCallbackStage::Decompile, PROGRESSBAR_STYLE_DECOMPILE},
110 {VideoCore::LoadCallbackStage::Build, PROGRESSBAR_STYLE_BUILD}, 96 {VideoCore::LoadCallbackStage::Build, PROGRESSBAR_STYLE_BUILD},
111 {VideoCore::LoadCallbackStage::Complete, PROGRESSBAR_STYLE_COMPLETE}, 97 {VideoCore::LoadCallbackStage::Complete, PROGRESSBAR_STYLE_COMPLETE},
112 }; 98 };
@@ -192,8 +178,7 @@ void LoadingScreen::OnLoadProgress(VideoCore::LoadCallbackStage stage, std::size
192 } 178 }
193 179
194 // update labels and progress bar 180 // update labels and progress bar
195 if (stage == VideoCore::LoadCallbackStage::Decompile || 181 if (stage == VideoCore::LoadCallbackStage::Build) {
196 stage == VideoCore::LoadCallbackStage::Build) {
197 ui->stage->setText(stage_translations[stage].arg(value).arg(total)); 182 ui->stage->setText(stage_translations[stage].arg(value).arg(total));
198 } else { 183 } else {
199 ui->stage->setText(stage_translations[stage]); 184 ui->stage->setText(stage_translations[stage]);
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 1be61bd48..4769a612e 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -20,7 +20,6 @@
20#include "core/file_sys/vfs.h" 20#include "core/file_sys/vfs.h"
21#include "core/file_sys/vfs_real.h" 21#include "core/file_sys/vfs_real.h"
22#include "core/frontend/applets/general_frontend.h" 22#include "core/frontend/applets/general_frontend.h"
23#include "core/frontend/scope_acquire_window_context.h"
24#include "core/hle/service/acc/profile_manager.h" 23#include "core/hle/service/acc/profile_manager.h"
25#include "core/hle/service/am/applet_ae.h" 24#include "core/hle/service/am/applet_ae.h"
26#include "core/hle/service/am/applet_oe.h" 25#include "core/hle/service/am/applet_oe.h"
@@ -985,11 +984,8 @@ void GMainWindow::BootGame(const QString& filename) {
985 return; 984 return;
986 985
987 // Create and start the emulation thread 986 // Create and start the emulation thread
988 emu_thread = std::make_unique<EmuThread>(render_window); 987 emu_thread = std::make_unique<EmuThread>(*render_window);
989 emit EmulationStarting(emu_thread.get()); 988 emit EmulationStarting(emu_thread.get());
990 if (Settings::values.renderer_backend == Settings::RendererBackend::OpenGL) {
991 render_window->moveContext();
992 }
993 emu_thread->start(); 989 emu_thread->start();
994 990
995 connect(render_window, &GRenderWindow::Closed, this, &GMainWindow::OnStopGame); 991 connect(render_window, &GRenderWindow::Closed, this, &GMainWindow::OnStopGame);
@@ -1087,6 +1083,9 @@ void GMainWindow::ShutdownGame() {
1087 emulation_running = false; 1083 emulation_running = false;
1088 1084
1089 game_path.clear(); 1085 game_path.clear();
1086
1087 // When closing the game, destroy the GLWindow to clear the context after the game is closed
1088 render_window->ReleaseRenderTarget();
1090} 1089}
1091 1090
1092void GMainWindow::StoreRecentFile(const QString& filename) { 1091void GMainWindow::StoreRecentFile(const QString& filename) {
@@ -2210,48 +2209,47 @@ void GMainWindow::closeEvent(QCloseEvent* event) {
2210 QWidget::closeEvent(event); 2209 QWidget::closeEvent(event);
2211} 2210}
2212 2211
2213void GMainWindow::keyPressEvent(QKeyEvent* event) { 2212static bool IsSingleFileDropEvent(const QMimeData* mime) {
2214 if (render_window) { 2213 return mime->hasUrls() && mime->urls().length() == 1;
2215 render_window->ForwardKeyPressEvent(event);
2216 }
2217} 2214}
2218 2215
2219void GMainWindow::keyReleaseEvent(QKeyEvent* event) { 2216void GMainWindow::AcceptDropEvent(QDropEvent* event) {
2220 if (render_window) { 2217 if (IsSingleFileDropEvent(event->mimeData())) {
2221 render_window->ForwardKeyReleaseEvent(event); 2218 event->setDropAction(Qt::DropAction::LinkAction);
2219 event->accept();
2222 } 2220 }
2223} 2221}
2224 2222
2225static bool IsSingleFileDropEvent(QDropEvent* event) { 2223bool GMainWindow::DropAction(QDropEvent* event) {
2226 const QMimeData* mimeData = event->mimeData(); 2224 if (!IsSingleFileDropEvent(event->mimeData())) {
2227 return mimeData->hasUrls() && mimeData->urls().length() == 1; 2225 return false;
2228}
2229
2230void GMainWindow::dropEvent(QDropEvent* event) {
2231 if (!IsSingleFileDropEvent(event)) {
2232 return;
2233 } 2226 }
2234 2227
2235 const QMimeData* mime_data = event->mimeData(); 2228 const QMimeData* mime_data = event->mimeData();
2236 const QString filename = mime_data->urls().at(0).toLocalFile(); 2229 const QString& filename = mime_data->urls().at(0).toLocalFile();
2237 2230
2238 if (emulation_running && QFileInfo(filename).suffix() == QStringLiteral("bin")) { 2231 if (emulation_running && QFileInfo(filename).suffix() == QStringLiteral("bin")) {
2232 // Amiibo
2239 LoadAmiibo(filename); 2233 LoadAmiibo(filename);
2240 } else { 2234 } else {
2235 // Game
2241 if (ConfirmChangeGame()) { 2236 if (ConfirmChangeGame()) {
2242 BootGame(filename); 2237 BootGame(filename);
2243 } 2238 }
2244 } 2239 }
2240 return true;
2241}
2242
2243void GMainWindow::dropEvent(QDropEvent* event) {
2244 DropAction(event);
2245} 2245}
2246 2246
2247void GMainWindow::dragEnterEvent(QDragEnterEvent* event) { 2247void GMainWindow::dragEnterEvent(QDragEnterEvent* event) {
2248 if (IsSingleFileDropEvent(event)) { 2248 AcceptDropEvent(event);
2249 event->acceptProposedAction();
2250 }
2251} 2249}
2252 2250
2253void GMainWindow::dragMoveEvent(QDragMoveEvent* event) { 2251void GMainWindow::dragMoveEvent(QDragMoveEvent* event) {
2254 event->acceptProposedAction(); 2252 AcceptDropEvent(event);
2255} 2253}
2256 2254
2257bool GMainWindow::ConfirmChangeGame() { 2255bool GMainWindow::ConfirmChangeGame() {
@@ -2372,6 +2370,7 @@ int main(int argc, char* argv[]) {
2372 2370
2373 // Enables the core to make the qt created contexts current on std::threads 2371 // Enables the core to make the qt created contexts current on std::threads
2374 QCoreApplication::setAttribute(Qt::AA_DontCheckOpenGLContextThreadAffinity); 2372 QCoreApplication::setAttribute(Qt::AA_DontCheckOpenGLContextThreadAffinity);
2373 QCoreApplication::setAttribute(Qt::AA_ShareOpenGLContexts);
2375 QApplication app(argc, argv); 2374 QApplication app(argc, argv);
2376 2375
2377 // Qt changes the locale and causes issues in float conversion using std::to_string() when 2376 // Qt changes the locale and causes issues in float conversion using std::to_string() when
diff --git a/src/yuzu/main.h b/src/yuzu/main.h
index 8eba2172c..a67125567 100644
--- a/src/yuzu/main.h
+++ b/src/yuzu/main.h
@@ -78,6 +78,9 @@ public:
78 78
79 std::unique_ptr<DiscordRPC::DiscordInterface> discord_rpc; 79 std::unique_ptr<DiscordRPC::DiscordInterface> discord_rpc;
80 80
81 bool DropAction(QDropEvent* event);
82 void AcceptDropEvent(QDropEvent* event);
83
81signals: 84signals:
82 85
83 /** 86 /**
@@ -264,8 +267,4 @@ protected:
264 void dropEvent(QDropEvent* event) override; 267 void dropEvent(QDropEvent* event) override;
265 void dragEnterEvent(QDragEnterEvent* event) override; 268 void dragEnterEvent(QDragEnterEvent* event) override;
266 void dragMoveEvent(QDragMoveEvent* event) override; 269 void dragMoveEvent(QDragMoveEvent* event) override;
267
268 // Overrides used to forward signals to the render window when the focus moves out.
269 void keyPressEvent(QKeyEvent* event) override;
270 void keyReleaseEvent(QKeyEvent* event) override;
271}; 270};
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 96f1ce3af..f4cd905c9 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -381,6 +381,8 @@ void Config::ReadValues() {
381 static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0)); 381 static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0));
382 Settings::values.aspect_ratio = 382 Settings::values.aspect_ratio =
383 static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0)); 383 static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0));
384 Settings::values.max_anisotropy =
385 static_cast<int>(sdl2_config->GetInteger("Renderer", "max_anisotropy", 0));
384 Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true); 386 Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true);
385 Settings::values.frame_limit = 387 Settings::values.frame_limit =
386 static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)); 388 static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100));
@@ -390,6 +392,8 @@ void Config::ReadValues() {
390 sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false); 392 sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false);
391 Settings::values.use_asynchronous_gpu_emulation = 393 Settings::values.use_asynchronous_gpu_emulation =
392 sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false); 394 sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false);
395 Settings::values.use_vsync =
396 static_cast<u16>(sdl2_config->GetInteger("Renderer", "use_vsync", 1));
393 397
394 Settings::values.bg_red = static_cast<float>(sdl2_config->GetReal("Renderer", "bg_red", 0.0)); 398 Settings::values.bg_red = static_cast<float>(sdl2_config->GetReal("Renderer", "bg_red", 0.0));
395 Settings::values.bg_green = 399 Settings::values.bg_green =
@@ -448,7 +452,7 @@ void Config::ReadValues() {
448 Settings::values.yuzu_token = sdl2_config->Get("WebService", "yuzu_token", ""); 452 Settings::values.yuzu_token = sdl2_config->Get("WebService", "yuzu_token", "");
449 453
450 // Services 454 // Services
451 Settings::values.bcat_backend = sdl2_config->Get("Services", "bcat_backend", "boxcat"); 455 Settings::values.bcat_backend = sdl2_config->Get("Services", "bcat_backend", "null");
452 Settings::values.bcat_boxcat_local = 456 Settings::values.bcat_boxcat_local =
453 sdl2_config->GetBoolean("Services", "bcat_boxcat_local", false); 457 sdl2_config->GetBoolean("Services", "bcat_boxcat_local", false);
454} 458}
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index 8a2b658cd..d63d7a58e 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -84,7 +84,7 @@ touch_device=
84# from any cemuhook compatible motion program. 84# from any cemuhook compatible motion program.
85 85
86# IPv4 address of the udp input server (Default "127.0.0.1") 86# IPv4 address of the udp input server (Default "127.0.0.1")
87udp_input_address= 87udp_input_address=127.0.0.1
88 88
89# Port of the udp input server. (Default 26760) 89# Port of the udp input server. (Default 26760)
90udp_input_port= 90udp_input_port=
@@ -126,6 +126,10 @@ resolution_factor =
126# 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window 126# 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window
127aspect_ratio = 127aspect_ratio =
128 128
129# Anisotropic filtering
130# 0: Default, 1: 2x, 2: 4x, 3: 8x, 4: 16x
131max_anisotropy =
132
129# Whether to enable V-Sync (caps the framerate at 60FPS) or not. 133# Whether to enable V-Sync (caps the framerate at 60FPS) or not.
130# 0 (default): Off, 1: On 134# 0 (default): Off, 1: On
131use_vsync = 135use_vsync =
@@ -150,6 +154,11 @@ use_accurate_gpu_emulation =
150# 0 : Off (slow), 1 (default): On (fast) 154# 0 : Off (slow), 1 (default): On (fast)
151use_asynchronous_gpu_emulation = 155use_asynchronous_gpu_emulation =
152 156
157# Forces VSync on the display thread. Usually doesn't impact performance, but on some drivers it can
158# so only turn this off if you notice a speed difference.
159# 0: Off, 1 (default): On
160use_vsync =
161
153# The clear color for the renderer. What shows up on the sides of the bottom screen. 162# The clear color for the renderer. What shows up on the sides of the bottom screen.
154# Must be in range of 0.0-1.0. Defaults to 1.0 for all. 163# Must be in range of 0.0-1.0. Defaults to 1.0 for all.
155bg_red = 164bg_red =
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
index e96139885..19584360c 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp
@@ -13,7 +13,7 @@
13#include "input_common/sdl/sdl.h" 13#include "input_common/sdl/sdl.h"
14#include "yuzu_cmd/emu_window/emu_window_sdl2.h" 14#include "yuzu_cmd/emu_window/emu_window_sdl2.h"
15 15
16EmuWindow_SDL2::EmuWindow_SDL2(bool fullscreen) { 16EmuWindow_SDL2::EmuWindow_SDL2(Core::System& system, bool fullscreen) : system{system} {
17 if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_JOYSTICK) < 0) { 17 if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_JOYSTICK) < 0) {
18 LOG_CRITICAL(Frontend, "Failed to initialize SDL2! Exiting..."); 18 LOG_CRITICAL(Frontend, "Failed to initialize SDL2! Exiting...");
19 exit(1); 19 exit(1);
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.h b/src/yuzu_cmd/emu_window/emu_window_sdl2.h
index b38f56661..fffac4252 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2.h
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.h
@@ -10,9 +10,13 @@
10 10
11struct SDL_Window; 11struct SDL_Window;
12 12
13namespace Core {
14class System;
15}
16
13class EmuWindow_SDL2 : public Core::Frontend::EmuWindow { 17class EmuWindow_SDL2 : public Core::Frontend::EmuWindow {
14public: 18public:
15 explicit EmuWindow_SDL2(bool fullscreen); 19 explicit EmuWindow_SDL2(Core::System& system, bool fullscreen);
16 ~EmuWindow_SDL2(); 20 ~EmuWindow_SDL2();
17 21
18 /// Polls window events 22 /// Polls window events
@@ -24,6 +28,9 @@ public:
24 /// Returns if window is shown (not minimized) 28 /// Returns if window is shown (not minimized)
25 bool IsShown() const override; 29 bool IsShown() const override;
26 30
31 /// Presents the next frame
32 virtual void Present() = 0;
33
27protected: 34protected:
28 /// Called by PollEvents when a key is pressed or released. 35 /// Called by PollEvents when a key is pressed or released.
29 void OnKeyEvent(int key, u8 state); 36 void OnKeyEvent(int key, u8 state);
@@ -55,6 +62,9 @@ protected:
55 /// Called when a configuration change affects the minimal size of the window 62 /// Called when a configuration change affects the minimal size of the window
56 void OnMinimalClientAreaChangeRequest(std::pair<unsigned, unsigned> minimal_size) override; 63 void OnMinimalClientAreaChangeRequest(std::pair<unsigned, unsigned> minimal_size) override;
57 64
65 /// Instance of the system, used to access renderer for the presentation thread
66 Core::System& system;
67
58 /// Is the window still open? 68 /// Is the window still open?
59 bool is_open = true; 69 bool is_open = true;
60 70
@@ -62,7 +72,7 @@ protected:
62 bool is_shown = true; 72 bool is_shown = true;
63 73
64 /// Internal SDL2 render window 74 /// Internal SDL2 render window
65 SDL_Window* render_window; 75 SDL_Window* render_window{};
66 76
67 /// Keeps track of how often to update the title bar during gameplay 77 /// Keeps track of how often to update the title bar during gameplay
68 u32 last_time = 0; 78 u32 last_time = 0;
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
index 7ffa0ac09..c0d373477 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
@@ -13,24 +13,25 @@
13#include "common/logging/log.h" 13#include "common/logging/log.h"
14#include "common/scm_rev.h" 14#include "common/scm_rev.h"
15#include "common/string_util.h" 15#include "common/string_util.h"
16#include "core/core.h"
16#include "core/settings.h" 17#include "core/settings.h"
17#include "input_common/keyboard.h" 18#include "input_common/keyboard.h"
18#include "input_common/main.h" 19#include "input_common/main.h"
19#include "input_common/motion_emu.h" 20#include "input_common/motion_emu.h"
21#include "video_core/renderer_base.h"
20#include "yuzu_cmd/emu_window/emu_window_sdl2_gl.h" 22#include "yuzu_cmd/emu_window/emu_window_sdl2_gl.h"
21 23
22class SDLGLContext : public Core::Frontend::GraphicsContext { 24class SDLGLContext : public Core::Frontend::GraphicsContext {
23public: 25public:
24 explicit SDLGLContext() { 26 explicit SDLGLContext() {
25 // create a hidden window to make the shared context against 27 // create a hidden window to make the shared context against
26 window = SDL_CreateWindow("", SDL_WINDOWPOS_UNDEFINED, // x position 28 window = SDL_CreateWindow(NULL, SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, 0, 0,
27 SDL_WINDOWPOS_UNDEFINED, // y position 29 SDL_WINDOW_HIDDEN | SDL_WINDOW_OPENGL);
28 Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height,
29 SDL_WINDOW_OPENGL | SDL_WINDOW_HIDDEN);
30 context = SDL_GL_CreateContext(window); 30 context = SDL_GL_CreateContext(window);
31 } 31 }
32 32
33 ~SDLGLContext() { 33 ~SDLGLContext() {
34 DoneCurrent();
34 SDL_GL_DeleteContext(context); 35 SDL_GL_DeleteContext(context);
35 SDL_DestroyWindow(window); 36 SDL_DestroyWindow(window);
36 } 37 }
@@ -43,8 +44,6 @@ public:
43 SDL_GL_MakeCurrent(window, nullptr); 44 SDL_GL_MakeCurrent(window, nullptr);
44 } 45 }
45 46
46 void SwapBuffers() override {}
47
48private: 47private:
49 SDL_Window* window; 48 SDL_Window* window;
50 SDL_GLContext context; 49 SDL_GLContext context;
@@ -80,7 +79,8 @@ bool EmuWindow_SDL2_GL::SupportsRequiredGLExtensions() {
80 return unsupported_ext.empty(); 79 return unsupported_ext.empty();
81} 80}
82 81
83EmuWindow_SDL2_GL::EmuWindow_SDL2_GL(bool fullscreen) : EmuWindow_SDL2(fullscreen) { 82EmuWindow_SDL2_GL::EmuWindow_SDL2_GL(Core::System& system, bool fullscreen)
83 : EmuWindow_SDL2{system, fullscreen} {
84 SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4); 84 SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4);
85 SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 3); 85 SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 3);
86 SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_COMPATIBILITY); 86 SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_COMPATIBILITY);
@@ -90,6 +90,7 @@ EmuWindow_SDL2_GL::EmuWindow_SDL2_GL(bool fullscreen) : EmuWindow_SDL2(fullscree
90 SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 8); 90 SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 8);
91 SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 0); 91 SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 0);
92 SDL_GL_SetAttribute(SDL_GL_SHARE_WITH_CURRENT_CONTEXT, 1); 92 SDL_GL_SetAttribute(SDL_GL_SHARE_WITH_CURRENT_CONTEXT, 1);
93 SDL_GL_SetSwapInterval(0);
93 94
94 std::string window_title = fmt::format("yuzu {} | {}-{}", Common::g_build_fullname, 95 std::string window_title = fmt::format("yuzu {} | {}-{}", Common::g_build_fullname,
95 Common::g_scm_branch, Common::g_scm_desc); 96 Common::g_scm_branch, Common::g_scm_desc);
@@ -105,13 +106,22 @@ EmuWindow_SDL2_GL::EmuWindow_SDL2_GL(bool fullscreen) : EmuWindow_SDL2(fullscree
105 exit(1); 106 exit(1);
106 } 107 }
107 108
109 dummy_window = SDL_CreateWindow(NULL, SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, 0, 0,
110 SDL_WINDOW_HIDDEN | SDL_WINDOW_OPENGL);
111
108 if (fullscreen) { 112 if (fullscreen) {
109 Fullscreen(); 113 Fullscreen();
110 } 114 }
111 gl_context = SDL_GL_CreateContext(render_window);
112 115
113 if (gl_context == nullptr) { 116 window_context = SDL_GL_CreateContext(render_window);
114 LOG_CRITICAL(Frontend, "Failed to create SDL2 GL context! {}", SDL_GetError()); 117 core_context = CreateSharedContext();
118
119 if (window_context == nullptr) {
120 LOG_CRITICAL(Frontend, "Failed to create SDL2 GL context: {}", SDL_GetError());
121 exit(1);
122 }
123 if (core_context == nullptr) {
124 LOG_CRITICAL(Frontend, "Failed to create shared SDL2 GL context: {}", SDL_GetError());
115 exit(1); 125 exit(1);
116 } 126 }
117 127
@@ -128,28 +138,22 @@ EmuWindow_SDL2_GL::EmuWindow_SDL2_GL(bool fullscreen) : EmuWindow_SDL2(fullscree
128 OnResize(); 138 OnResize();
129 OnMinimalClientAreaChangeRequest(GetActiveConfig().min_client_area_size); 139 OnMinimalClientAreaChangeRequest(GetActiveConfig().min_client_area_size);
130 SDL_PumpEvents(); 140 SDL_PumpEvents();
131 SDL_GL_SetSwapInterval(false);
132 LOG_INFO(Frontend, "yuzu Version: {} | {}-{}", Common::g_build_fullname, Common::g_scm_branch, 141 LOG_INFO(Frontend, "yuzu Version: {} | {}-{}", Common::g_build_fullname, Common::g_scm_branch,
133 Common::g_scm_desc); 142 Common::g_scm_desc);
134 Settings::LogSettings(); 143 Settings::LogSettings();
135
136 DoneCurrent();
137} 144}
138 145
139EmuWindow_SDL2_GL::~EmuWindow_SDL2_GL() { 146EmuWindow_SDL2_GL::~EmuWindow_SDL2_GL() {
140 SDL_GL_DeleteContext(gl_context); 147 core_context.reset();
141} 148 SDL_GL_DeleteContext(window_context);
142
143void EmuWindow_SDL2_GL::SwapBuffers() {
144 SDL_GL_SwapWindow(render_window);
145} 149}
146 150
147void EmuWindow_SDL2_GL::MakeCurrent() { 151void EmuWindow_SDL2_GL::MakeCurrent() {
148 SDL_GL_MakeCurrent(render_window, gl_context); 152 core_context->MakeCurrent();
149} 153}
150 154
151void EmuWindow_SDL2_GL::DoneCurrent() { 155void EmuWindow_SDL2_GL::DoneCurrent() {
152 SDL_GL_MakeCurrent(render_window, nullptr); 156 core_context->DoneCurrent();
153} 157}
154 158
155void EmuWindow_SDL2_GL::RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, 159void EmuWindow_SDL2_GL::RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
@@ -161,3 +165,13 @@ void EmuWindow_SDL2_GL::RetrieveVulkanHandlers(void* get_instance_proc_addr, voi
161std::unique_ptr<Core::Frontend::GraphicsContext> EmuWindow_SDL2_GL::CreateSharedContext() const { 165std::unique_ptr<Core::Frontend::GraphicsContext> EmuWindow_SDL2_GL::CreateSharedContext() const {
162 return std::make_unique<SDLGLContext>(); 166 return std::make_unique<SDLGLContext>();
163} 167}
168
169void EmuWindow_SDL2_GL::Present() {
170 SDL_GL_MakeCurrent(render_window, window_context);
171 SDL_GL_SetSwapInterval(Settings::values.use_vsync ? 1 : 0);
172 while (IsOpen()) {
173 system.Renderer().TryPresent(100);
174 SDL_GL_SwapWindow(render_window);
175 }
176 SDL_GL_MakeCurrent(render_window, nullptr);
177}
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h
index c753085a8..b80669ff0 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h
@@ -10,17 +10,12 @@
10 10
11class EmuWindow_SDL2_GL final : public EmuWindow_SDL2 { 11class EmuWindow_SDL2_GL final : public EmuWindow_SDL2 {
12public: 12public:
13 explicit EmuWindow_SDL2_GL(bool fullscreen); 13 explicit EmuWindow_SDL2_GL(Core::System& system, bool fullscreen);
14 ~EmuWindow_SDL2_GL(); 14 ~EmuWindow_SDL2_GL();
15 15
16 /// Swap buffers to display the next frame
17 void SwapBuffers() override;
18
19 /// Makes the graphics context current for the caller thread
20 void MakeCurrent() override; 16 void MakeCurrent() override;
21
22 /// Releases the GL context from the caller thread
23 void DoneCurrent() override; 17 void DoneCurrent() override;
18 void Present() override;
24 19
25 /// Ignored in OpenGL 20 /// Ignored in OpenGL
26 void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, 21 void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
@@ -29,10 +24,17 @@ public:
29 std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; 24 std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override;
30 25
31private: 26private:
27 /// Fake hidden window for the core context
28 SDL_Window* dummy_window{};
29
32 /// Whether the GPU and driver supports the OpenGL extension required 30 /// Whether the GPU and driver supports the OpenGL extension required
33 bool SupportsRequiredGLExtensions(); 31 bool SupportsRequiredGLExtensions();
34 32
35 using SDL_GLContext = void*; 33 using SDL_GLContext = void*;
34
36 /// The OpenGL context associated with the window 35 /// The OpenGL context associated with the window
37 SDL_GLContext gl_context; 36 SDL_GLContext window_context;
37
38 /// The OpenGL context associated with the core
39 std::unique_ptr<Core::Frontend::GraphicsContext> core_context;
38}; 40};
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp
index a203f0da9..abcc58165 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp
@@ -15,7 +15,8 @@
15#include "core/settings.h" 15#include "core/settings.h"
16#include "yuzu_cmd/emu_window/emu_window_sdl2_vk.h" 16#include "yuzu_cmd/emu_window/emu_window_sdl2_vk.h"
17 17
18EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(bool fullscreen) : EmuWindow_SDL2(fullscreen) { 18EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(Core::System& system, bool fullscreen)
19 : EmuWindow_SDL2{system, fullscreen} {
19 if (SDL_Vulkan_LoadLibrary(nullptr) != 0) { 20 if (SDL_Vulkan_LoadLibrary(nullptr) != 0) {
20 LOG_CRITICAL(Frontend, "SDL failed to load the Vulkan library: {}", SDL_GetError()); 21 LOG_CRITICAL(Frontend, "SDL failed to load the Vulkan library: {}", SDL_GetError());
21 exit(EXIT_FAILURE); 22 exit(EXIT_FAILURE);
@@ -110,8 +111,6 @@ EmuWindow_SDL2_VK::~EmuWindow_SDL2_VK() {
110 vkDestroyInstance(vk_instance, nullptr); 111 vkDestroyInstance(vk_instance, nullptr);
111} 112}
112 113
113void EmuWindow_SDL2_VK::SwapBuffers() {}
114
115void EmuWindow_SDL2_VK::MakeCurrent() { 114void EmuWindow_SDL2_VK::MakeCurrent() {
116 // Unused on Vulkan 115 // Unused on Vulkan
117} 116}
@@ -160,3 +159,7 @@ bool EmuWindow_SDL2_VK::UseStandardLayers(PFN_vkGetInstanceProcAddr vkGetInstanc
160 return layer.layerName == std::string("VK_LAYER_LUNARG_standard_validation"); 159 return layer.layerName == std::string("VK_LAYER_LUNARG_standard_validation");
161 }) != layers.end(); 160 }) != layers.end();
162} 161}
162
163void EmuWindow_SDL2_VK::Present() {
164 // TODO (bunnei): ImplementMe
165}
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h
index 2a7c06a24..1eb8c0868 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h
@@ -10,19 +10,12 @@
10 10
11class EmuWindow_SDL2_VK final : public EmuWindow_SDL2 { 11class EmuWindow_SDL2_VK final : public EmuWindow_SDL2 {
12public: 12public:
13 explicit EmuWindow_SDL2_VK(bool fullscreen); 13 explicit EmuWindow_SDL2_VK(Core::System& system, bool fullscreen);
14 ~EmuWindow_SDL2_VK(); 14 ~EmuWindow_SDL2_VK();
15 15
16 /// Swap buffers to display the next frame
17 void SwapBuffers() override;
18
19 /// Makes the graphics context current for the caller thread
20 void MakeCurrent() override; 16 void MakeCurrent() override;
21
22 /// Releases the GL context from the caller thread
23 void DoneCurrent() override; 17 void DoneCurrent() override;
24 18 void Present() override;
25 /// Retrieves Vulkan specific handlers from the window
26 void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, 19 void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
27 void* surface) const override; 20 void* surface) const override;
28 21
diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp
index 325795321..babf4c3a4 100644
--- a/src/yuzu_cmd/yuzu.cpp
+++ b/src/yuzu_cmd/yuzu.cpp
@@ -177,14 +177,16 @@ int main(int argc, char** argv) {
177 Settings::values.use_gdbstub = use_gdbstub; 177 Settings::values.use_gdbstub = use_gdbstub;
178 Settings::Apply(); 178 Settings::Apply();
179 179
180 Core::System& system{Core::System::GetInstance()};
181
180 std::unique_ptr<EmuWindow_SDL2> emu_window; 182 std::unique_ptr<EmuWindow_SDL2> emu_window;
181 switch (Settings::values.renderer_backend) { 183 switch (Settings::values.renderer_backend) {
182 case Settings::RendererBackend::OpenGL: 184 case Settings::RendererBackend::OpenGL:
183 emu_window = std::make_unique<EmuWindow_SDL2_GL>(fullscreen); 185 emu_window = std::make_unique<EmuWindow_SDL2_GL>(system, fullscreen);
184 break; 186 break;
185 case Settings::RendererBackend::Vulkan: 187 case Settings::RendererBackend::Vulkan:
186#ifdef HAS_VULKAN 188#ifdef HAS_VULKAN
187 emu_window = std::make_unique<EmuWindow_SDL2_VK>(fullscreen); 189 emu_window = std::make_unique<EmuWindow_SDL2_VK>(system, fullscreen);
188 break; 190 break;
189#else 191#else
190 LOG_CRITICAL(Frontend, "Vulkan backend has not been compiled!"); 192 LOG_CRITICAL(Frontend, "Vulkan backend has not been compiled!");
@@ -192,12 +194,6 @@ int main(int argc, char** argv) {
192#endif 194#endif
193 } 195 }
194 196
195 if (!Settings::values.use_multi_core) {
196 // Single core mode must acquire OpenGL context for entire emulation session
197 emu_window->MakeCurrent();
198 }
199
200 Core::System& system{Core::System::GetInstance()};
201 system.SetContentProvider(std::make_unique<FileSys::ContentProviderUnion>()); 197 system.SetContentProvider(std::make_unique<FileSys::ContentProviderUnion>());
202 system.SetFilesystem(std::make_shared<FileSys::RealVfsFilesystem>()); 198 system.SetFilesystem(std::make_shared<FileSys::RealVfsFilesystem>());
203 system.GetFileSystemController().CreateFactories(*system.GetFilesystem()); 199 system.GetFileSystemController().CreateFactories(*system.GetFilesystem());
@@ -234,12 +230,23 @@ int main(int argc, char** argv) {
234 230
235 system.TelemetrySession().AddField(Telemetry::FieldType::App, "Frontend", "SDL"); 231 system.TelemetrySession().AddField(Telemetry::FieldType::App, "Frontend", "SDL");
236 232
237 emu_window->MakeCurrent();
238 system.Renderer().Rasterizer().LoadDiskResources(); 233 system.Renderer().Rasterizer().LoadDiskResources();
239 234
235 // Acquire render context for duration of the thread if this is the rendering thread
236 if (!Settings::values.use_asynchronous_gpu_emulation) {
237 emu_window->MakeCurrent();
238 }
239 SCOPE_EXIT({
240 if (!Settings::values.use_asynchronous_gpu_emulation) {
241 emu_window->DoneCurrent();
242 }
243 });
244
245 std::thread render_thread([&emu_window] { emu_window->Present(); });
240 while (emu_window->IsOpen()) { 246 while (emu_window->IsOpen()) {
241 system.RunLoop(); 247 system.RunLoop();
242 } 248 }
249 render_thread.join();
243 250
244 system.Shutdown(); 251 system.Shutdown();
245 252
diff --git a/src/yuzu_tester/config.cpp b/src/yuzu_tester/config.cpp
index 0ac93b62a..ee2591c8f 100644
--- a/src/yuzu_tester/config.cpp
+++ b/src/yuzu_tester/config.cpp
@@ -120,6 +120,8 @@ void Config::ReadValues() {
120 static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0)); 120 static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0));
121 Settings::values.aspect_ratio = 121 Settings::values.aspect_ratio =
122 static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0)); 122 static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0));
123 Settings::values.max_anisotropy =
124 static_cast<int>(sdl2_config->GetInteger("Renderer", "max_anisotropy", 0));
123 Settings::values.use_frame_limit = false; 125 Settings::values.use_frame_limit = false;
124 Settings::values.frame_limit = 100; 126 Settings::values.frame_limit = 100;
125 Settings::values.use_disk_shader_cache = 127 Settings::values.use_disk_shader_cache =
diff --git a/src/yuzu_tester/default_ini.h b/src/yuzu_tester/default_ini.h
index 8d93f7b88..ca203b64d 100644
--- a/src/yuzu_tester/default_ini.h
+++ b/src/yuzu_tester/default_ini.h
@@ -30,6 +30,10 @@ resolution_factor =
30# 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window 30# 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window
31aspect_ratio = 31aspect_ratio =
32 32
33# Anisotropic filtering
34# 0: Default, 1: 2x, 2: 4x, 3: 8x, 4: 16x
35max_anisotropy =
36
33# Whether to enable V-Sync (caps the framerate at 60FPS) or not. 37# Whether to enable V-Sync (caps the framerate at 60FPS) or not.
34# 0 (default): Off, 1: On 38# 0 (default): Off, 1: On
35use_vsync = 39use_vsync =
diff --git a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp
index f2cc4a797..a1bdb1a12 100644
--- a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp
+++ b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp
@@ -112,10 +112,6 @@ EmuWindow_SDL2_Hide::~EmuWindow_SDL2_Hide() {
112 SDL_Quit(); 112 SDL_Quit();
113} 113}
114 114
115void EmuWindow_SDL2_Hide::SwapBuffers() {
116 SDL_GL_SwapWindow(render_window);
117}
118
119void EmuWindow_SDL2_Hide::PollEvents() {} 115void EmuWindow_SDL2_Hide::PollEvents() {}
120 116
121void EmuWindow_SDL2_Hide::MakeCurrent() { 117void EmuWindow_SDL2_Hide::MakeCurrent() {
diff --git a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h
index c7fccc002..b13e15309 100644
--- a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h
+++ b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h
@@ -13,9 +13,6 @@ public:
13 explicit EmuWindow_SDL2_Hide(); 13 explicit EmuWindow_SDL2_Hide();
14 ~EmuWindow_SDL2_Hide(); 14 ~EmuWindow_SDL2_Hide();
15 15
16 /// Swap buffers to display the next frame
17 void SwapBuffers() override;
18
19 /// Polls window events 16 /// Polls window events
20 void PollEvents() override; 17 void PollEvents() override;
21 18