Diffstat (limited to 'src')
-rw-r--r--  src/audio_core/audio_renderer.cpp | 14
-rw-r--r--  src/audio_core/audio_renderer.h | 3
-rw-r--r--  src/common/CMakeLists.txt | 2
-rw-r--r--  src/core/CMakeLists.txt | 3
-rw-r--r--  src/core/arm/arm_interface.h | 7
-rw-r--r--  src/core/arm/dynarmic/arm_dynarmic.cpp | 9
-rw-r--r--  src/core/arm/dynarmic/arm_dynarmic.h | 3
-rw-r--r--  src/core/arm/unicorn/arm_unicorn.cpp | 18
-rw-r--r--  src/core/arm/unicorn/arm_unicorn.h | 3
-rw-r--r--  src/core/core.cpp | 14
-rw-r--r--  src/core/core.h | 12
-rw-r--r--  src/core/core_cpu.cpp | 19
-rw-r--r--  src/core/file_sys/program_metadata.cpp | 4
-rw-r--r--  src/core/file_sys/program_metadata.h | 4
-rw-r--r--  src/core/hle/kernel/process.cpp | 45
-rw-r--r--  src/core/hle/kernel/process.h | 45
-rw-r--r--  src/core/hle/kernel/svc.cpp | 138
-rw-r--r--  src/core/hle/kernel/svc_wrap.h | 5
-rw-r--r--  src/core/hle/kernel/vm_manager.cpp | 310
-rw-r--r--  src/core/hle/kernel/vm_manager.h | 48
-rw-r--r--  src/core/hle/service/am/am.cpp | 58
-rw-r--r--  src/core/hle/service/am/am.h | 8
-rw-r--r--  src/core/hle/service/am/applet_ae.cpp | 4
-rw-r--r--  src/core/hle/service/am/applet_oe.cpp | 2
-rw-r--r--  src/core/hle/service/apm/apm.cpp | 13
-rw-r--r--  src/core/hle/service/apm/apm.h | 7
-rw-r--r--  src/core/hle/service/apm/controller.cpp | 68
-rw-r--r--  src/core/hle/service/apm/controller.h | 70
-rw-r--r--  src/core/hle/service/apm/interface.cpp | 82
-rw-r--r--  src/core/hle/service/apm/interface.h | 14
-rw-r--r--  src/core/hle/service/audio/audren_u.cpp | 9
-rw-r--r--  src/core/hle/service/audio/audren_u.h | 1
-rw-r--r--  src/core/hle/service/filesystem/filesystem.cpp | 10
-rw-r--r--  src/core/hle/service/filesystem/filesystem.h | 2
-rw-r--r--  src/core/hle/service/filesystem/fsp_srv.cpp | 54
-rw-r--r--  src/core/hle/service/filesystem/fsp_srv.h | 26
-rw-r--r--  src/core/hle/service/friend/friend.cpp | 35
-rw-r--r--  src/core/hle/service/hid/controllers/npad.cpp | 31
-rw-r--r--  src/core/hle/service/hid/controllers/npad.h | 5
-rw-r--r--  src/core/hle/service/hid/errors.h | 13
-rw-r--r--  src/core/hle/service/hid/hid.cpp | 50
-rw-r--r--  src/core/hle/service/hid/hid.h | 3
-rw-r--r--  src/core/hle/service/ldr/ldr.cpp | 32
-rw-r--r--  src/core/hle/service/mii/mii.cpp | 16
-rw-r--r--  src/core/hle/service/pm/pm.cpp | 124
-rw-r--r--  src/core/hle/service/pm/pm.h | 6
-rw-r--r--  src/core/hle/service/service.cpp | 9
-rw-r--r--  src/core/hle/service/service.h | 3
-rw-r--r--  src/core/reporter.cpp | 18
-rw-r--r--  src/core/reporter.h | 7
-rw-r--r--  src/core/settings.cpp | 1
-rw-r--r--  src/core/settings.h | 1
-rw-r--r--  src/core/telemetry_session.cpp | 1
-rw-r--r--  src/video_core/CMakeLists.txt | 5
-rw-r--r--  src/video_core/buffer_cache.h | 299
-rw-r--r--  src/video_core/dma_pusher.cpp | 2
-rw-r--r--  src/video_core/engines/kepler_compute.cpp | 9
-rw-r--r--  src/video_core/engines/kepler_memory.cpp | 2
-rw-r--r--  src/video_core/engines/maxwell_3d.cpp | 305
-rw-r--r--  src/video_core/engines/maxwell_3d.h | 135
-rw-r--r--  src/video_core/engines/maxwell_dma.cpp | 4
-rw-r--r--  src/video_core/engines/shader_bytecode.h | 47
-rw-r--r--  src/video_core/gpu.cpp | 30
-rw-r--r--  src/video_core/gpu.h | 15
-rw-r--r--  src/video_core/macro_interpreter.cpp | 4
-rw-r--r--  src/video_core/memory_manager.cpp | 24
-rw-r--r--  src/video_core/memory_manager.h | 8
-rw-r--r--  src/video_core/rasterizer_interface.h | 6
-rw-r--r--  src/video_core/renderer_opengl/gl_buffer_cache.cpp | 110
-rw-r--r--  src/video_core/renderer_opengl/gl_buffer_cache.h | 76
-rw-r--r--  src/video_core/renderer_opengl/gl_device.cpp | 3
-rw-r--r--  src/video_core/renderer_opengl/gl_device.h | 10
-rw-r--r--  src/video_core/renderer_opengl/gl_global_cache.cpp | 102
-rw-r--r--  src/video_core/renderer_opengl/gl_global_cache.h | 82
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.cpp | 428
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.h | 50
-rw-r--r--  src/video_core/renderer_opengl/gl_sampler_cache.h | 4
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.cpp | 179
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.h | 12
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 206
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_decompiler.h | 23
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | 12
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_disk_cache.h | 33
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_gen.cpp | 45
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_gen.h | 5
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_util.cpp | 24
-rw-r--r--  src/video_core/renderer_opengl/gl_state.cpp | 45
-rw-r--r--  src/video_core/renderer_opengl/gl_state.h | 33
-rw-r--r--  src/video_core/renderer_opengl/gl_texture_cache.cpp | 10
-rw-r--r--  src/video_core/renderer_opengl/renderer_opengl.cpp | 11
-rw-r--r--  src/video_core/renderer_opengl/utils.cpp | 48
-rw-r--r--  src/video_core/renderer_opengl/utils.h | 41
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 4
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.h | 2
-rw-r--r--  src/video_core/renderer_vulkan/vk_sampler_cache.h | 7
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.cpp | 16
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.h | 78
-rw-r--r--  src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 40
-rw-r--r--  src/video_core/renderer_vulkan/vk_stream_buffer.cpp | 8
-rw-r--r--  src/video_core/renderer_vulkan/vk_stream_buffer.h | 2
-rw-r--r--  src/video_core/shader/control_flow.cpp | 476
-rw-r--r--  src/video_core/shader/control_flow.h | 63
-rw-r--r--  src/video_core/shader/decode.cpp | 177
-rw-r--r--  src/video_core/shader/decode/arithmetic.cpp | 13
-rw-r--r--  src/video_core/shader/decode/arithmetic_half_immediate.cpp | 4
-rw-r--r--  src/video_core/shader/decode/decode_integer_set.cpp | 0
-rw-r--r--  src/video_core/shader/decode/ffma.cpp | 10
-rw-r--r--  src/video_core/shader/decode/half_set_predicate.cpp | 71
-rw-r--r--  src/video_core/shader/decode/hfma2.cpp | 4
-rw-r--r--  src/video_core/shader/decode/image.cpp | 6
-rw-r--r--  src/video_core/shader/decode/memory.cpp | 37
-rw-r--r--  src/video_core/shader/decode/other.cpp | 58
-rw-r--r--  src/video_core/shader/decode/texture.cpp | 42
-rw-r--r--  src/video_core/shader/decode/xmad.cpp | 12
-rw-r--r--  src/video_core/shader/node.h | 14
-rw-r--r--  src/video_core/shader/node_helper.cpp | 2
-rw-r--r--  src/video_core/shader/shader_ir.cpp | 138
-rw-r--r--  src/video_core/shader/shader_ir.h | 55
-rw-r--r--  src/video_core/shader/track.cpp | 35
-rw-r--r--  src/video_core/texture_cache/surface_base.cpp | 9
-rw-r--r--  src/video_core/texture_cache/surface_base.h | 10
-rw-r--r--  src/video_core/texture_cache/surface_params.cpp | 13
-rw-r--r--  src/video_core/texture_cache/texture_cache.h | 46
-rw-r--r--  src/yuzu/configuration/config.cpp | 3
-rw-r--r--  src/yuzu_cmd/config.cpp | 1
-rw-r--r--  src/yuzu_cmd/default_ini.h | 4
-rw-r--r--  src/yuzu_tester/config.cpp | 1
-rw-r--r--  src/yuzu_tester/default_ini.h | 4
128 files changed, 4052 insertions(+), 1389 deletions(-)
diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp
index 9a0939883..da50a0bbc 100644
--- a/src/audio_core/audio_renderer.cpp
+++ b/src/audio_core/audio_renderer.cpp
@@ -73,13 +73,15 @@ private:
     EffectInStatus info{};
 };
 AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params,
-                             Kernel::SharedPtr<Kernel::WritableEvent> buffer_event)
+                             Kernel::SharedPtr<Kernel::WritableEvent> buffer_event,
+                             std::size_t instance_number)
     : worker_params{params}, buffer_event{buffer_event}, voices(params.voice_count),
       effects(params.effect_count) {
 
     audio_out = std::make_unique<AudioCore::AudioOut>();
     stream = audio_out->OpenStream(core_timing, STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS,
-                                   "AudioRenderer", [=]() { buffer_event->Signal(); });
+                                   fmt::format("AudioRenderer-Instance{}", instance_number),
+                                   [=]() { buffer_event->Signal(); });
     audio_out->StartStream(stream);
 
     QueueMixedBuffer(0);
@@ -217,13 +219,15 @@ std::vector<s16> AudioRenderer::VoiceState::DequeueSamples(std::size_t sample_co
     if (offset == samples.size()) {
         offset = 0;
 
-        if (!wave_buffer.is_looping) {
+        if (!wave_buffer.is_looping && wave_buffer.buffer_sz) {
             SetWaveIndex(wave_index + 1);
         }
 
-        out_status.wave_buffer_consumed++;
+        if (wave_buffer.buffer_sz) {
+            out_status.wave_buffer_consumed++;
+        }
 
-        if (wave_buffer.end_of_stream) {
+        if (wave_buffer.end_of_stream || wave_buffer.buffer_sz == 0) {
             info.play_state = PlayState::Paused;
         }
     }
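
The constructor change above threads an instance number into the stream name, so logs from concurrent renderers stay distinguishable. A minimal sketch of the naming scheme, assuming only fmt is available (MakeStreamName is a hypothetical helper, not part of the codebase):

    #include <cstddef>
    #include <string>
    #include <fmt/format.h>

    // Mirrors the naming introduced above: one unique label per renderer.
    std::string MakeStreamName(std::size_t instance_number) {
        return fmt::format("AudioRenderer-Instance{}", instance_number);
    }

    // MakeStreamName(0) == "AudioRenderer-Instance0"
    // MakeStreamName(1) == "AudioRenderer-Instance1"
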
diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h
index b2e5d336c..45afbe759 100644
--- a/src/audio_core/audio_renderer.h
+++ b/src/audio_core/audio_renderer.h
@@ -215,7 +215,8 @@ static_assert(sizeof(UpdateDataHeader) == 0x40, "UpdateDataHeader has wrong size
 class AudioRenderer {
 public:
     AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params,
-                  Kernel::SharedPtr<Kernel::WritableEvent> buffer_event);
+                  Kernel::SharedPtr<Kernel::WritableEvent> buffer_event,
+                  std::size_t instance_number);
     ~AudioRenderer();
 
     std::vector<u8> UpdateAudioRenderer(const std::vector<u8>& input_params);
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 2554add28..2b4266f29 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -56,6 +56,8 @@ add_custom_command(OUTPUT scm_rev.cpp
56 "${VIDEO_CORE}/shader/decode/shift.cpp" 56 "${VIDEO_CORE}/shader/decode/shift.cpp"
57 "${VIDEO_CORE}/shader/decode/video.cpp" 57 "${VIDEO_CORE}/shader/decode/video.cpp"
58 "${VIDEO_CORE}/shader/decode/xmad.cpp" 58 "${VIDEO_CORE}/shader/decode/xmad.cpp"
59 "${VIDEO_CORE}/shader/control_flow.cpp"
60 "${VIDEO_CORE}/shader/control_flow.h"
59 "${VIDEO_CORE}/shader/decode.cpp" 61 "${VIDEO_CORE}/shader/decode.cpp"
60 "${VIDEO_CORE}/shader/node.h" 62 "${VIDEO_CORE}/shader/node.h"
61 "${VIDEO_CORE}/shader/node_helper.cpp" 63 "${VIDEO_CORE}/shader/node_helper.cpp"
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 30eb9d82e..f4325f0f8 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -208,6 +208,8 @@ add_library(core STATIC
     hle/service/aoc/aoc_u.h
     hle/service/apm/apm.cpp
     hle/service/apm/apm.h
+    hle/service/apm/controller.cpp
+    hle/service/apm/controller.h
     hle/service/apm/interface.cpp
     hle/service/apm/interface.h
     hle/service/audio/audctl.cpp
@@ -293,6 +295,7 @@ add_library(core STATIC
     hle/service/hid/irs.h
     hle/service/hid/xcd.cpp
     hle/service/hid/xcd.h
+    hle/service/hid/errors.h
     hle/service/hid/controllers/controller_base.cpp
     hle/service/hid/controllers/controller_base.h
     hle/service/hid/controllers/debug_pad.cpp
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index c6691a8e1..45e94e625 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -44,13 +44,6 @@ public:
     /// Step CPU by one instruction
     virtual void Step() = 0;
 
-    /// Maps a backing memory region for the CPU
-    virtual void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
-                                  Kernel::VMAPermission perms) = 0;
-
-    /// Unmaps a region of memory that was previously mapped using MapBackingMemory
-    virtual void UnmapMemory(VAddr address, std::size_t size) = 0;
-
     /// Clear all instruction cache
     virtual void ClearInstructionCache() = 0;
 
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp
index 44307fa19..f1506b372 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -177,15 +177,6 @@ ARM_Dynarmic::ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor,
 
 ARM_Dynarmic::~ARM_Dynarmic() = default;
 
-void ARM_Dynarmic::MapBackingMemory(u64 address, std::size_t size, u8* memory,
-                                    Kernel::VMAPermission perms) {
-    inner_unicorn.MapBackingMemory(address, size, memory, perms);
-}
-
-void ARM_Dynarmic::UnmapMemory(u64 address, std::size_t size) {
-    inner_unicorn.UnmapMemory(address, size);
-}
-
 void ARM_Dynarmic::SetPC(u64 pc) {
     jit->SetPC(pc);
 }
diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h
index b701e97a3..504d46c68 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.h
+++ b/src/core/arm/dynarmic/arm_dynarmic.h
@@ -23,9 +23,6 @@ public:
     ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index);
     ~ARM_Dynarmic() override;
 
-    void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
-                          Kernel::VMAPermission perms) override;
-    void UnmapMemory(u64 address, std::size_t size) override;
     void SetPC(u64 pc) override;
     u64 GetPC() const override;
     u64 GetReg(int index) const override;
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp
index 4e07fe8b5..97d5c2a8a 100644
--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@@ -50,11 +50,14 @@ static void CodeHook(uc_engine* uc, uint64_t address, uint32_t size, void* user_
 
 static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int size, u64 value,
                                void* user_data) {
+    auto* const system = static_cast<System*>(user_data);
+
     ARM_Interface::ThreadContext ctx{};
-    Core::CurrentArmInterface().SaveContext(ctx);
+    system->CurrentArmInterface().SaveContext(ctx);
     ASSERT_MSG(false, "Attempted to read from unmapped memory: 0x{:X}, pc=0x{:X}, lr=0x{:X}", addr,
                ctx.pc, ctx.cpu_registers[30]);
-    return {};
+
+    return false;
 }
 
 ARM_Unicorn::ARM_Unicorn(System& system) : system{system} {
@@ -65,7 +68,7 @@ ARM_Unicorn::ARM_Unicorn(System& system) : system{system} {
 
     uc_hook hook{};
     CHECKED(uc_hook_add(uc, &hook, UC_HOOK_INTR, (void*)InterruptHook, this, 0, -1));
-    CHECKED(uc_hook_add(uc, &hook, UC_HOOK_MEM_INVALID, (void*)UnmappedMemoryHook, this, 0, -1));
+    CHECKED(uc_hook_add(uc, &hook, UC_HOOK_MEM_INVALID, (void*)UnmappedMemoryHook, &system, 0, -1));
     if (GDBStub::IsServerEnabled()) {
         CHECKED(uc_hook_add(uc, &hook, UC_HOOK_CODE, (void*)CodeHook, this, 0, -1));
         last_bkpt_hit = false;
@@ -76,15 +79,6 @@ ARM_Unicorn::~ARM_Unicorn() {
     CHECKED(uc_close(uc));
 }
 
-void ARM_Unicorn::MapBackingMemory(VAddr address, std::size_t size, u8* memory,
-                                   Kernel::VMAPermission perms) {
-    CHECKED(uc_mem_map_ptr(uc, address, size, static_cast<u32>(perms), memory));
-}
-
-void ARM_Unicorn::UnmapMemory(VAddr address, std::size_t size) {
-    CHECKED(uc_mem_unmap(uc, address, size));
-}
-
 void ARM_Unicorn::SetPC(u64 pc) {
     CHECKED(uc_reg_write(uc, UC_ARM64_REG_PC, &pc));
 }
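
The hook changes above stop reaching through the Core::CurrentArmInterface() global and instead recover the owning System from Unicorn's void* user_data. A minimal sketch of that callback pattern, with hypothetical Engine/AddHook stand-ins for the uc_* API:

    #include <cstdint>

    struct System {
        int id = 1;
    };

    // Hypothetical C-style hook API shaped like uc_hook_add: the callback
    // later receives the opaque pointer it was registered with.
    using MemoryHook = bool (*)(std::uint64_t addr, void* user_data);

    struct Engine {
        MemoryHook hook = nullptr;
        void* user_data = nullptr;
    };

    void AddHook(Engine& engine, MemoryHook hook, void* user_data) {
        engine.hook = hook;
        engine.user_data = user_data;
    }

    // As in UnmappedMemoryHook: cast user_data back to the typed owner
    // instead of consulting a global accessor.
    bool OnUnmappedAccess(std::uint64_t /*addr*/, void* user_data) {
        auto* const system = static_cast<System*>(user_data);
        return system->id != 0;
    }

    // Registration mirrors the diff: pass &system rather than `this`.
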
diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h
index 34e974b4d..fe2ffd70c 100644
--- a/src/core/arm/unicorn/arm_unicorn.h
+++ b/src/core/arm/unicorn/arm_unicorn.h
@@ -18,9 +18,6 @@ public:
     explicit ARM_Unicorn(System& system);
     ~ARM_Unicorn() override;
 
-    void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
-                          Kernel::VMAPermission perms) override;
-    void UnmapMemory(VAddr address, std::size_t size) override;
     void SetPC(u64 pc) override;
     u64 GetPC() const override;
     u64 GetReg(int index) const override;
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 262411db8..4aceee785 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -25,6 +25,7 @@
25#include "core/hle/kernel/scheduler.h" 25#include "core/hle/kernel/scheduler.h"
26#include "core/hle/kernel/thread.h" 26#include "core/hle/kernel/thread.h"
27#include "core/hle/service/am/applets/applets.h" 27#include "core/hle/service/am/applets/applets.h"
28#include "core/hle/service/apm/controller.h"
28#include "core/hle/service/glue/manager.h" 29#include "core/hle/service/glue/manager.h"
29#include "core/hle/service/service.h" 30#include "core/hle/service/service.h"
30#include "core/hle/service/sm/sm.h" 31#include "core/hle/service/sm/sm.h"
@@ -143,7 +144,7 @@ struct System::Impl {
     telemetry_session = std::make_unique<Core::TelemetrySession>();
     service_manager = std::make_shared<Service::SM::ServiceManager>();
 
-    Service::Init(service_manager, system, *virtual_filesystem);
+    Service::Init(service_manager, system);
     GDBStub::Init();
 
     renderer = VideoCore::CreateRenderer(emu_window, system);
@@ -306,6 +307,9 @@ struct System::Impl {
     /// Frontend applets
     Service::AM::Applets::AppletManager applet_manager;
 
+    /// APM (Performance) services
+    Service::APM::Controller apm_controller{core_timing};
+
     /// Glue services
     Service::Glue::ARPManager arp_manager;
 
@@ -568,6 +572,14 @@ const Service::Glue::ARPManager& System::GetARPManager() const {
     return impl->arp_manager;
 }
 
+Service::APM::Controller& System::GetAPMController() {
+    return impl->apm_controller;
+}
+
+const Service::APM::Controller& System::GetAPMController() const {
+    return impl->apm_controller;
+}
+
 System::ResultStatus System::Init(Frontend::EmuWindow& emu_window) {
     return impl->Init(*this, emu_window);
 }
diff --git a/src/core/core.h b/src/core/core.h
index 70adb7af9..8ebb385ac 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -43,6 +43,10 @@ struct AppletFrontendSet;
 class AppletManager;
 } // namespace AM::Applets
 
+namespace APM {
+class Controller;
+}
+
 namespace Glue {
 class ARPManager;
 }
@@ -296,6 +300,10 @@ public:
 
     const Service::Glue::ARPManager& GetARPManager() const;
 
+    Service::APM::Controller& GetAPMController();
+
+    const Service::APM::Controller& GetAPMController() const;
+
 private:
     System();
 
@@ -319,10 +327,6 @@ private:
     static System s_instance;
 };
 
-inline ARM_Interface& CurrentArmInterface() {
-    return System::GetInstance().CurrentArmInterface();
-}
-
 inline Kernel::Process* CurrentProcess() {
     return System::GetInstance().CurrentProcess();
 }
diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp
index 99b7d387d..21c410e34 100644
--- a/src/core/core_cpu.cpp
+++ b/src/core/core_cpu.cpp
@@ -53,16 +53,12 @@ bool CpuBarrier::Rendezvous() {
 Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier,
          std::size_t core_index)
     : cpu_barrier{cpu_barrier}, core_timing{system.CoreTiming()}, core_index{core_index} {
-    if (Settings::values.cpu_jit_enabled) {
 #ifdef ARCHITECTURE_x86_64
     arm_interface = std::make_unique<ARM_Dynarmic>(system, exclusive_monitor, core_index);
 #else
     arm_interface = std::make_unique<ARM_Unicorn>(system);
     LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
 #endif
-    } else {
-        arm_interface = std::make_unique<ARM_Unicorn>(system);
-    }
 
     scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface);
 }
@@ -70,15 +66,12 @@ Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_ba
 Cpu::~Cpu() = default;
 
 std::unique_ptr<ExclusiveMonitor> Cpu::MakeExclusiveMonitor(std::size_t num_cores) {
-    if (Settings::values.cpu_jit_enabled) {
 #ifdef ARCHITECTURE_x86_64
     return std::make_unique<DynarmicExclusiveMonitor>(num_cores);
 #else
-        return nullptr; // TODO(merry): Passthrough exclusive monitor
+    // TODO(merry): Passthrough exclusive monitor
+    return nullptr;
 #endif
-    } else {
-        return nullptr; // TODO(merry): Passthrough exclusive monitor
-    }
 }
 
 void Cpu::RunLoop(bool tight_loop) {
diff --git a/src/core/file_sys/program_metadata.cpp b/src/core/file_sys/program_metadata.cpp
index eb76174c5..7310b3602 100644
--- a/src/core/file_sys/program_metadata.cpp
+++ b/src/core/file_sys/program_metadata.cpp
@@ -94,6 +94,10 @@ u64 ProgramMetadata::GetFilesystemPermissions() const {
     return aci_file_access.permissions;
 }
 
+u32 ProgramMetadata::GetSystemResourceSize() const {
+    return npdm_header.system_resource_size;
+}
+
 const ProgramMetadata::KernelCapabilityDescriptors& ProgramMetadata::GetKernelCapabilities() const {
     return aci_kernel_capabilities;
 }
diff --git a/src/core/file_sys/program_metadata.h b/src/core/file_sys/program_metadata.h
index 43bf2820a..88ec97d85 100644
--- a/src/core/file_sys/program_metadata.h
+++ b/src/core/file_sys/program_metadata.h
@@ -58,6 +58,7 @@ public:
     u32 GetMainThreadStackSize() const;
     u64 GetTitleID() const;
     u64 GetFilesystemPermissions() const;
+    u32 GetSystemResourceSize() const;
     const KernelCapabilityDescriptors& GetKernelCapabilities() const;
 
     void Print() const;
@@ -76,7 +77,8 @@ private:
         u8 reserved_3;
         u8 main_thread_priority;
         u8 main_thread_cpu;
-        std::array<u8, 8> reserved_4;
+        std::array<u8, 4> reserved_4;
+        u32_le system_resource_size;
         u32_le process_category;
         u32_le main_stack_size;
         std::array<u8, 0x10> application_name;
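
The header edit above splits what was an 8-byte reserved field into four reserved bytes plus a u32 system_resource_size, so every later NPDM field keeps its offset. A compile-time sketch of that invariant, using hypothetical stand-in structs:

    #include <array>
    #include <cstdint>

    struct Before { // old layout slice
        std::array<std::uint8_t, 8> reserved_4;
    };

    struct After { // new layout slice
        std::array<std::uint8_t, 4> reserved_4;
        std::uint32_t system_resource_size; // carved out of the reserved bytes
    };

    // Same footprint, so process_category and main_stack_size don't move.
    static_assert(sizeof(Before) == sizeof(After));
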
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index f45ef05f6..92169a97b 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -129,20 +129,17 @@ u64 Process::GetTotalPhysicalMemoryAvailable() const {
     return vm_manager.GetTotalPhysicalMemoryAvailable();
 }
 
-u64 Process::GetTotalPhysicalMemoryAvailableWithoutMmHeap() const {
-    // TODO: Subtract the personal heap size from this when the
-    // personal heap is implemented.
-    return GetTotalPhysicalMemoryAvailable();
+u64 Process::GetTotalPhysicalMemoryAvailableWithoutSystemResource() const {
+    return GetTotalPhysicalMemoryAvailable() - GetSystemResourceSize();
 }
 
 u64 Process::GetTotalPhysicalMemoryUsed() const {
-    return vm_manager.GetCurrentHeapSize() + main_thread_stack_size + code_memory_size;
+    return vm_manager.GetCurrentHeapSize() + main_thread_stack_size + code_memory_size +
+           GetSystemResourceUsage();
 }
 
-u64 Process::GetTotalPhysicalMemoryUsedWithoutMmHeap() const {
-    // TODO: Subtract the personal heap size from this when the
-    // personal heap is implemented.
-    return GetTotalPhysicalMemoryUsed();
+u64 Process::GetTotalPhysicalMemoryUsedWithoutSystemResource() const {
+    return GetTotalPhysicalMemoryUsed() - GetSystemResourceUsage();
 }
 
 void Process::RegisterThread(const Thread* thread) {
@@ -172,6 +169,7 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
     program_id = metadata.GetTitleID();
     ideal_core = metadata.GetMainThreadCore();
     is_64bit_process = metadata.Is64BitProgram();
+    system_resource_size = metadata.GetSystemResourceSize();
 
     vm_manager.Reset(metadata.GetAddressSpaceType());
 
@@ -186,19 +184,11 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
 }
 
 void Process::Run(s32 main_thread_priority, u64 stack_size) {
-    // The kernel always ensures that the given stack size is page aligned.
-    main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE);
-
-    // Allocate and map the main thread stack
-    // TODO(bunnei): This is heap area that should be allocated by the kernel and not mapped as part
-    // of the user address space.
-    const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size;
-    vm_manager
-        .MapMemoryBlock(mapping_address, std::make_shared<std::vector<u8>>(main_thread_stack_size),
-                        0, main_thread_stack_size, MemoryState::Stack)
-        .Unwrap();
+    AllocateMainThreadStack(stack_size);
+    tls_region_address = CreateTLSRegion();
 
     vm_manager.LogLayout();
 
     ChangeStatus(ProcessStatus::Running);
 
     SetupMainThread(*this, kernel, main_thread_priority);
@@ -228,6 +218,9 @@ void Process::PrepareForTermination() {
     stop_threads(system.Scheduler(2).GetThreadList());
     stop_threads(system.Scheduler(3).GetThreadList());
 
+    FreeTLSRegion(tls_region_address);
+    tls_region_address = 0;
+
     ChangeStatus(ProcessStatus::Exited);
 }
 
@@ -327,4 +320,16 @@ void Process::ChangeStatus(ProcessStatus new_status) {
     WakeupAllWaitingThreads();
 }
 
+void Process::AllocateMainThreadStack(u64 stack_size) {
+    // The kernel always ensures that the given stack size is page aligned.
+    main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE);
+
+    // Allocate and map the main thread stack
+    const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size;
+    vm_manager
+        .MapMemoryBlock(mapping_address, std::make_shared<std::vector<u8>>(main_thread_stack_size),
+                        0, main_thread_stack_size, MemoryState::Stack)
+        .Unwrap();
+}
+
 } // namespace Kernel
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index 83ea02bee..c2df451f3 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -135,6 +135,11 @@ public:
         return mutex;
     }
 
+    /// Gets the address to the process' dedicated TLS region.
+    VAddr GetTLSRegionAddress() const {
+        return tls_region_address;
+    }
+
     /// Gets the current status of the process
     ProcessStatus GetStatus() const {
         return status;
@@ -168,8 +173,24 @@ public:
         return capabilities.GetPriorityMask();
     }
 
-    u32 IsVirtualMemoryEnabled() const {
-        return is_virtual_address_memory_enabled;
+    /// Gets the amount of secure memory to allocate for memory management.
+    u32 GetSystemResourceSize() const {
+        return system_resource_size;
+    }
+
+    /// Gets the amount of secure memory currently in use for memory management.
+    u32 GetSystemResourceUsage() const {
+        // On hardware, this returns the amount of system resource memory that has
+        // been used by the kernel. This is problematic for Yuzu to emulate, because
+        // system resource memory is used for page tables -- and yuzu doesn't really
+        // have a way to calculate how much memory is required for page tables for
+        // the current process at any given time.
+        // TODO: Is this even worth implementing? Games may retrieve this value via
+        // an SDK function that gets used + available system resource size for debug
+        // or diagnostic purposes. However, it seems unlikely that a game would make
+        // decisions based on how much system memory is dedicated to its page tables.
+        // Is returning a value other than zero wise?
+        return 0;
     }
 
     /// Whether this process is an AArch64 or AArch32 process.
@@ -196,15 +217,15 @@ public:
     u64 GetTotalPhysicalMemoryAvailable() const;
 
     /// Retrieves the total physical memory available to this process in bytes,
-    /// without the size of the personal heap added to it.
-    u64 GetTotalPhysicalMemoryAvailableWithoutMmHeap() const;
+    /// without the size of the personal system resource heap added to it.
+    u64 GetTotalPhysicalMemoryAvailableWithoutSystemResource() const;
 
     /// Retrieves the total physical memory used by this process in bytes.
     u64 GetTotalPhysicalMemoryUsed() const;
 
     /// Retrieves the total physical memory used by this process in bytes,
-    /// without the size of the personal heap added to it.
-    u64 GetTotalPhysicalMemoryUsedWithoutMmHeap() const;
+    /// without the size of the personal system resource heap added to it.
+    u64 GetTotalPhysicalMemoryUsedWithoutSystemResource() const;
 
     /// Gets the list of all threads created with this process as their owner.
     const std::list<const Thread*>& GetThreadList() const {
@@ -280,6 +301,9 @@ private:
     /// a process signal.
     void ChangeStatus(ProcessStatus new_status);
 
+    /// Allocates the main thread stack for the process, given the stack size in bytes.
+    void AllocateMainThreadStack(u64 stack_size);
+
     /// Memory manager for this process.
     Kernel::VMManager vm_manager;
 
@@ -298,12 +322,16 @@ private:
     /// Title ID corresponding to the process
     u64 program_id = 0;
 
+    /// Specifies additional memory to be reserved for the process's memory management by the
+    /// system. When this is non-zero, secure memory is allocated and used for page table allocation
+    /// instead of using the normal global page tables/memory block management.
+    u32 system_resource_size = 0;
+
     /// Resource limit descriptor for this process
     SharedPtr<ResourceLimit> resource_limit;
 
     /// The ideal CPU core for this process, threads are scheduled on this core by default.
     u8 ideal_core = 0;
-    u32 is_virtual_address_memory_enabled = 0;
 
     /// The Thread Local Storage area is allocated as processes create threads,
     /// each TLS area is 0x200 bytes, so one page (0x1000) is split up in 8 parts, and each part
@@ -338,6 +366,9 @@ private:
     /// variable related facilities.
     Mutex mutex;
 
+    /// Address indicating the location of the process' dedicated TLS region.
+    VAddr tls_region_address = 0;
+
     /// Random values for svcGetInfo RandomEntropy
     std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy{};
 
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 332573a95..1fd1a732a 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -318,7 +318,14 @@ static ResultCode UnmapMemory(Core::System& system, VAddr dst_addr, VAddr src_ad
         return result;
     }
 
-    return vm_manager.UnmapRange(dst_addr, size);
+    const auto unmap_res = vm_manager.UnmapRange(dst_addr, size);
+
+    // Reprotect the source mapping on success
+    if (unmap_res.IsSuccess()) {
+        ASSERT(vm_manager.ReprotectRange(src_addr, size, VMAPermission::ReadWrite).IsSuccess());
+    }
+
+    return unmap_res;
 }
 
 /// Connect to an OS service given the port name, returns the handle to the port to out
@@ -729,16 +736,16 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
         StackRegionBaseAddr = 14,
         StackRegionSize = 15,
         // 3.0.0+
-        IsVirtualAddressMemoryEnabled = 16,
-        PersonalMmHeapUsage = 17,
+        SystemResourceSize = 16,
+        SystemResourceUsage = 17,
         TitleId = 18,
         // 4.0.0+
         PrivilegedProcessId = 19,
         // 5.0.0+
         UserExceptionContextAddr = 20,
         // 6.0.0+
-        TotalPhysicalMemoryAvailableWithoutMmHeap = 21,
-        TotalPhysicalMemoryUsedWithoutMmHeap = 22,
+        TotalPhysicalMemoryAvailableWithoutSystemResource = 21,
+        TotalPhysicalMemoryUsedWithoutSystemResource = 22,
     };
 
     const auto info_id_type = static_cast<GetInfoType>(info_id);
@@ -756,12 +763,12 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
     case GetInfoType::StackRegionSize:
     case GetInfoType::TotalPhysicalMemoryAvailable:
     case GetInfoType::TotalPhysicalMemoryUsed:
-    case GetInfoType::IsVirtualAddressMemoryEnabled:
-    case GetInfoType::PersonalMmHeapUsage:
+    case GetInfoType::SystemResourceSize:
+    case GetInfoType::SystemResourceUsage:
     case GetInfoType::TitleId:
     case GetInfoType::UserExceptionContextAddr:
-    case GetInfoType::TotalPhysicalMemoryAvailableWithoutMmHeap:
-    case GetInfoType::TotalPhysicalMemoryUsedWithoutMmHeap: {
+    case GetInfoType::TotalPhysicalMemoryAvailableWithoutSystemResource:
+    case GetInfoType::TotalPhysicalMemoryUsedWithoutSystemResource: {
         if (info_sub_id != 0) {
             return ERR_INVALID_ENUM_VALUE;
         }
@@ -822,8 +829,13 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
         *result = process->GetTotalPhysicalMemoryUsed();
         return RESULT_SUCCESS;
 
-    case GetInfoType::IsVirtualAddressMemoryEnabled:
-        *result = process->IsVirtualMemoryEnabled();
+    case GetInfoType::SystemResourceSize:
+        *result = process->GetSystemResourceSize();
+        return RESULT_SUCCESS;
+
+    case GetInfoType::SystemResourceUsage:
+        LOG_WARNING(Kernel_SVC, "(STUBBED) Attempted to query system resource usage");
+        *result = process->GetSystemResourceUsage();
         return RESULT_SUCCESS;
 
     case GetInfoType::TitleId:
@@ -831,17 +843,15 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
         return RESULT_SUCCESS;
 
     case GetInfoType::UserExceptionContextAddr:
-        LOG_WARNING(Kernel_SVC,
-                    "(STUBBED) Attempted to query user exception context address, returned 0");
-        *result = 0;
+        *result = process->GetTLSRegionAddress();
         return RESULT_SUCCESS;
 
-    case GetInfoType::TotalPhysicalMemoryAvailableWithoutMmHeap:
-        *result = process->GetTotalPhysicalMemoryAvailable();
+    case GetInfoType::TotalPhysicalMemoryAvailableWithoutSystemResource:
+        *result = process->GetTotalPhysicalMemoryAvailableWithoutSystemResource();
         return RESULT_SUCCESS;
 
-    case GetInfoType::TotalPhysicalMemoryUsedWithoutMmHeap:
-        *result = process->GetTotalPhysicalMemoryUsedWithoutMmHeap();
+    case GetInfoType::TotalPhysicalMemoryUsedWithoutSystemResource:
+        *result = process->GetTotalPhysicalMemoryUsedWithoutSystemResource();
         return RESULT_SUCCESS;
 
     default:
@@ -946,6 +956,86 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
     }
 }
 
+/// Maps memory at a desired address
+static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size) {
+    LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size);
+
+    if (!Common::Is4KBAligned(addr)) {
+        LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr);
+        return ERR_INVALID_ADDRESS;
+    }
+
+    if (!Common::Is4KBAligned(size)) {
+        LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size);
+        return ERR_INVALID_SIZE;
+    }
+
+    if (size == 0) {
+        LOG_ERROR(Kernel_SVC, "Size is zero");
+        return ERR_INVALID_SIZE;
+    }
+
+    if (!(addr < addr + size)) {
+        LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address");
+        return ERR_INVALID_MEMORY_RANGE;
+    }
+
+    Process* const current_process = system.Kernel().CurrentProcess();
+    auto& vm_manager = current_process->VMManager();
+
+    if (current_process->GetSystemResourceSize() == 0) {
+        LOG_ERROR(Kernel_SVC, "System Resource Size is zero");
+        return ERR_INVALID_STATE;
+    }
+
+    if (!vm_manager.IsWithinMapRegion(addr, size)) {
+        LOG_ERROR(Kernel_SVC, "Range not within map region");
+        return ERR_INVALID_MEMORY_RANGE;
+    }
+
+    return vm_manager.MapPhysicalMemory(addr, size);
+}
+
+/// Unmaps memory previously mapped via MapPhysicalMemory
+static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size) {
+    LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size);
+
+    if (!Common::Is4KBAligned(addr)) {
+        LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr);
+        return ERR_INVALID_ADDRESS;
+    }
+
+    if (!Common::Is4KBAligned(size)) {
+        LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size);
+        return ERR_INVALID_SIZE;
+    }
+
+    if (size == 0) {
+        LOG_ERROR(Kernel_SVC, "Size is zero");
+        return ERR_INVALID_SIZE;
+    }
+
+    if (!(addr < addr + size)) {
+        LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address");
+        return ERR_INVALID_MEMORY_RANGE;
+    }
+
+    Process* const current_process = system.Kernel().CurrentProcess();
+    auto& vm_manager = current_process->VMManager();
+
+    if (current_process->GetSystemResourceSize() == 0) {
+        LOG_ERROR(Kernel_SVC, "System Resource Size is zero");
+        return ERR_INVALID_STATE;
+    }
+
+    if (!vm_manager.IsWithinMapRegion(addr, size)) {
+        LOG_ERROR(Kernel_SVC, "Range not within map region");
+        return ERR_INVALID_MEMORY_RANGE;
+    }
+
+    return vm_manager.UnmapPhysicalMemory(addr, size);
+}
+
 /// Sets the thread activity
 static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 activity) {
     LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, activity=0x{:08X}", handle, activity);
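
Both new handlers validate in the same order: address alignment, size alignment, non-zero size, no 64-bit wraparound, then process state and region checks. A minimal sketch of those range guards, assuming Is4KBAligned is a simple low-bits mask (its definition is not part of this diff):

    #include <cstdint>

    constexpr bool Is4KBAligned(std::uint64_t value) {
        return (value & 0xFFF) == 0; // 4KB pages: low 12 bits must be clear
    }

    constexpr bool IsValidRange(std::uint64_t addr, std::uint64_t size) {
        // Mirrors the guard order in MapPhysicalMemory/UnmapPhysicalMemory.
        return Is4KBAligned(addr) && Is4KBAligned(size) && size != 0 &&
               addr < addr + size; // unsigned wraparound check
    }

    static_assert(IsValidRange(0x10000, 0x2000));
    static_assert(!IsValidRange(0x10001, 0x2000)); // misaligned address
    static_assert(!IsValidRange(0x10000, 0));      // zero size
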
@@ -1647,8 +1737,8 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var
 // Wait for an address (via Address Arbiter)
 static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, s32 value,
                                  s64 timeout) {
-    LOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}",
-                address, type, value, timeout);
+    LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}", address,
+              type, value, timeout);
 
     // If the passed address is a kernel virtual address, return invalid memory state.
     if (Memory::IsKernelVirtualAddress(address)) {
@@ -1670,8 +1760,8 @@ static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type,
 // Signals to an address (via Address Arbiter)
 static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type, s32 value,
                                   s32 num_to_wake) {
-    LOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}",
+    LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}",
               address, type, value, num_to_wake);
 
     // If the passed address is a kernel virtual address, return invalid memory state.
     if (Memory::IsKernelVirtualAddress(address)) {
@@ -2303,8 +2393,8 @@ static const FunctionDef SVC_Table[] = {
     {0x29, SvcWrap<GetInfo>, "GetInfo"},
     {0x2A, nullptr, "FlushEntireDataCache"},
     {0x2B, nullptr, "FlushDataCache"},
-    {0x2C, nullptr, "MapPhysicalMemory"},
-    {0x2D, nullptr, "UnmapPhysicalMemory"},
+    {0x2C, SvcWrap<MapPhysicalMemory>, "MapPhysicalMemory"},
+    {0x2D, SvcWrap<UnmapPhysicalMemory>, "UnmapPhysicalMemory"},
     {0x2E, nullptr, "GetFutureThreadInfo"},
     {0x2F, nullptr, "GetLastThreadInfo"},
     {0x30, SvcWrap<GetResourceLimitLimitValue>, "GetResourceLimitLimitValue"},
diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h
index 865473c6f..c2d8d0dc3 100644
--- a/src/core/hle/kernel/svc_wrap.h
+++ b/src/core/hle/kernel/svc_wrap.h
@@ -32,6 +32,11 @@ void SvcWrap(Core::System& system) {
     FuncReturn(system, func(system, Param(system, 0)).raw);
 }
 
+template <ResultCode func(Core::System&, u64, u64)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, Param(system, 0), Param(system, 1)).raw);
+}
+
 template <ResultCode func(Core::System&, u32)>
 void SvcWrap(Core::System& system) {
     FuncReturn(system, func(system, static_cast<u32>(Param(system, 0))).raw);
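
The added overload is what lets the SVC table entry {0x2C, SvcWrap<MapPhysicalMemory>, ...} compile: the template argument selects the wrapper matching the handler's (Core::System&, u64, u64) signature, and the wrapper pulls both arguments out of guest registers. A minimal sketch of the dispatch shape, with hypothetical Param/FuncReturn/System stand-ins:

    #include <array>
    #include <cstdint>

    struct System {
        std::array<std::uint64_t, 8> regs{}; // stand-in guest register file
    };

    struct ResultCode {
        std::uint32_t raw;
    };

    std::uint64_t Param(const System& system, int n) {
        return system.regs[n]; // SVC arguments arrive in registers
    }

    void FuncReturn(System& system, std::uint32_t value) {
        system.regs[0] = value; // result code goes back in the first register
    }

    // Same shape as the overload added above.
    template <ResultCode func(System&, std::uint64_t, std::uint64_t)>
    void SvcWrap(System& system) {
        FuncReturn(system, func(system, Param(system, 0), Param(system, 1)).raw);
    }

    ResultCode MapPhysicalMemoryStub(System&, std::uint64_t, std::uint64_t) {
        return ResultCode{0};
    }

    // A table entry would then reference SvcWrap<MapPhysicalMemoryStub>.
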
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp
index 501544090..4f45fb03b 100644
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -8,10 +8,11 @@
8#include "common/assert.h" 8#include "common/assert.h"
9#include "common/logging/log.h" 9#include "common/logging/log.h"
10#include "common/memory_hook.h" 10#include "common/memory_hook.h"
11#include "core/arm/arm_interface.h"
12#include "core/core.h" 11#include "core/core.h"
13#include "core/file_sys/program_metadata.h" 12#include "core/file_sys/program_metadata.h"
14#include "core/hle/kernel/errors.h" 13#include "core/hle/kernel/errors.h"
14#include "core/hle/kernel/process.h"
15#include "core/hle/kernel/resource_limit.h"
15#include "core/hle/kernel/vm_manager.h" 16#include "core/hle/kernel/vm_manager.h"
16#include "core/memory.h" 17#include "core/memory.h"
17#include "core/memory_setup.h" 18#include "core/memory_setup.h"
@@ -49,10 +50,14 @@ bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
         type != next.type) {
         return false;
     }
-    if (type == VMAType::AllocatedMemoryBlock &&
-        (backing_block != next.backing_block || offset + size != next.offset)) {
+    if ((attribute & MemoryAttribute::DeviceMapped) == MemoryAttribute::DeviceMapped) {
+        // TODO: Can device mapped memory be merged sanely?
+        // Not merging it may cause inaccuracies versus hardware when memory layout is queried.
         return false;
     }
+    if (type == VMAType::AllocatedMemoryBlock) {
+        return true;
+    }
     if (type == VMAType::BackingMemory && backing_memory + size != next.backing_memory) {
         return false;
     }
@@ -100,7 +105,7 @@ bool VMManager::IsValidHandle(VMAHandle handle) const {
 ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target,
                                                           std::shared_ptr<std::vector<u8>> block,
                                                           std::size_t offset, u64 size,
-                                                          MemoryState state) {
+                                                          MemoryState state, VMAPermission perm) {
     ASSERT(block != nullptr);
     ASSERT(offset + size <= block->size());
 
@@ -109,17 +114,8 @@ ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target,
     VirtualMemoryArea& final_vma = vma_handle->second;
     ASSERT(final_vma.size == size);
 
-    system.ArmInterface(0).MapBackingMemory(target, size, block->data() + offset,
-                                            VMAPermission::ReadWriteExecute);
-    system.ArmInterface(1).MapBackingMemory(target, size, block->data() + offset,
-                                            VMAPermission::ReadWriteExecute);
-    system.ArmInterface(2).MapBackingMemory(target, size, block->data() + offset,
-                                            VMAPermission::ReadWriteExecute);
-    system.ArmInterface(3).MapBackingMemory(target, size, block->data() + offset,
-                                            VMAPermission::ReadWriteExecute);
-
     final_vma.type = VMAType::AllocatedMemoryBlock;
-    final_vma.permissions = VMAPermission::ReadWrite;
+    final_vma.permissions = perm;
     final_vma.state = state;
     final_vma.backing_block = std::move(block);
     final_vma.offset = offset;
@@ -137,11 +133,6 @@ ResultVal<VMManager::VMAHandle> VMManager::MapBackingMemory(VAddr target, u8* me
     VirtualMemoryArea& final_vma = vma_handle->second;
     ASSERT(final_vma.size == size);
 
-    system.ArmInterface(0).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
-    system.ArmInterface(1).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
-    system.ArmInterface(2).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
-    system.ArmInterface(3).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
-
     final_vma.type = VMAType::BackingMemory;
     final_vma.permissions = VMAPermission::ReadWrite;
     final_vma.state = state;
@@ -230,11 +221,6 @@ ResultCode VMManager::UnmapRange(VAddr target, u64 size) {
 
     ASSERT(FindVMA(target)->second.size >= size);
 
-    system.ArmInterface(0).UnmapMemory(target, size);
-    system.ArmInterface(1).UnmapMemory(target, size);
-    system.ArmInterface(2).UnmapMemory(target, size);
-    system.ArmInterface(3).UnmapMemory(target, size);
-
     return RESULT_SUCCESS;
 }
 
@@ -308,6 +294,166 @@ ResultVal<VAddr> VMManager::SetHeapSize(u64 size) {
308 return MakeResult<VAddr>(heap_region_base); 294 return MakeResult<VAddr>(heap_region_base);
309} 295}
310 296
297ResultCode VMManager::MapPhysicalMemory(VAddr target, u64 size) {
298 const auto end_addr = target + size;
299 const auto last_addr = end_addr - 1;
300 VAddr cur_addr = target;
301
302 ResultCode result = RESULT_SUCCESS;
303
304 // Check how much memory we've already mapped.
305 const auto mapped_size_result = SizeOfAllocatedVMAsInRange(target, size);
306 if (mapped_size_result.Failed()) {
307 return mapped_size_result.Code();
308 }
309
310 // If we've already mapped the desired amount, return early.
311 const std::size_t mapped_size = *mapped_size_result;
312 if (mapped_size == size) {
313 return RESULT_SUCCESS;
314 }
315
316 // Check that we can map the memory we want.
317 const auto res_limit = system.CurrentProcess()->GetResourceLimit();
318 const u64 physmem_remaining = res_limit->GetMaxResourceValue(ResourceType::PhysicalMemory) -
319 res_limit->GetCurrentResourceValue(ResourceType::PhysicalMemory);
320 if (physmem_remaining < (size - mapped_size)) {
321 return ERR_RESOURCE_LIMIT_EXCEEDED;
322 }
323
324 // Keep track of the memory regions we unmap.
325 std::vector<std::pair<u64, u64>> mapped_regions;
326
327 // Iterate, trying to map memory.
328 {
329 cur_addr = target;
330
331 auto iter = FindVMA(target);
332 ASSERT_MSG(iter != vma_map.end(), "MapPhysicalMemory iter != end");
333
334 while (true) {
335 const auto& vma = iter->second;
336 const auto vma_start = vma.base;
337 const auto vma_end = vma_start + vma.size;
338 const auto vma_last = vma_end - 1;
339
340 // Map the memory block
341 const auto map_size = std::min(end_addr - cur_addr, vma_end - cur_addr);
342 if (vma.state == MemoryState::Unmapped) {
343 const auto map_res =
344 MapMemoryBlock(cur_addr, std::make_shared<std::vector<u8>>(map_size, 0), 0,
345 map_size, MemoryState::Heap, VMAPermission::ReadWrite);
346 result = map_res.Code();
347 if (result.IsError()) {
348 break;
349 }
350
351 mapped_regions.emplace_back(cur_addr, map_size);
352 }
353
354 // Break once we hit the end of the range.
355 if (last_addr <= vma_last) {
356 break;
357 }
358
359 // Advance to the next block.
360 cur_addr = vma_end;
361 iter = FindVMA(cur_addr);
362 ASSERT_MSG(iter != vma_map.end(), "MapPhysicalMemory iter != end");
363 }
364 }
365
366 // If we failed, unmap memory.
367 if (result.IsError()) {
368 for (const auto [unmap_address, unmap_size] : mapped_regions) {
369 ASSERT_MSG(UnmapRange(unmap_address, unmap_size).IsSuccess(),
370 "MapPhysicalMemory un-map on error");
371 }
372
373 return result;
374 }
375
376 // Update amount of mapped physical memory.
377 physical_memory_mapped += size - mapped_size;
378
379 return RESULT_SUCCESS;
380}
381
382ResultCode VMManager::UnmapPhysicalMemory(VAddr target, u64 size) {
383 const auto end_addr = target + size;
384 const auto last_addr = end_addr - 1;
385 VAddr cur_addr = target;
386
387 ResultCode result = RESULT_SUCCESS;
388
389 // Check how much memory is currently mapped.
390 const auto mapped_size_result = SizeOfUnmappablePhysicalMemoryInRange(target, size);
391 if (mapped_size_result.Failed()) {
392 return mapped_size_result.Code();
393 }
394
395 // If we've already unmapped all the memory, return early.
396 const std::size_t mapped_size = *mapped_size_result;
397 if (mapped_size == 0) {
398 return RESULT_SUCCESS;
399 }
400
401 // Keep track of the memory regions we unmap.
402 std::vector<std::pair<u64, u64>> unmapped_regions;
403
404 // Try to unmap regions.
405 {
406 cur_addr = target;
407
408 auto iter = FindVMA(target);
409 ASSERT_MSG(iter != vma_map.end(), "UnmapPhysicalMemory iter != end");
410
411 while (true) {
412 const auto& vma = iter->second;
413 const auto vma_start = vma.base;
414 const auto vma_end = vma_start + vma.size;
415 const auto vma_last = vma_end - 1;
416
417 // Unmap the memory block
418 const auto unmap_size = std::min(end_addr - cur_addr, vma_end - cur_addr);
419 if (vma.state == MemoryState::Heap) {
420 result = UnmapRange(cur_addr, unmap_size);
421 if (result.IsError()) {
422 break;
423 }
424
425 unmapped_regions.emplace_back(cur_addr, unmap_size);
426 }
427
428 // Break once we hit the end of the range.
429 if (last_addr <= vma_last) {
430 break;
431 }
432
433 // Advance to the next block.
434 cur_addr = vma_end;
435 iter = FindVMA(cur_addr);
436 ASSERT_MSG(iter != vma_map.end(), "UnmapPhysicalMemory iter != end");
437 }
438 }
439
440 // If we failed, re-map regions.
441 // TODO: Preserve memory contents?
442 if (result.IsError()) {
443 for (const auto [map_address, map_size] : unmapped_regions) {
444 const auto remap_res =
445 MapMemoryBlock(map_address, std::make_shared<std::vector<u8>>(map_size, 0), 0,
446 map_size, MemoryState::Heap, VMAPermission::None);
447 ASSERT_MSG(remap_res.Succeeded(), "UnmapPhysicalMemory re-map on error");
448 }
449 }
450
451    // Update the amount of mapped physical memory.
452 physical_memory_mapped -= mapped_size;
453
454 return RESULT_SUCCESS;
455}
456
311ResultCode VMManager::MapCodeMemory(VAddr dst_address, VAddr src_address, u64 size) { 457ResultCode VMManager::MapCodeMemory(VAddr dst_address, VAddr src_address, u64 size) {
312 constexpr auto ignore_attribute = MemoryAttribute::LockedForIPC | MemoryAttribute::DeviceMapped; 458 constexpr auto ignore_attribute = MemoryAttribute::LockedForIPC | MemoryAttribute::DeviceMapped;
313 const auto src_check_result = CheckRangeState( 459 const auto src_check_result = CheckRangeState(
@@ -455,7 +601,7 @@ ResultCode VMManager::MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size, Mem
455 // Protect mirror with permissions from old region 601 // Protect mirror with permissions from old region
456 Reprotect(new_vma, vma->second.permissions); 602 Reprotect(new_vma, vma->second.permissions);
457 // Remove permissions from old region 603 // Remove permissions from old region
458 Reprotect(vma, VMAPermission::None); 604 ReprotectRange(src_addr, size, VMAPermission::None);
459 605
460 return RESULT_SUCCESS; 606 return RESULT_SUCCESS;
461} 607}
@@ -588,14 +734,14 @@ VMManager::VMAIter VMManager::SplitVMA(VMAIter vma_handle, u64 offset_in_vma) {
588VMManager::VMAIter VMManager::MergeAdjacent(VMAIter iter) { 734VMManager::VMAIter VMManager::MergeAdjacent(VMAIter iter) {
589 const VMAIter next_vma = std::next(iter); 735 const VMAIter next_vma = std::next(iter);
590 if (next_vma != vma_map.end() && iter->second.CanBeMergedWith(next_vma->second)) { 736 if (next_vma != vma_map.end() && iter->second.CanBeMergedWith(next_vma->second)) {
591 iter->second.size += next_vma->second.size; 737 MergeAdjacentVMA(iter->second, next_vma->second);
592 vma_map.erase(next_vma); 738 vma_map.erase(next_vma);
593 } 739 }
594 740
595 if (iter != vma_map.begin()) { 741 if (iter != vma_map.begin()) {
596 VMAIter prev_vma = std::prev(iter); 742 VMAIter prev_vma = std::prev(iter);
597 if (prev_vma->second.CanBeMergedWith(iter->second)) { 743 if (prev_vma->second.CanBeMergedWith(iter->second)) {
598 prev_vma->second.size += iter->second.size; 744 MergeAdjacentVMA(prev_vma->second, iter->second);
599 vma_map.erase(iter); 745 vma_map.erase(iter);
600 iter = prev_vma; 746 iter = prev_vma;
601 } 747 }
@@ -604,6 +750,38 @@ VMManager::VMAIter VMManager::MergeAdjacent(VMAIter iter) {
604 return iter; 750 return iter;
605} 751}
606 752
753void VMManager::MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryArea& right) {
754 ASSERT(left.CanBeMergedWith(right));
755
756 // Always merge allocated memory blocks, even when they don't share the same backing block.
757 if (left.type == VMAType::AllocatedMemoryBlock &&
758 (left.backing_block != right.backing_block || left.offset + left.size != right.offset)) {
759 // Check if we can save work.
760 if (left.offset == 0 && left.size == left.backing_block->size()) {
761 // Fast case: left is an entire backing block.
762 left.backing_block->insert(left.backing_block->end(),
763 right.backing_block->begin() + right.offset,
764 right.backing_block->begin() + right.offset + right.size);
765 } else {
766 // Slow case: make a new memory block for left and right.
767 auto new_memory = std::make_shared<std::vector<u8>>();
768 new_memory->insert(new_memory->end(), left.backing_block->begin() + left.offset,
769 left.backing_block->begin() + left.offset + left.size);
770 new_memory->insert(new_memory->end(), right.backing_block->begin() + right.offset,
771 right.backing_block->begin() + right.offset + right.size);
772 left.backing_block = new_memory;
773 left.offset = 0;
774 }
775
776            // A page table update is needed because the backing memory changed.
777 left.size += right.size;
778 UpdatePageTableForVMA(left);
779 } else {
780 // Just update the size.
781 left.size += right.size;
782 }
783}
784
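MergeAdjacentVMA only rebuilds backing storage when the two allocated ranges are not already contiguous views of one vector: the fast path appends the right slice onto a left block it fully owns, while the slow path copies both slices into a fresh vector. A standalone sketch of that split, assuming (as the guard above implies) that the two ranges live in distinct backing vectors:

    #include <cstddef>
    #include <cstdint>
    #include <memory>
    #include <vector>

    using Block = std::shared_ptr<std::vector<std::uint8_t>>;

    // left/right describe adjacent ranges as (block, offset, size); right must be
    // a different vector than left, mirroring the condition in MergeAdjacentVMA.
    Block MergeBacking(const Block& left, std::size_t l_off, std::size_t l_size,
                       const Block& right, std::size_t r_off, std::size_t r_size) {
        if (l_off == 0 && l_size == left->size()) {
            // Fast path: left already owns its whole block, so append right's slice.
            left->insert(left->end(), right->begin() + r_off,
                         right->begin() + r_off + r_size);
            return left;
        }
        // Slow path: copy both slices into a fresh block.
        auto merged = std::make_shared<std::vector<std::uint8_t>>();
        merged->insert(merged->end(), left->begin() + l_off, left->begin() + l_off + l_size);
        merged->insert(merged->end(), right->begin() + r_off, right->begin() + r_off + r_size);
        return merged;
    }
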
607void VMManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) { 785void VMManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) {
608 switch (vma.type) { 786 switch (vma.type) {
609 case VMAType::Free: 787 case VMAType::Free:
@@ -778,6 +956,84 @@ VMManager::CheckResults VMManager::CheckRangeState(VAddr address, u64 size, Memo
778 std::make_tuple(initial_state, initial_permissions, initial_attributes & ~ignore_mask)); 956 std::make_tuple(initial_state, initial_permissions, initial_attributes & ~ignore_mask));
779} 957}
780 958
959ResultVal<std::size_t> VMManager::SizeOfAllocatedVMAsInRange(VAddr address,
960 std::size_t size) const {
961 const VAddr end_addr = address + size;
962 const VAddr last_addr = end_addr - 1;
963 std::size_t mapped_size = 0;
964
965 VAddr cur_addr = address;
966 auto iter = FindVMA(cur_addr);
967 ASSERT_MSG(iter != vma_map.end(), "SizeOfAllocatedVMAsInRange iter != end");
968
969 while (true) {
970 const auto& vma = iter->second;
971 const VAddr vma_start = vma.base;
972 const VAddr vma_end = vma_start + vma.size;
973 const VAddr vma_last = vma_end - 1;
974
975 // Add size if relevant.
976 if (vma.state != MemoryState::Unmapped) {
977 mapped_size += std::min(end_addr - cur_addr, vma_end - cur_addr);
978 }
979
980 // Break once we hit the end of the range.
981 if (last_addr <= vma_last) {
982 break;
983 }
984
985 // Advance to the next block.
986 cur_addr = vma_end;
987 iter = std::next(iter);
988 ASSERT_MSG(iter != vma_map.end(), "SizeOfAllocatedVMAsInRange iter != end");
989 }
990
991 return MakeResult(mapped_size);
992}
993
994ResultVal<std::size_t> VMManager::SizeOfUnmappablePhysicalMemoryInRange(VAddr address,
995 std::size_t size) const {
996 const VAddr end_addr = address + size;
997 const VAddr last_addr = end_addr - 1;
998 std::size_t mapped_size = 0;
999
1000 VAddr cur_addr = address;
1001 auto iter = FindVMA(cur_addr);
1002 ASSERT_MSG(iter != vma_map.end(), "SizeOfUnmappablePhysicalMemoryInRange iter != end");
1003
1004 while (true) {
1005 const auto& vma = iter->second;
1006 const auto vma_start = vma.base;
1007 const auto vma_end = vma_start + vma.size;
1008 const auto vma_last = vma_end - 1;
1009 const auto state = vma.state;
1010 const auto attr = vma.attribute;
1011
1012 // Memory within region must be free or mapped heap.
1013 if (!((state == MemoryState::Heap && attr == MemoryAttribute::None) ||
1014 (state == MemoryState::Unmapped))) {
1015 return ERR_INVALID_ADDRESS_STATE;
1016 }
1017
1018 // Add size if relevant.
1019 if (state != MemoryState::Unmapped) {
1020 mapped_size += std::min(end_addr - cur_addr, vma_end - cur_addr);
1021 }
1022
1023 // Break once we hit the end of the range.
1024 if (last_addr <= vma_last) {
1025 break;
1026 }
1027
1028 // Advance to the next block.
1029 cur_addr = vma_end;
1030 iter = std::next(iter);
1031 ASSERT_MSG(iter != vma_map.end(), "SizeOfUnmappablePhysicalMemoryInRange iter != end");
1032 }
1033
1034 return MakeResult(mapped_size);
1035}
1036
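Both size probes clamp each VMA's contribution with min(end_addr - cur_addr, vma_end - cur_addr), so a block straddling the end of the query window only counts its overlapping bytes. A worked example of the clamp:

    #include <algorithm>
    #include <cstdint>

    // Contribution of the current VMA to a query ending at end_addr, with the walk
    // positioned at cur_addr (always >= the VMA's base in the loops above).
    std::uint64_t Contribution(std::uint64_t cur_addr, std::uint64_t end_addr,
                               std::uint64_t vma_end) {
        return std::min(end_addr - cur_addr, vma_end - cur_addr);
    }
    // E.g. a query over [0x2000, 0x6000) hitting a VMA that ends at 0x5000, with
    // cur_addr == 0x2000, yields min(0x4000, 0x3000) == 0x3000 mapped bytes.
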
781u64 VMManager::GetTotalPhysicalMemoryAvailable() const { 1037u64 VMManager::GetTotalPhysicalMemoryAvailable() const {
782 LOG_WARNING(Kernel, "(STUBBED) called"); 1038 LOG_WARNING(Kernel, "(STUBBED) called");
783 return 0xF8000000; 1039 return 0xF8000000;
diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h
index 9fe6ac3f4..0aecb7499 100644
--- a/src/core/hle/kernel/vm_manager.h
+++ b/src/core/hle/kernel/vm_manager.h
@@ -349,7 +349,8 @@ public:
349 * @param state MemoryState tag to attach to the VMA. 349 * @param state MemoryState tag to attach to the VMA.
350 */ 350 */
351 ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<std::vector<u8>> block, 351 ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<std::vector<u8>> block,
352 std::size_t offset, u64 size, MemoryState state); 352 std::size_t offset, u64 size, MemoryState state,
353 VMAPermission perm = VMAPermission::ReadWrite);
353 354
354 /** 355 /**
355 * Maps an unmanaged host memory pointer at a given address. 356 * Maps an unmanaged host memory pointer at a given address.
@@ -450,6 +451,34 @@ public:
450 /// 451 ///
451 ResultVal<VAddr> SetHeapSize(u64 size); 452 ResultVal<VAddr> SetHeapSize(u64 size);
452 453
454 /// Maps memory at a given address.
455 ///
456    /// @param target The virtual address to map memory at.
457 /// @param size The amount of memory to map.
458 ///
459 /// @note The destination address must lie within the Map region.
460 ///
461 /// @note This function requires that SystemResourceSize be non-zero,
462 /// however, this is just because if it were not then the
463 /// resulting page tables could be exploited on hardware by
464 /// a malicious program. SystemResource usage does not need
465 /// to be explicitly checked or updated here.
466 ResultCode MapPhysicalMemory(VAddr target, u64 size);
467
468 /// Unmaps memory at a given address.
469 ///
470    /// @param target The virtual address to unmap memory at.
471 /// @param size The amount of memory to unmap.
472 ///
473 /// @note The destination address must lie within the Map region.
474 ///
475 /// @note This function requires that SystemResourceSize be non-zero,
476 /// however, this is just because if it were not then the
477 /// resulting page tables could be exploited on hardware by
478 /// a malicious program. SystemResource usage does not need
479 /// to be explicitly checked or updated here.
480 ResultCode UnmapPhysicalMemory(VAddr target, u64 size);
481
453 /// Maps a region of memory as code memory. 482 /// Maps a region of memory as code memory.
454 /// 483 ///
455 /// @param dst_address The base address of the region to create the aliasing memory region. 484 /// @param dst_address The base address of the region to create the aliasing memory region.
@@ -657,6 +686,11 @@ private:
657 */ 686 */
658 VMAIter MergeAdjacent(VMAIter vma); 687 VMAIter MergeAdjacent(VMAIter vma);
659 688
689 /**
690 * Merges two adjacent VMAs.
691 */
692 void MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryArea& right);
693
660 /// Updates the pages corresponding to this VMA so they match the VMA's attributes. 694 /// Updates the pages corresponding to this VMA so they match the VMA's attributes.
661 void UpdatePageTableForVMA(const VirtualMemoryArea& vma); 695 void UpdatePageTableForVMA(const VirtualMemoryArea& vma);
662 696
@@ -701,6 +735,13 @@ private:
701 MemoryAttribute attribute_mask, MemoryAttribute attribute, 735 MemoryAttribute attribute_mask, MemoryAttribute attribute,
702 MemoryAttribute ignore_mask) const; 736 MemoryAttribute ignore_mask) const;
703 737
738 /// Gets the amount of memory currently mapped (state != Unmapped) in a range.
739 ResultVal<std::size_t> SizeOfAllocatedVMAsInRange(VAddr address, std::size_t size) const;
740
741 /// Gets the amount of memory unmappable by UnmapPhysicalMemory in a range.
742 ResultVal<std::size_t> SizeOfUnmappablePhysicalMemoryInRange(VAddr address,
743 std::size_t size) const;
744
704 /** 745 /**
705 * A map covering the entirety of the managed address space, keyed by the `base` field of each 746 * A map covering the entirety of the managed address space, keyed by the `base` field of each
706 * VMA. It must always be modified by splitting or merging VMAs, so that the invariant 747 * VMA. It must always be modified by splitting or merging VMAs, so that the invariant
@@ -742,6 +783,11 @@ private:
742 // end of the range. This is essentially 'base_address + current_size'. 783 // end of the range. This is essentially 'base_address + current_size'.
743 VAddr heap_end = 0; 784 VAddr heap_end = 0;
744 785
786 // The current amount of memory mapped via MapPhysicalMemory.
787 // This is used here (and in Nintendo's kernel) only for debugging, and does not impact
788 // any behavior.
789 u64 physical_memory_mapped = 0;
790
745 Core::System& system; 791 Core::System& system;
746}; 792};
747} // namespace Kernel 793} // namespace Kernel
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index eced38001..a192a1f5f 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -29,7 +29,8 @@
29#include "core/hle/service/am/omm.h" 29#include "core/hle/service/am/omm.h"
30#include "core/hle/service/am/spsm.h" 30#include "core/hle/service/am/spsm.h"
31#include "core/hle/service/am/tcap.h" 31#include "core/hle/service/am/tcap.h"
32#include "core/hle/service/apm/apm.h" 32#include "core/hle/service/apm/controller.h"
33#include "core/hle/service/apm/interface.h"
33#include "core/hle/service/filesystem/filesystem.h" 34#include "core/hle/service/filesystem/filesystem.h"
34#include "core/hle/service/ns/ns.h" 35#include "core/hle/service/ns/ns.h"
35#include "core/hle/service/nvflinger/nvflinger.h" 36#include "core/hle/service/nvflinger/nvflinger.h"
@@ -265,8 +266,8 @@ ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger
265 {65, nullptr, "ReportUserIsActive"}, 266 {65, nullptr, "ReportUserIsActive"},
266 {66, nullptr, "GetCurrentIlluminance"}, 267 {66, nullptr, "GetCurrentIlluminance"},
267 {67, nullptr, "IsIlluminanceAvailable"}, 268 {67, nullptr, "IsIlluminanceAvailable"},
268 {68, nullptr, "SetAutoSleepDisabled"}, 269 {68, &ISelfController::SetAutoSleepDisabled, "SetAutoSleepDisabled"},
269 {69, nullptr, "IsAutoSleepDisabled"}, 270 {69, &ISelfController::IsAutoSleepDisabled, "IsAutoSleepDisabled"},
270 {70, nullptr, "ReportMultimediaError"}, 271 {70, nullptr, "ReportMultimediaError"},
271 {71, nullptr, "GetCurrentIlluminanceEx"}, 272 {71, nullptr, "GetCurrentIlluminanceEx"},
272 {80, nullptr, "SetWirelessPriorityMode"}, 273 {80, nullptr, "SetWirelessPriorityMode"},
@@ -453,6 +454,34 @@ void ISelfController::GetIdleTimeDetectionExtension(Kernel::HLERequestContext& c
453 rb.Push<u32>(idle_time_detection_extension); 454 rb.Push<u32>(idle_time_detection_extension);
454} 455}
455 456
457void ISelfController::SetAutoSleepDisabled(Kernel::HLERequestContext& ctx) {
458 IPC::RequestParser rp{ctx};
459 is_auto_sleep_disabled = rp.Pop<bool>();
460
461 // On the system itself, if the previous state of is_auto_sleep_disabled
462    // differed from the current value passed in, it'd signal the internal
463 // window manager to update (and also increment some statistics like update counts)
464 //
465 // It'd also indicate this change to an idle handling context.
466 //
467 // However, given we're emulating this behavior, most of this can be ignored
468 // and it's sufficient to simply set the member variable for querying via
469 // IsAutoSleepDisabled().
470
471 LOG_DEBUG(Service_AM, "called. is_auto_sleep_disabled={}", is_auto_sleep_disabled);
472
473 IPC::ResponseBuilder rb{ctx, 2};
474 rb.Push(RESULT_SUCCESS);
475}
476
477void ISelfController::IsAutoSleepDisabled(Kernel::HLERequestContext& ctx) {
478 LOG_DEBUG(Service_AM, "called.");
479
480 IPC::ResponseBuilder rb{ctx, 3};
481 rb.Push(RESULT_SUCCESS);
482 rb.Push(is_auto_sleep_disabled);
483}
484
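A note on the ResponseBuilder sizes used in the two handlers above: judging by the handlers throughout this diff, the count covers the result code (two words) plus one word per u32-sized payload value, so a bare-result reply builds with {ctx, 2} and the bool reply with {ctx, 3}. In sketch form:

    // Response with no payload (SetAutoSleepDisabled):
    IPC::ResponseBuilder rb{ctx, 2};
    rb.Push(RESULT_SUCCESS);

    // Response with one payload word (IsAutoSleepDisabled):
    IPC::ResponseBuilder rb2{ctx, 3};
    rb2.Push(RESULT_SUCCESS);
    rb2.Push(is_auto_sleep_disabled); // bool pushed as one payload word
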
456void ISelfController::GetAccumulatedSuspendedTickValue(Kernel::HLERequestContext& ctx) { 485void ISelfController::GetAccumulatedSuspendedTickValue(Kernel::HLERequestContext& ctx) {
457 LOG_DEBUG(Service_AM, "called."); 486 LOG_DEBUG(Service_AM, "called.");
458 487
@@ -520,8 +549,9 @@ void AppletMessageQueue::OperationModeChanged() {
520 on_operation_mode_changed.writable->Signal(); 549 on_operation_mode_changed.writable->Signal();
521} 550}
522 551
523ICommonStateGetter::ICommonStateGetter(std::shared_ptr<AppletMessageQueue> msg_queue) 552ICommonStateGetter::ICommonStateGetter(Core::System& system,
524 : ServiceFramework("ICommonStateGetter"), msg_queue(std::move(msg_queue)) { 553 std::shared_ptr<AppletMessageQueue> msg_queue)
554 : ServiceFramework("ICommonStateGetter"), system(system), msg_queue(std::move(msg_queue)) {
525 // clang-format off 555 // clang-format off
526 static const FunctionInfo functions[] = { 556 static const FunctionInfo functions[] = {
527 {0, &ICommonStateGetter::GetEventHandle, "GetEventHandle"}, 557 {0, &ICommonStateGetter::GetEventHandle, "GetEventHandle"},
@@ -554,7 +584,7 @@ ICommonStateGetter::ICommonStateGetter(std::shared_ptr<AppletMessageQueue> msg_q
554 {63, nullptr, "GetHdcpAuthenticationStateChangeEvent"}, 584 {63, nullptr, "GetHdcpAuthenticationStateChangeEvent"},
555 {64, nullptr, "SetTvPowerStateMatchingMode"}, 585 {64, nullptr, "SetTvPowerStateMatchingMode"},
556 {65, nullptr, "GetApplicationIdByContentActionName"}, 586 {65, nullptr, "GetApplicationIdByContentActionName"},
557 {66, nullptr, "SetCpuBoostMode"}, 587 {66, &ICommonStateGetter::SetCpuBoostMode, "SetCpuBoostMode"},
558 {80, nullptr, "PerformSystemButtonPressingIfInFocus"}, 588 {80, nullptr, "PerformSystemButtonPressingIfInFocus"},
559 {90, nullptr, "SetPerformanceConfigurationChangedNotification"}, 589 {90, nullptr, "SetPerformanceConfigurationChangedNotification"},
560 {91, nullptr, "GetCurrentPerformanceConfiguration"}, 590 {91, nullptr, "GetCurrentPerformanceConfiguration"},
@@ -635,6 +665,16 @@ void ICommonStateGetter::GetDefaultDisplayResolution(Kernel::HLERequestContext&
635 } 665 }
636} 666}
637 667
668void ICommonStateGetter::SetCpuBoostMode(Kernel::HLERequestContext& ctx) {
669 LOG_DEBUG(Service_AM, "called, forwarding to APM:SYS");
670
671 const auto& sm = system.ServiceManager();
672 const auto apm_sys = sm.GetService<APM::APM_Sys>("apm:sys");
673 ASSERT(apm_sys != nullptr);
674
675 apm_sys->SetCpuBoostMode(ctx);
676}
677
638IStorage::IStorage(std::vector<u8> buffer) 678IStorage::IStorage(std::vector<u8> buffer)
639 : ServiceFramework("IStorage"), buffer(std::move(buffer)) { 679 : ServiceFramework("IStorage"), buffer(std::move(buffer)) {
640 // clang-format off 680 // clang-format off
@@ -663,13 +703,11 @@ void ICommonStateGetter::GetOperationMode(Kernel::HLERequestContext& ctx) {
663} 703}
664 704
665void ICommonStateGetter::GetPerformanceMode(Kernel::HLERequestContext& ctx) { 705void ICommonStateGetter::GetPerformanceMode(Kernel::HLERequestContext& ctx) {
666 const bool use_docked_mode{Settings::values.use_docked_mode}; 706 LOG_DEBUG(Service_AM, "called");
667 LOG_DEBUG(Service_AM, "called, use_docked_mode={}", use_docked_mode);
668 707
669 IPC::ResponseBuilder rb{ctx, 3}; 708 IPC::ResponseBuilder rb{ctx, 3};
670 rb.Push(RESULT_SUCCESS); 709 rb.Push(RESULT_SUCCESS);
671 rb.Push(static_cast<u32>(use_docked_mode ? APM::PerformanceMode::Docked 710 rb.PushEnum(system.GetAPMController().GetCurrentPerformanceMode());
672 : APM::PerformanceMode::Handheld));
673} 711}
674 712
675class ILibraryAppletAccessor final : public ServiceFramework<ILibraryAppletAccessor> { 713class ILibraryAppletAccessor final : public ServiceFramework<ILibraryAppletAccessor> {
diff --git a/src/core/hle/service/am/am.h b/src/core/hle/service/am/am.h
index 49ff20959..6cb582483 100644
--- a/src/core/hle/service/am/am.h
+++ b/src/core/hle/service/am/am.h
@@ -133,6 +133,8 @@ private:
133 void SetHandlesRequestToDisplay(Kernel::HLERequestContext& ctx); 133 void SetHandlesRequestToDisplay(Kernel::HLERequestContext& ctx);
134 void SetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx); 134 void SetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx);
135 void GetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx); 135 void GetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx);
136 void SetAutoSleepDisabled(Kernel::HLERequestContext& ctx);
137 void IsAutoSleepDisabled(Kernel::HLERequestContext& ctx);
136 void GetAccumulatedSuspendedTickValue(Kernel::HLERequestContext& ctx); 138 void GetAccumulatedSuspendedTickValue(Kernel::HLERequestContext& ctx);
137 void GetAccumulatedSuspendedTickChangedEvent(Kernel::HLERequestContext& ctx); 139 void GetAccumulatedSuspendedTickChangedEvent(Kernel::HLERequestContext& ctx);
138 140
@@ -142,11 +144,13 @@ private:
142 144
143 u32 idle_time_detection_extension = 0; 145 u32 idle_time_detection_extension = 0;
144 u64 num_fatal_sections_entered = 0; 146 u64 num_fatal_sections_entered = 0;
147 bool is_auto_sleep_disabled = false;
145}; 148};
146 149
147class ICommonStateGetter final : public ServiceFramework<ICommonStateGetter> { 150class ICommonStateGetter final : public ServiceFramework<ICommonStateGetter> {
148public: 151public:
149 explicit ICommonStateGetter(std::shared_ptr<AppletMessageQueue> msg_queue); 152 explicit ICommonStateGetter(Core::System& system,
153 std::shared_ptr<AppletMessageQueue> msg_queue);
150 ~ICommonStateGetter() override; 154 ~ICommonStateGetter() override;
151 155
152private: 156private:
@@ -168,7 +172,9 @@ private:
168 void GetPerformanceMode(Kernel::HLERequestContext& ctx); 172 void GetPerformanceMode(Kernel::HLERequestContext& ctx);
169 void GetBootMode(Kernel::HLERequestContext& ctx); 173 void GetBootMode(Kernel::HLERequestContext& ctx);
170 void GetDefaultDisplayResolution(Kernel::HLERequestContext& ctx); 174 void GetDefaultDisplayResolution(Kernel::HLERequestContext& ctx);
175 void SetCpuBoostMode(Kernel::HLERequestContext& ctx);
171 176
177 Core::System& system;
172 std::shared_ptr<AppletMessageQueue> msg_queue; 178 std::shared_ptr<AppletMessageQueue> msg_queue;
173}; 179};
174 180
diff --git a/src/core/hle/service/am/applet_ae.cpp b/src/core/hle/service/am/applet_ae.cpp
index fe5beb8f9..a34368c8b 100644
--- a/src/core/hle/service/am/applet_ae.cpp
+++ b/src/core/hle/service/am/applet_ae.cpp
@@ -42,7 +42,7 @@ private:
42 42
43 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 43 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
44 rb.Push(RESULT_SUCCESS); 44 rb.Push(RESULT_SUCCESS);
45 rb.PushIpcInterface<ICommonStateGetter>(msg_queue); 45 rb.PushIpcInterface<ICommonStateGetter>(system, msg_queue);
46 } 46 }
47 47
48 void GetSelfController(Kernel::HLERequestContext& ctx) { 48 void GetSelfController(Kernel::HLERequestContext& ctx) {
@@ -146,7 +146,7 @@ private:
146 146
147 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 147 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
148 rb.Push(RESULT_SUCCESS); 148 rb.Push(RESULT_SUCCESS);
149 rb.PushIpcInterface<ICommonStateGetter>(msg_queue); 149 rb.PushIpcInterface<ICommonStateGetter>(system, msg_queue);
150 } 150 }
151 151
152 void GetSelfController(Kernel::HLERequestContext& ctx) { 152 void GetSelfController(Kernel::HLERequestContext& ctx) {
diff --git a/src/core/hle/service/am/applet_oe.cpp b/src/core/hle/service/am/applet_oe.cpp
index 6e255fe95..5d53ef113 100644
--- a/src/core/hle/service/am/applet_oe.cpp
+++ b/src/core/hle/service/am/applet_oe.cpp
@@ -80,7 +80,7 @@ private:
80 80
81 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 81 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
82 rb.Push(RESULT_SUCCESS); 82 rb.Push(RESULT_SUCCESS);
83 rb.PushIpcInterface<ICommonStateGetter>(msg_queue); 83 rb.PushIpcInterface<ICommonStateGetter>(system, msg_queue);
84 } 84 }
85 85
86 void GetLibraryAppletCreator(Kernel::HLERequestContext& ctx) { 86 void GetLibraryAppletCreator(Kernel::HLERequestContext& ctx) {
diff --git a/src/core/hle/service/apm/apm.cpp b/src/core/hle/service/apm/apm.cpp
index f3c09bbb1..85bbf5988 100644
--- a/src/core/hle/service/apm/apm.cpp
+++ b/src/core/hle/service/apm/apm.cpp
@@ -2,7 +2,6 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/logging/log.h"
6#include "core/hle/ipc_helpers.h" 5#include "core/hle/ipc_helpers.h"
7#include "core/hle/service/apm/apm.h" 6#include "core/hle/service/apm/apm.h"
8#include "core/hle/service/apm/interface.h" 7#include "core/hle/service/apm/interface.h"
@@ -12,11 +11,15 @@ namespace Service::APM {
12Module::Module() = default; 11Module::Module() = default;
13Module::~Module() = default; 12Module::~Module() = default;
14 13
15void InstallInterfaces(SM::ServiceManager& service_manager) { 14void InstallInterfaces(Core::System& system) {
16 auto module_ = std::make_shared<Module>(); 15 auto module_ = std::make_shared<Module>();
17 std::make_shared<APM>(module_, "apm")->InstallAsService(service_manager); 16 std::make_shared<APM>(module_, system.GetAPMController(), "apm")
18 std::make_shared<APM>(module_, "apm:p")->InstallAsService(service_manager); 17 ->InstallAsService(system.ServiceManager());
19 std::make_shared<APM_Sys>()->InstallAsService(service_manager); 18 std::make_shared<APM>(module_, system.GetAPMController(), "apm:p")
19 ->InstallAsService(system.ServiceManager());
20 std::make_shared<APM>(module_, system.GetAPMController(), "apm:am")
21 ->InstallAsService(system.ServiceManager());
22 std::make_shared<APM_Sys>(system.GetAPMController())->InstallAsService(system.ServiceManager());
20} 23}
21 24
22} // namespace Service::APM 25} // namespace Service::APM
diff --git a/src/core/hle/service/apm/apm.h b/src/core/hle/service/apm/apm.h
index 4d7d5bb7c..cf4c2bb11 100644
--- a/src/core/hle/service/apm/apm.h
+++ b/src/core/hle/service/apm/apm.h
@@ -8,11 +8,6 @@
8 8
9namespace Service::APM { 9namespace Service::APM {
10 10
11enum class PerformanceMode : u8 {
12 Handheld = 0,
13 Docked = 1,
14};
15
16class Module final { 11class Module final {
17public: 12public:
18 Module(); 13 Module();
@@ -20,6 +15,6 @@ public:
20}; 15};
21 16
22/// Registers all APM services with the specified service manager. 17/// Registers all APM services with the specified service manager.
23void InstallInterfaces(SM::ServiceManager& service_manager); 18void InstallInterfaces(Core::System& system);
24 19
25} // namespace Service::APM 20} // namespace Service::APM
diff --git a/src/core/hle/service/apm/controller.cpp b/src/core/hle/service/apm/controller.cpp
new file mode 100644
index 000000000..4376612eb
--- /dev/null
+++ b/src/core/hle/service/apm/controller.cpp
@@ -0,0 +1,68 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/logging/log.h"
6#include "core/core_timing.h"
7#include "core/hle/service/apm/controller.h"
8#include "core/settings.h"
9
10namespace Service::APM {
11
12constexpr PerformanceConfiguration DEFAULT_PERFORMANCE_CONFIGURATION =
13 PerformanceConfiguration::Config7;
14
15Controller::Controller(Core::Timing::CoreTiming& core_timing)
16 : core_timing(core_timing), configs{
17 {PerformanceMode::Handheld, DEFAULT_PERFORMANCE_CONFIGURATION},
18 {PerformanceMode::Docked, DEFAULT_PERFORMANCE_CONFIGURATION},
19 } {}
20
21Controller::~Controller() = default;
22
23void Controller::SetPerformanceConfiguration(PerformanceMode mode,
24 PerformanceConfiguration config) {
25 static const std::map<PerformanceConfiguration, u32> PCONFIG_TO_SPEED_MAP{
26 {PerformanceConfiguration::Config1, 1020}, {PerformanceConfiguration::Config2, 1020},
27 {PerformanceConfiguration::Config3, 1224}, {PerformanceConfiguration::Config4, 1020},
28 {PerformanceConfiguration::Config5, 1020}, {PerformanceConfiguration::Config6, 1224},
29 {PerformanceConfiguration::Config7, 1020}, {PerformanceConfiguration::Config8, 1020},
30 {PerformanceConfiguration::Config9, 1020}, {PerformanceConfiguration::Config10, 1020},
31 {PerformanceConfiguration::Config11, 1020}, {PerformanceConfiguration::Config12, 1020},
32 {PerformanceConfiguration::Config13, 1785}, {PerformanceConfiguration::Config14, 1785},
33 {PerformanceConfiguration::Config15, 1020}, {PerformanceConfiguration::Config16, 1020},
34 };
35
36 SetClockSpeed(PCONFIG_TO_SPEED_MAP.find(config)->second);
37 configs.insert_or_assign(mode, config);
38}
39
40void Controller::SetFromCpuBoostMode(CpuBoostMode mode) {
41 constexpr std::array<PerformanceConfiguration, 3> BOOST_MODE_TO_CONFIG_MAP{{
42 PerformanceConfiguration::Config7,
43 PerformanceConfiguration::Config13,
44 PerformanceConfiguration::Config15,
45 }};
46
47 SetPerformanceConfiguration(PerformanceMode::Docked,
48 BOOST_MODE_TO_CONFIG_MAP.at(static_cast<u32>(mode)));
49}
50
51PerformanceMode Controller::GetCurrentPerformanceMode() {
52 return Settings::values.use_docked_mode ? PerformanceMode::Docked : PerformanceMode::Handheld;
53}
54
55PerformanceConfiguration Controller::GetCurrentPerformanceConfiguration(PerformanceMode mode) {
56 if (configs.find(mode) == configs.end()) {
57 configs.insert_or_assign(mode, DEFAULT_PERFORMANCE_CONFIGURATION);
58 }
59
60 return configs[mode];
61}
62
63void Controller::SetClockSpeed(u32 mhz) {
64 LOG_INFO(Service_APM, "called, mhz={:08X}", mhz);
65 // TODO(DarkLordZach): Actually signal core_timing to change clock speed.
66}
67
68} // namespace Service::APM
diff --git a/src/core/hle/service/apm/controller.h b/src/core/hle/service/apm/controller.h
new file mode 100644
index 000000000..8ac80eaea
--- /dev/null
+++ b/src/core/hle/service/apm/controller.h
@@ -0,0 +1,70 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <map>
8#include "common/common_types.h"
9
10namespace Core::Timing {
11class CoreTiming;
12}
13
14namespace Service::APM {
15
16enum class PerformanceConfiguration : u32 {
17 Config1 = 0x00010000,
18 Config2 = 0x00010001,
19 Config3 = 0x00010002,
20 Config4 = 0x00020000,
21 Config5 = 0x00020001,
22 Config6 = 0x00020002,
23 Config7 = 0x00020003,
24 Config8 = 0x00020004,
25 Config9 = 0x00020005,
26 Config10 = 0x00020006,
27 Config11 = 0x92220007,
28 Config12 = 0x92220008,
29 Config13 = 0x92220009,
30 Config14 = 0x9222000A,
31 Config15 = 0x9222000B,
32 Config16 = 0x9222000C,
33};
34
35enum class CpuBoostMode : u32 {
36 Disabled = 0,
37 Full = 1, // CPU + GPU -> Config 13, 14, 15, or 16
38 Partial = 2, // GPU Only -> Config 15 or 16
39};
40
41enum class PerformanceMode : u8 {
42 Handheld = 0,
43 Docked = 1,
44};
45
46// Class to manage the state and changes of the emulated system's performance.
47// Specifically, this deals with PerformanceMode, which corresponds to the system being docked or
48// undocked, and PerformanceConfig which specifies the exact CPU, GPU, and Memory clocks to operate
49// at. Additionally, this manages 'Boost Mode', which allows games to temporarily overclock the
50// system during times of high load -- this simply maps to different PerformanceConfigs to use.
51class Controller {
52public:
53 Controller(Core::Timing::CoreTiming& core_timing);
54 ~Controller();
55
56 void SetPerformanceConfiguration(PerformanceMode mode, PerformanceConfiguration config);
57 void SetFromCpuBoostMode(CpuBoostMode mode);
58
59 PerformanceMode GetCurrentPerformanceMode();
60 PerformanceConfiguration GetCurrentPerformanceConfiguration(PerformanceMode mode);
61
62private:
63 void SetClockSpeed(u32 mhz);
64
65 std::map<PerformanceMode, PerformanceConfiguration> configs;
66
67 Core::Timing::CoreTiming& core_timing;
68};
69
70} // namespace Service::APM
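A hedged usage sketch of the new Controller, assuming a CoreTiming reference named core_timing is in scope: per the tables in controller.cpp, CpuBoostMode::Full selects Config13 for the docked profile, whose mapped clock is 1785 MHz.

    // Sketch only; `core_timing` is an assumed, pre-existing CoreTiming instance.
    Service::APM::Controller controller{core_timing};

    controller.SetFromCpuBoostMode(Service::APM::CpuBoostMode::Full);

    // The docked profile now reports Config13 (mapped to 1785 MHz in controller.cpp).
    const auto config =
        controller.GetCurrentPerformanceConfiguration(Service::APM::PerformanceMode::Docked);
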
diff --git a/src/core/hle/service/apm/interface.cpp b/src/core/hle/service/apm/interface.cpp
index d058c0245..06f0f8edd 100644
--- a/src/core/hle/service/apm/interface.cpp
+++ b/src/core/hle/service/apm/interface.cpp
@@ -5,43 +5,32 @@
5#include "common/logging/log.h" 5#include "common/logging/log.h"
6#include "core/hle/ipc_helpers.h" 6#include "core/hle/ipc_helpers.h"
7#include "core/hle/service/apm/apm.h" 7#include "core/hle/service/apm/apm.h"
8#include "core/hle/service/apm/controller.h"
8#include "core/hle/service/apm/interface.h" 9#include "core/hle/service/apm/interface.h"
9 10
10namespace Service::APM { 11namespace Service::APM {
11 12
12class ISession final : public ServiceFramework<ISession> { 13class ISession final : public ServiceFramework<ISession> {
13public: 14public:
14 ISession() : ServiceFramework("ISession") { 15 ISession(Controller& controller) : ServiceFramework("ISession"), controller(controller) {
15 static const FunctionInfo functions[] = { 16 static const FunctionInfo functions[] = {
16 {0, &ISession::SetPerformanceConfiguration, "SetPerformanceConfiguration"}, 17 {0, &ISession::SetPerformanceConfiguration, "SetPerformanceConfiguration"},
17 {1, &ISession::GetPerformanceConfiguration, "GetPerformanceConfiguration"}, 18 {1, &ISession::GetPerformanceConfiguration, "GetPerformanceConfiguration"},
19 {2, nullptr, "SetCpuOverclockEnabled"},
18 }; 20 };
19 RegisterHandlers(functions); 21 RegisterHandlers(functions);
20 } 22 }
21 23
22private: 24private:
23 enum class PerformanceConfiguration : u32 {
24 Config1 = 0x00010000,
25 Config2 = 0x00010001,
26 Config3 = 0x00010002,
27 Config4 = 0x00020000,
28 Config5 = 0x00020001,
29 Config6 = 0x00020002,
30 Config7 = 0x00020003,
31 Config8 = 0x00020004,
32 Config9 = 0x00020005,
33 Config10 = 0x00020006,
34 Config11 = 0x92220007,
35 Config12 = 0x92220008,
36 };
37
38 void SetPerformanceConfiguration(Kernel::HLERequestContext& ctx) { 25 void SetPerformanceConfiguration(Kernel::HLERequestContext& ctx) {
39 IPC::RequestParser rp{ctx}; 26 IPC::RequestParser rp{ctx};
40 27
41 auto mode = static_cast<PerformanceMode>(rp.Pop<u32>()); 28 const auto mode = rp.PopEnum<PerformanceMode>();
42 u32 config = rp.Pop<u32>(); 29 const auto config = rp.PopEnum<PerformanceConfiguration>();
43 LOG_WARNING(Service_APM, "(STUBBED) called mode={} config={}", static_cast<u32>(mode), 30 LOG_DEBUG(Service_APM, "called mode={} config={}", static_cast<u32>(mode),
44 config); 31 static_cast<u32>(config));
32
33 controller.SetPerformanceConfiguration(mode, config);
45 34
46 IPC::ResponseBuilder rb{ctx, 2}; 35 IPC::ResponseBuilder rb{ctx, 2};
47 rb.Push(RESULT_SUCCESS); 36 rb.Push(RESULT_SUCCESS);
@@ -50,20 +39,23 @@ private:
50 void GetPerformanceConfiguration(Kernel::HLERequestContext& ctx) { 39 void GetPerformanceConfiguration(Kernel::HLERequestContext& ctx) {
51 IPC::RequestParser rp{ctx}; 40 IPC::RequestParser rp{ctx};
52 41
53 auto mode = static_cast<PerformanceMode>(rp.Pop<u32>()); 42 const auto mode = rp.PopEnum<PerformanceMode>();
54 LOG_WARNING(Service_APM, "(STUBBED) called mode={}", static_cast<u32>(mode)); 43 LOG_DEBUG(Service_APM, "called mode={}", static_cast<u32>(mode));
55 44
56 IPC::ResponseBuilder rb{ctx, 3}; 45 IPC::ResponseBuilder rb{ctx, 3};
57 rb.Push(RESULT_SUCCESS); 46 rb.Push(RESULT_SUCCESS);
58 rb.Push<u32>(static_cast<u32>(PerformanceConfiguration::Config1)); 47 rb.PushEnum(controller.GetCurrentPerformanceConfiguration(mode));
59 } 48 }
49
50 Controller& controller;
60}; 51};
61 52
62APM::APM(std::shared_ptr<Module> apm, const char* name) 53APM::APM(std::shared_ptr<Module> apm, Controller& controller, const char* name)
63 : ServiceFramework(name), apm(std::move(apm)) { 54 : ServiceFramework(name), apm(std::move(apm)), controller(controller) {
64 static const FunctionInfo functions[] = { 55 static const FunctionInfo functions[] = {
65 {0, &APM::OpenSession, "OpenSession"}, 56 {0, &APM::OpenSession, "OpenSession"},
66 {1, nullptr, "GetPerformanceMode"}, 57 {1, &APM::GetPerformanceMode, "GetPerformanceMode"},
58 {6, nullptr, "IsCpuOverclockEnabled"},
67 }; 59 };
68 RegisterHandlers(functions); 60 RegisterHandlers(functions);
69} 61}
@@ -75,10 +67,17 @@ void APM::OpenSession(Kernel::HLERequestContext& ctx) {
75 67
76 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 68 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
77 rb.Push(RESULT_SUCCESS); 69 rb.Push(RESULT_SUCCESS);
78 rb.PushIpcInterface<ISession>(); 70 rb.PushIpcInterface<ISession>(controller);
71}
72
73void APM::GetPerformanceMode(Kernel::HLERequestContext& ctx) {
74 LOG_DEBUG(Service_APM, "called");
75
76 IPC::ResponseBuilder rb{ctx, 2};
77 rb.PushEnum(controller.GetCurrentPerformanceMode());
79} 78}
80 79
81APM_Sys::APM_Sys() : ServiceFramework{"apm:sys"} { 80APM_Sys::APM_Sys(Controller& controller) : ServiceFramework{"apm:sys"}, controller(controller) {
82 // clang-format off 81 // clang-format off
83 static const FunctionInfo functions[] = { 82 static const FunctionInfo functions[] = {
84 {0, nullptr, "RequestPerformanceMode"}, 83 {0, nullptr, "RequestPerformanceMode"},
@@ -87,8 +86,8 @@ APM_Sys::APM_Sys() : ServiceFramework{"apm:sys"} {
87 {3, nullptr, "GetLastThrottlingState"}, 86 {3, nullptr, "GetLastThrottlingState"},
88 {4, nullptr, "ClearLastThrottlingState"}, 87 {4, nullptr, "ClearLastThrottlingState"},
89 {5, nullptr, "LoadAndApplySettings"}, 88 {5, nullptr, "LoadAndApplySettings"},
90 {6, nullptr, "SetCpuBoostMode"}, 89 {6, &APM_Sys::SetCpuBoostMode, "SetCpuBoostMode"},
91 {7, nullptr, "GetCurrentPerformanceConfiguration"}, 90 {7, &APM_Sys::GetCurrentPerformanceConfiguration, "GetCurrentPerformanceConfiguration"},
92 }; 91 };
93 // clang-format on 92 // clang-format on
94 93
@@ -102,7 +101,28 @@ void APM_Sys::GetPerformanceEvent(Kernel::HLERequestContext& ctx) {
102 101
103 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 102 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
104 rb.Push(RESULT_SUCCESS); 103 rb.Push(RESULT_SUCCESS);
105 rb.PushIpcInterface<ISession>(); 104 rb.PushIpcInterface<ISession>(controller);
105}
106
107void APM_Sys::SetCpuBoostMode(Kernel::HLERequestContext& ctx) {
108 IPC::RequestParser rp{ctx};
109 const auto mode = rp.PopEnum<CpuBoostMode>();
110
111 LOG_DEBUG(Service_APM, "called, mode={:08X}", static_cast<u32>(mode));
112
113 controller.SetFromCpuBoostMode(mode);
114
115 IPC::ResponseBuilder rb{ctx, 2};
116 rb.Push(RESULT_SUCCESS);
117}
118
119void APM_Sys::GetCurrentPerformanceConfiguration(Kernel::HLERequestContext& ctx) {
120 LOG_DEBUG(Service_APM, "called");
121
122 IPC::ResponseBuilder rb{ctx, 3};
123 rb.Push(RESULT_SUCCESS);
124 rb.PushEnum(
125 controller.GetCurrentPerformanceConfiguration(controller.GetCurrentPerformanceMode()));
106} 126}
107 127
108} // namespace Service::APM 128} // namespace Service::APM
diff --git a/src/core/hle/service/apm/interface.h b/src/core/hle/service/apm/interface.h
index 773541aa4..de1b89437 100644
--- a/src/core/hle/service/apm/interface.h
+++ b/src/core/hle/service/apm/interface.h
@@ -8,24 +8,34 @@
8 8
9namespace Service::APM { 9namespace Service::APM {
10 10
11class Controller;
12class Module;
13
11class APM final : public ServiceFramework<APM> { 14class APM final : public ServiceFramework<APM> {
12public: 15public:
13 explicit APM(std::shared_ptr<Module> apm, const char* name); 16 explicit APM(std::shared_ptr<Module> apm, Controller& controller, const char* name);
14 ~APM() override; 17 ~APM() override;
15 18
16private: 19private:
17 void OpenSession(Kernel::HLERequestContext& ctx); 20 void OpenSession(Kernel::HLERequestContext& ctx);
21 void GetPerformanceMode(Kernel::HLERequestContext& ctx);
18 22
19 std::shared_ptr<Module> apm; 23 std::shared_ptr<Module> apm;
24 Controller& controller;
20}; 25};
21 26
22class APM_Sys final : public ServiceFramework<APM_Sys> { 27class APM_Sys final : public ServiceFramework<APM_Sys> {
23public: 28public:
24 explicit APM_Sys(); 29 explicit APM_Sys(Controller& controller);
25 ~APM_Sys() override; 30 ~APM_Sys() override;
26 31
32 void SetCpuBoostMode(Kernel::HLERequestContext& ctx);
33
27private: 34private:
28 void GetPerformanceEvent(Kernel::HLERequestContext& ctx); 35 void GetPerformanceEvent(Kernel::HLERequestContext& ctx);
36 void GetCurrentPerformanceConfiguration(Kernel::HLERequestContext& ctx);
37
38 Controller& controller;
29}; 39};
30 40
31} // namespace Service::APM 41} // namespace Service::APM
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index 3711e1ea1..679299f68 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -25,7 +25,8 @@ namespace Service::Audio {
25 25
26class IAudioRenderer final : public ServiceFramework<IAudioRenderer> { 26class IAudioRenderer final : public ServiceFramework<IAudioRenderer> {
27public: 27public:
28 explicit IAudioRenderer(AudioCore::AudioRendererParameter audren_params) 28 explicit IAudioRenderer(AudioCore::AudioRendererParameter audren_params,
29 const std::size_t instance_number)
29 : ServiceFramework("IAudioRenderer") { 30 : ServiceFramework("IAudioRenderer") {
30 // clang-format off 31 // clang-format off
31 static const FunctionInfo functions[] = { 32 static const FunctionInfo functions[] = {
@@ -48,8 +49,8 @@ public:
48 auto& system = Core::System::GetInstance(); 49 auto& system = Core::System::GetInstance();
49 system_event = Kernel::WritableEvent::CreateEventPair( 50 system_event = Kernel::WritableEvent::CreateEventPair(
50 system.Kernel(), Kernel::ResetType::Manual, "IAudioRenderer:SystemEvent"); 51 system.Kernel(), Kernel::ResetType::Manual, "IAudioRenderer:SystemEvent");
51 renderer = std::make_unique<AudioCore::AudioRenderer>(system.CoreTiming(), audren_params, 52 renderer = std::make_unique<AudioCore::AudioRenderer>(
52 system_event.writable); 53 system.CoreTiming(), audren_params, system_event.writable, instance_number);
53 } 54 }
54 55
55private: 56private:
@@ -607,7 +608,7 @@ void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) {
607 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 608 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
608 609
609 rb.Push(RESULT_SUCCESS); 610 rb.Push(RESULT_SUCCESS);
610 rb.PushIpcInterface<IAudioRenderer>(params); 611 rb.PushIpcInterface<IAudioRenderer>(params, audren_instance_count++);
611} 612}
612 613
613bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const { 614bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const {
diff --git a/src/core/hle/service/audio/audren_u.h b/src/core/hle/service/audio/audren_u.h
index 1d3c8df61..49f2733cf 100644
--- a/src/core/hle/service/audio/audren_u.h
+++ b/src/core/hle/service/audio/audren_u.h
@@ -33,6 +33,7 @@ private:
33 }; 33 };
34 34
35 bool IsFeatureSupported(AudioFeatures feature, u32_le revision) const; 35 bool IsFeatureSupported(AudioFeatures feature, u32_le revision) const;
36 std::size_t audren_instance_count = 0;
36}; 37};
37 38
38} // namespace Service::Audio 39} // namespace Service::Audio
diff --git a/src/core/hle/service/filesystem/filesystem.cpp b/src/core/hle/service/filesystem/filesystem.cpp
index 1ebfeb4bf..8ce110dd1 100644
--- a/src/core/hle/service/filesystem/filesystem.cpp
+++ b/src/core/hle/service/filesystem/filesystem.cpp
@@ -472,12 +472,12 @@ void CreateFactories(FileSys::VfsFilesystem& vfs, bool overwrite) {
472 } 472 }
473} 473}
474 474
475void InstallInterfaces(SM::ServiceManager& service_manager, FileSys::VfsFilesystem& vfs) { 475void InstallInterfaces(Core::System& system) {
476 romfs_factory = nullptr; 476 romfs_factory = nullptr;
477 CreateFactories(vfs, false); 477 CreateFactories(*system.GetFilesystem(), false);
478 std::make_shared<FSP_LDR>()->InstallAsService(service_manager); 478 std::make_shared<FSP_LDR>()->InstallAsService(system.ServiceManager());
479 std::make_shared<FSP_PR>()->InstallAsService(service_manager); 479 std::make_shared<FSP_PR>()->InstallAsService(system.ServiceManager());
480 std::make_shared<FSP_SRV>()->InstallAsService(service_manager); 480 std::make_shared<FSP_SRV>(system.GetReporter())->InstallAsService(system.ServiceManager());
481} 481}
482 482
483} // namespace Service::FileSystem 483} // namespace Service::FileSystem
diff --git a/src/core/hle/service/filesystem/filesystem.h b/src/core/hle/service/filesystem/filesystem.h
index 6481f237c..3849dd89e 100644
--- a/src/core/hle/service/filesystem/filesystem.h
+++ b/src/core/hle/service/filesystem/filesystem.h
@@ -65,7 +65,7 @@ FileSys::VirtualDir GetModificationDumpRoot(u64 title_id);
65// above is called. 65// above is called.
66void CreateFactories(FileSys::VfsFilesystem& vfs, bool overwrite = true); 66void CreateFactories(FileSys::VfsFilesystem& vfs, bool overwrite = true);
67 67
68void InstallInterfaces(SM::ServiceManager& service_manager, FileSys::VfsFilesystem& vfs); 68void InstallInterfaces(Core::System& system);
69 69
70// A class that wraps a VfsDirectory with methods that return ResultVal and ResultCode instead of 70// A class that wraps a VfsDirectory with methods that return ResultVal and ResultCode instead of
71// pointers and booleans. This makes using a VfsDirectory with switch services much easier and 71// pointers and booleans. This makes using a VfsDirectory with switch services much easier and
diff --git a/src/core/hle/service/filesystem/fsp_srv.cpp b/src/core/hle/service/filesystem/fsp_srv.cpp
index e7df8fd98..d3cd46a9b 100644
--- a/src/core/hle/service/filesystem/fsp_srv.cpp
+++ b/src/core/hle/service/filesystem/fsp_srv.cpp
@@ -26,6 +26,7 @@
26#include "core/hle/kernel/process.h" 26#include "core/hle/kernel/process.h"
27#include "core/hle/service/filesystem/filesystem.h" 27#include "core/hle/service/filesystem/filesystem.h"
28#include "core/hle/service/filesystem/fsp_srv.h" 28#include "core/hle/service/filesystem/fsp_srv.h"
29#include "core/reporter.h"
29 30
30namespace Service::FileSystem { 31namespace Service::FileSystem {
31 32
@@ -613,7 +614,7 @@ private:
613 u64 next_entry_index = 0; 614 u64 next_entry_index = 0;
614}; 615};
615 616
616FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") { 617FSP_SRV::FSP_SRV(const Core::Reporter& reporter) : ServiceFramework("fsp-srv"), reporter(reporter) {
617 // clang-format off 618 // clang-format off
618 static const FunctionInfo functions[] = { 619 static const FunctionInfo functions[] = {
619 {0, nullptr, "OpenFileSystem"}, 620 {0, nullptr, "OpenFileSystem"},
@@ -710,14 +711,14 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") {
710 {1001, nullptr, "SetSaveDataSize"}, 711 {1001, nullptr, "SetSaveDataSize"},
711 {1002, nullptr, "SetSaveDataRootPath"}, 712 {1002, nullptr, "SetSaveDataRootPath"},
712 {1003, nullptr, "DisableAutoSaveDataCreation"}, 713 {1003, nullptr, "DisableAutoSaveDataCreation"},
713 {1004, nullptr, "SetGlobalAccessLogMode"}, 714 {1004, &FSP_SRV::SetGlobalAccessLogMode, "SetGlobalAccessLogMode"},
714 {1005, &FSP_SRV::GetGlobalAccessLogMode, "GetGlobalAccessLogMode"}, 715 {1005, &FSP_SRV::GetGlobalAccessLogMode, "GetGlobalAccessLogMode"},
715 {1006, nullptr, "OutputAccessLogToSdCard"}, 716 {1006, &FSP_SRV::OutputAccessLogToSdCard, "OutputAccessLogToSdCard"},
716 {1007, nullptr, "RegisterUpdatePartition"}, 717 {1007, nullptr, "RegisterUpdatePartition"},
717 {1008, nullptr, "OpenRegisteredUpdatePartition"}, 718 {1008, nullptr, "OpenRegisteredUpdatePartition"},
718 {1009, nullptr, "GetAndClearMemoryReportInfo"}, 719 {1009, nullptr, "GetAndClearMemoryReportInfo"},
719 {1010, nullptr, "SetDataStorageRedirectTarget"}, 720 {1010, nullptr, "SetDataStorageRedirectTarget"},
720 {1011, nullptr, "OutputAccessLogToSdCard2"}, 721 {1011, &FSP_SRV::GetAccessLogVersionInfo, "GetAccessLogVersionInfo"},
721 {1100, nullptr, "OverrideSaveDataTransferTokenSignVerificationKey"}, 722 {1100, nullptr, "OverrideSaveDataTransferTokenSignVerificationKey"},
722 {1110, nullptr, "CorruptSaveDataFileSystemBySaveDataSpaceId2"}, 723 {1110, nullptr, "CorruptSaveDataFileSystemBySaveDataSpaceId2"},
723 {1200, nullptr, "OpenMultiCommitManager"}, 724 {1200, nullptr, "OpenMultiCommitManager"},
@@ -814,21 +815,22 @@ void FSP_SRV::OpenSaveDataInfoReaderBySaveDataSpaceId(Kernel::HLERequestContext&
814 rb.PushIpcInterface<ISaveDataInfoReader>(std::make_shared<ISaveDataInfoReader>(space)); 815 rb.PushIpcInterface<ISaveDataInfoReader>(std::make_shared<ISaveDataInfoReader>(space));
815} 816}
816 817
817void FSP_SRV::GetGlobalAccessLogMode(Kernel::HLERequestContext& ctx) { 818void FSP_SRV::SetGlobalAccessLogMode(Kernel::HLERequestContext& ctx) {
818 LOG_WARNING(Service_FS, "(STUBBED) called"); 819 IPC::RequestParser rp{ctx};
820 log_mode = rp.PopEnum<LogMode>();
819 821
820 enum class LogMode : u32 { 822 LOG_DEBUG(Service_FS, "called, log_mode={:08X}", static_cast<u32>(log_mode));
821 Off, 823
822 Log, 824 IPC::ResponseBuilder rb{ctx, 2};
823 RedirectToSdCard, 825 rb.Push(RESULT_SUCCESS);
824 LogToSdCard = Log | RedirectToSdCard, 826}
825 }; 827
828void FSP_SRV::GetGlobalAccessLogMode(Kernel::HLERequestContext& ctx) {
829 LOG_DEBUG(Service_FS, "called");
826 830
827 // Given we always want to receive logging information,
828 // we always specify logging as enabled.
829 IPC::ResponseBuilder rb{ctx, 3}; 831 IPC::ResponseBuilder rb{ctx, 3};
830 rb.Push(RESULT_SUCCESS); 832 rb.Push(RESULT_SUCCESS);
831 rb.PushEnum(LogMode::Log); 833 rb.PushEnum(log_mode);
832} 834}
833 835
834void FSP_SRV::OpenDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx) { 836void FSP_SRV::OpenDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx) {
@@ -902,4 +904,26 @@ void FSP_SRV::OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ct
902 rb.Push(FileSys::ERROR_ENTITY_NOT_FOUND); 904 rb.Push(FileSys::ERROR_ENTITY_NOT_FOUND);
903} 905}
904 906
907void FSP_SRV::OutputAccessLogToSdCard(Kernel::HLERequestContext& ctx) {
908 const auto raw = ctx.ReadBuffer();
909 auto log = Common::StringFromFixedZeroTerminatedBuffer(
910 reinterpret_cast<const char*>(raw.data()), raw.size());
911
912 LOG_DEBUG(Service_FS, "called, log='{}'", log);
913
914 reporter.SaveFilesystemAccessReport(log_mode, std::move(log));
915
916 IPC::ResponseBuilder rb{ctx, 2};
917 rb.Push(RESULT_SUCCESS);
918}
919
920void FSP_SRV::GetAccessLogVersionInfo(Kernel::HLERequestContext& ctx) {
921 LOG_DEBUG(Service_FS, "called");
922
923 IPC::ResponseBuilder rb{ctx, 4};
924 rb.Push(RESULT_SUCCESS);
925 rb.PushEnum(AccessLogVersion::Latest);
926 rb.Push(access_log_program_index);
927}
928
905} // namespace Service::FileSystem 929} // namespace Service::FileSystem
diff --git a/src/core/hle/service/filesystem/fsp_srv.h b/src/core/hle/service/filesystem/fsp_srv.h
index d7572ba7a..b5486a193 100644
--- a/src/core/hle/service/filesystem/fsp_srv.h
+++ b/src/core/hle/service/filesystem/fsp_srv.h
@@ -7,15 +7,32 @@
7#include <memory> 7#include <memory>
8#include "core/hle/service/service.h" 8#include "core/hle/service/service.h"
9 9
10namespace Core {
11class Reporter;
12}
13
10namespace FileSys { 14namespace FileSys {
11class FileSystemBackend; 15class FileSystemBackend;
12} 16}
13 17
14namespace Service::FileSystem { 18namespace Service::FileSystem {
15 19
20enum class AccessLogVersion : u32 {
21 V7_0_0 = 2,
22
23 Latest = V7_0_0,
24};
25
26enum class LogMode : u32 {
27 Off,
28 Log,
29 RedirectToSdCard,
30 LogToSdCard = Log | RedirectToSdCard,
31};
32
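The LogMode values act as flags: Off = 0, Log = 1, RedirectToSdCard = 2, so LogToSdCard is the bitwise OR of the latter two (3). A hypothetical helper (not part of this change) to test the SD-card bit:

    // Hypothetical; illustrates the flag semantics of LogMode.
    constexpr bool RedirectsToSdCard(LogMode mode) {
        return (static_cast<u32>(mode) & static_cast<u32>(LogMode::RedirectToSdCard)) != 0;
    }
    static_assert(RedirectsToSdCard(LogMode::LogToSdCard));
    static_assert(!RedirectsToSdCard(LogMode::Log));
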
16class FSP_SRV final : public ServiceFramework<FSP_SRV> { 33class FSP_SRV final : public ServiceFramework<FSP_SRV> {
17public: 34public:
18 explicit FSP_SRV(); 35 explicit FSP_SRV(const Core::Reporter& reporter);
19 ~FSP_SRV() override; 36 ~FSP_SRV() override;
20 37
21private: 38private:
@@ -26,13 +43,20 @@ private:
26 void OpenSaveDataFileSystem(Kernel::HLERequestContext& ctx); 43 void OpenSaveDataFileSystem(Kernel::HLERequestContext& ctx);
27 void OpenReadOnlySaveDataFileSystem(Kernel::HLERequestContext& ctx); 44 void OpenReadOnlySaveDataFileSystem(Kernel::HLERequestContext& ctx);
28 void OpenSaveDataInfoReaderBySaveDataSpaceId(Kernel::HLERequestContext& ctx); 45 void OpenSaveDataInfoReaderBySaveDataSpaceId(Kernel::HLERequestContext& ctx);
46 void SetGlobalAccessLogMode(Kernel::HLERequestContext& ctx);
29 void GetGlobalAccessLogMode(Kernel::HLERequestContext& ctx); 47 void GetGlobalAccessLogMode(Kernel::HLERequestContext& ctx);
30 void OpenDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx); 48 void OpenDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx);
31 void OpenDataStorageByDataId(Kernel::HLERequestContext& ctx); 49 void OpenDataStorageByDataId(Kernel::HLERequestContext& ctx);
32 void OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx); 50 void OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx);
51 void OutputAccessLogToSdCard(Kernel::HLERequestContext& ctx);
52 void GetAccessLogVersionInfo(Kernel::HLERequestContext& ctx);
33 53
34 FileSys::VirtualFile romfs; 54 FileSys::VirtualFile romfs;
35 u64 current_process_id = 0; 55 u64 current_process_id = 0;
56 u32 access_log_program_index = 0;
57 LogMode log_mode = LogMode::LogToSdCard;
58
59 const Core::Reporter& reporter;
36}; 60};
37 61
38} // namespace Service::FileSystem 62} // namespace Service::FileSystem
diff --git a/src/core/hle/service/friend/friend.cpp b/src/core/hle/service/friend/friend.cpp
index dec541f2e..d1ec12ef9 100644
--- a/src/core/hle/service/friend/friend.cpp
+++ b/src/core/hle/service/friend/friend.cpp
@@ -22,7 +22,7 @@ public:
22 {0, nullptr, "GetCompletionEvent"}, 22 {0, nullptr, "GetCompletionEvent"},
23 {1, nullptr, "Cancel"}, 23 {1, nullptr, "Cancel"},
24 {10100, nullptr, "GetFriendListIds"}, 24 {10100, nullptr, "GetFriendListIds"},
25 {10101, nullptr, "GetFriendList"}, 25 {10101, &IFriendService::GetFriendList, "GetFriendList"},
26 {10102, nullptr, "UpdateFriendInfo"}, 26 {10102, nullptr, "UpdateFriendInfo"},
27 {10110, nullptr, "GetFriendProfileImage"}, 27 {10110, nullptr, "GetFriendProfileImage"},
28 {10200, nullptr, "SendFriendRequestForApplication"}, 28 {10200, nullptr, "SendFriendRequestForApplication"},
@@ -99,6 +99,23 @@ public:
99 } 99 }
100 100
101private: 101private:
102 enum class PresenceFilter : u32 {
103 None = 0,
104 Online = 1,
105 OnlinePlay = 2,
106 OnlineOrOnlinePlay = 3,
107 };
108
109 struct SizedFriendFilter {
110 PresenceFilter presence;
111 u8 is_favorite;
112 u8 same_app;
113 u8 same_app_played;
114 u8 arbitary_app_played;
115 u64 group_id;
116 };
117 static_assert(sizeof(SizedFriendFilter) == 0x10, "SizedFriendFilter is an invalid size");
118
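The 0x10-byte figure in the static_assert follows from natural alignment, with no padding required:

    // offset 0x0: PresenceFilter presence  (u32, 4 bytes)
    // offset 0x4: four u8 flags            (4 bytes total)
    // offset 0x8: u64 group_id             (8 bytes, already 8-byte aligned)
    // sizeof(SizedFriendFilter) == 0x10
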
102 void DeclareCloseOnlinePlaySession(Kernel::HLERequestContext& ctx) { 119 void DeclareCloseOnlinePlaySession(Kernel::HLERequestContext& ctx) {
103 // Stub used by Splatoon 2 120 // Stub used by Splatoon 2
104 LOG_WARNING(Service_ACC, "(STUBBED) called"); 121 LOG_WARNING(Service_ACC, "(STUBBED) called");
@@ -112,6 +129,22 @@ private:
112 IPC::ResponseBuilder rb{ctx, 2}; 129 IPC::ResponseBuilder rb{ctx, 2};
113 rb.Push(RESULT_SUCCESS); 130 rb.Push(RESULT_SUCCESS);
114 } 131 }
132
133 void GetFriendList(Kernel::HLERequestContext& ctx) {
134 IPC::RequestParser rp{ctx};
135 const auto friend_offset = rp.Pop<u32>();
136 const auto uuid = rp.PopRaw<Common::UUID>();
137 [[maybe_unused]] const auto filter = rp.PopRaw<SizedFriendFilter>();
138 const auto pid = rp.Pop<u64>();
139 LOG_WARNING(Service_ACC, "(STUBBED) called, offset={}, uuid={}, pid={}", friend_offset,
140 uuid.Format(), pid);
141
142 IPC::ResponseBuilder rb{ctx, 3};
143 rb.Push(RESULT_SUCCESS);
144
145 rb.Push<u32>(0); // Friend count
146 // TODO(ogniK): Return a buffer of u64s which are the "NetworkServiceAccountId"
147 }
115}; 148};
116 149
117class INotificationService final : public ServiceFramework<INotificationService> { 150class INotificationService final : public ServiceFramework<INotificationService> {
diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp
index fdd6d79a2..1e81f776f 100644
--- a/src/core/hle/service/hid/controllers/npad.cpp
+++ b/src/core/hle/service/hid/controllers/npad.cpp
@@ -548,6 +548,37 @@ void Controller_NPad::DisconnectNPad(u32 npad_id) {
548 connected_controllers[NPadIdToIndex(npad_id)].is_connected = false; 548 connected_controllers[NPadIdToIndex(npad_id)].is_connected = false;
549} 549}
550 550
551void Controller_NPad::StartLRAssignmentMode() {
552 // Nothing internally is used for lr assignment mode. Since we have the ability to set the
553 // controller types from boot, it doesn't really matter about showing a selection screen
554 is_in_lr_assignment_mode = true;
555}
556
557void Controller_NPad::StopLRAssignmentMode() {
558 is_in_lr_assignment_mode = false;
559}
560
561bool Controller_NPad::SwapNpadAssignment(u32 npad_id_1, u32 npad_id_2) {
562 if (npad_id_1 == NPAD_HANDHELD || npad_id_2 == NPAD_HANDHELD || npad_id_1 == NPAD_UNKNOWN ||
563 npad_id_2 == NPAD_UNKNOWN) {
564 return true;
565 }
566 const auto npad_index_1 = NPadIdToIndex(npad_id_1);
567 const auto npad_index_2 = NPadIdToIndex(npad_id_2);
568
569 if (!IsControllerSupported(connected_controllers[npad_index_1].type) ||
570 !IsControllerSupported(connected_controllers[npad_index_2].type)) {
571 return false;
572 }
573
574 std::swap(connected_controllers[npad_index_1].type, connected_controllers[npad_index_2].type);
575
576 InitNewlyAddedControler(npad_index_1);
577 InitNewlyAddedControler(npad_index_2);
578
579 return true;
580}
581
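Note the asymmetry in SwapNpadAssignment's return value: handheld or unknown IDs short-circuit to true (reported success, no swap performed), while an unsupported controller type is the only false path. A hypothetical caller; the pairing with the new ERR_NPAD_NOT_CONNECTED from hid/errors.h below is an assumption, not shown in this hunk:

    // Hypothetical HLE handler body; maps the only failure path to an error code.
    if (!npad.SwapNpadAssignment(npad_id_1, npad_id_2)) {
        IPC::ResponseBuilder rb{ctx, 2};
        rb.Push(ERR_NPAD_NOT_CONNECTED);
        return;
    }
    IPC::ResponseBuilder rb{ctx, 2};
    rb.Push(RESULT_SUCCESS);
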
551bool Controller_NPad::IsControllerSupported(NPadControllerType controller) { 582bool Controller_NPad::IsControllerSupported(NPadControllerType controller) {
552 if (controller == NPadControllerType::Handheld) { 583 if (controller == NPadControllerType::Handheld) {
553        // Handheld is not even a supported type, let's stop here 584        // Handheld is not even a supported type, let's stop here
diff --git a/src/core/hle/service/hid/controllers/npad.h b/src/core/hle/service/hid/controllers/npad.h
index 4ff50b3cd..4b6c1083f 100644
--- a/src/core/hle/service/hid/controllers/npad.h
+++ b/src/core/hle/service/hid/controllers/npad.h
@@ -124,6 +124,10 @@ public:
124 void ConnectAllDisconnectedControllers(); 124 void ConnectAllDisconnectedControllers();
125 void ClearAllControllers(); 125 void ClearAllControllers();
126 126
127 void StartLRAssignmentMode();
128 void StopLRAssignmentMode();
129 bool SwapNpadAssignment(u32 npad_id_1, u32 npad_id_2);
130
127 // Logical OR for all buttons presses on all controllers 131 // Logical OR for all buttons presses on all controllers
128 // Specifically for cheat engine and other features. 132 // Specifically for cheat engine and other features.
129 u32 GetAndResetPressState(); 133 u32 GetAndResetPressState();
@@ -321,5 +325,6 @@ private:
321 void RequestPadStateUpdate(u32 npad_id); 325 void RequestPadStateUpdate(u32 npad_id);
322 std::array<ControllerPad, 10> npad_pad_states{}; 326 std::array<ControllerPad, 10> npad_pad_states{};
323 bool IsControllerSupported(NPadControllerType controller); 327 bool IsControllerSupported(NPadControllerType controller);
328 bool is_in_lr_assignment_mode{false};
324}; 329};
325} // namespace Service::HID 330} // namespace Service::HID
diff --git a/src/core/hle/service/hid/errors.h b/src/core/hle/service/hid/errors.h
new file mode 100644
index 000000000..3583642e7
--- /dev/null
+++ b/src/core/hle/service/hid/errors.h
@@ -0,0 +1,13 @@
1// Copyright 2019 yuzu emulator team
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "core/hle/result.h"
8
9namespace Service::HID {
10
11constexpr ResultCode ERR_NPAD_NOT_CONNECTED{ErrorModule::HID, 710};
12
13} // namespace Service::HID
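The new errors.h builds ERR_NPAD_NOT_CONNECTED from an error module plus a description number. Switch result codes pack the module into the low 9 bits with the description above it, and are commonly displayed as 2{module+2000}-{description}. A sketch of that packing under the documented layout (the MakeResult helper and MODULE_HID constant are illustrative, not yuzu's API):

    #include <cstdint>
    #include <iostream>

    // Raw result = module (low 9 bits) | description << 9.
    constexpr std::uint32_t MakeResult(std::uint32_t module, std::uint32_t description) {
        return (module & 0x1FF) | (description << 9);
    }

    constexpr std::uint32_t MODULE_HID = 202; // HID's module number
    constexpr std::uint32_t ERR_NPAD_NOT_CONNECTED = MakeResult(MODULE_HID, 710);

    int main() {
        // Shown to users as 2202-0710 in the console's error applet format.
        std::cout << std::hex << ERR_NPAD_NOT_CONNECTED << '\n';
    }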
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index a4ad95d96..0bd24b8eb 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -16,6 +16,7 @@
16#include "core/hle/kernel/readable_event.h" 16#include "core/hle/kernel/readable_event.h"
17#include "core/hle/kernel/shared_memory.h" 17#include "core/hle/kernel/shared_memory.h"
18#include "core/hle/kernel/writable_event.h" 18#include "core/hle/kernel/writable_event.h"
19#include "core/hle/service/hid/errors.h"
19#include "core/hle/service/hid/hid.h" 20#include "core/hle/service/hid/hid.h"
20#include "core/hle/service/hid/irs.h" 21#include "core/hle/service/hid/irs.h"
21#include "core/hle/service/hid/xcd.h" 22#include "core/hle/service/hid/xcd.h"
@@ -202,11 +203,11 @@ Hid::Hid() : ServiceFramework("hid") {
202 {123, nullptr, "SetNpadJoyAssignmentModeSingleByDefault"}, 203 {123, nullptr, "SetNpadJoyAssignmentModeSingleByDefault"},
203 {124, &Hid::SetNpadJoyAssignmentModeDual, "SetNpadJoyAssignmentModeDual"}, 204 {124, &Hid::SetNpadJoyAssignmentModeDual, "SetNpadJoyAssignmentModeDual"},
204 {125, &Hid::MergeSingleJoyAsDualJoy, "MergeSingleJoyAsDualJoy"}, 205 {125, &Hid::MergeSingleJoyAsDualJoy, "MergeSingleJoyAsDualJoy"},
205 {126, nullptr, "StartLrAssignmentMode"}, 206 {126, &Hid::StartLrAssignmentMode, "StartLrAssignmentMode"},
206 {127, nullptr, "StopLrAssignmentMode"}, 207 {127, &Hid::StopLrAssignmentMode, "StopLrAssignmentMode"},
207 {128, &Hid::SetNpadHandheldActivationMode, "SetNpadHandheldActivationMode"}, 208 {128, &Hid::SetNpadHandheldActivationMode, "SetNpadHandheldActivationMode"},
208 {129, nullptr, "GetNpadHandheldActivationMode"}, 209 {129, nullptr, "GetNpadHandheldActivationMode"},
209 {130, nullptr, "SwapNpadAssignment"}, 210 {130, &Hid::SwapNpadAssignment, "SwapNpadAssignment"},
210 {131, nullptr, "IsUnintendedHomeButtonInputProtectionEnabled"}, 211 {131, nullptr, "IsUnintendedHomeButtonInputProtectionEnabled"},
211 {132, nullptr, "EnableUnintendedHomeButtonInputProtection"}, 212 {132, nullptr, "EnableUnintendedHomeButtonInputProtection"},
212 {133, nullptr, "SetNpadJoyAssignmentModeSingleWithDestination"}, 213 {133, nullptr, "SetNpadJoyAssignmentModeSingleWithDestination"},
@@ -733,6 +734,49 @@ void Hid::SetPalmaBoostMode(Kernel::HLERequestContext& ctx) {
733 rb.Push(RESULT_SUCCESS); 734 rb.Push(RESULT_SUCCESS);
734} 735}
735 736
737void Hid::StartLrAssignmentMode(Kernel::HLERequestContext& ctx) {
738 IPC::RequestParser rp{ctx};
739 const auto applet_resource_user_id{rp.Pop<u64>()};
740
741 LOG_DEBUG(Service_HID, "called, applet_resource_user_id={}", applet_resource_user_id);
742 auto& controller = applet_resource->GetController<Controller_NPad>(HidController::NPad);
743 controller.StartLRAssignmentMode();
744
745 IPC::ResponseBuilder rb{ctx, 2};
746 rb.Push(RESULT_SUCCESS);
747}
748
749void Hid::StopLrAssignmentMode(Kernel::HLERequestContext& ctx) {
750 IPC::RequestParser rp{ctx};
751 const auto applet_resource_user_id{rp.Pop<u64>()};
752
753 LOG_DEBUG(Service_HID, "called, applet_resource_user_id={}", applet_resource_user_id);
754 auto& controller = applet_resource->GetController<Controller_NPad>(HidController::NPad);
755 controller.StopLRAssignmentMode();
756
757 IPC::ResponseBuilder rb{ctx, 2};
758 rb.Push(RESULT_SUCCESS);
759}
760
761void Hid::SwapNpadAssignment(Kernel::HLERequestContext& ctx) {
762 IPC::RequestParser rp{ctx};
763 const auto npad_1{rp.Pop<u32>()};
764 const auto npad_2{rp.Pop<u32>()};
765 const auto applet_resource_user_id{rp.Pop<u64>()};
766
767 LOG_DEBUG(Service_HID, "called, applet_resource_user_id={}, npad_1={}, npad_2={}",
768 applet_resource_user_id, npad_1, npad_2);
769
770 auto& controller = applet_resource->GetController<Controller_NPad>(HidController::NPad);
771 IPC::ResponseBuilder rb{ctx, 2};
772 if (controller.SwapNpadAssignment(npad_1, npad_2)) {
773 rb.Push(RESULT_SUCCESS);
774 } else {
775 LOG_ERROR(Service_HID, "Npads are not connected!");
776 rb.Push(ERR_NPAD_NOT_CONNECTED);
777 }
778}
779
736class HidDbg final : public ServiceFramework<HidDbg> { 780class HidDbg final : public ServiceFramework<HidDbg> {
737public: 781public:
738 explicit HidDbg() : ServiceFramework{"hid:dbg"} { 782 explicit HidDbg() : ServiceFramework{"hid:dbg"} {
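The hid.cpp changes above are mostly table wiring: three previously-null slots in the FunctionInfo array now point at member functions, and dispatch stays a lookup from command ID to handler. A toy version of that table-driven dispatch with a made-up Service class (nullptr entries log a stub, mirroring how unimplemented commands behave):

    #include <cstdint>
    #include <iostream>

    class Service {
    public:
        void Dispatch(std::uint32_t command_id) {
            for (const auto& entry : table) {
                if (entry.id != command_id) {
                    continue;
                }
                if (entry.handler == nullptr) {
                    std::cout << "(STUBBED) " << entry.name << '\n';
                    return;
                }
                (this->*entry.handler)();
                return;
            }
            std::cout << "unknown command " << command_id << '\n';
        }

    private:
        void StartLrAssignmentMode() { std::cout << "start LR assignment\n"; }
        void StopLrAssignmentMode() { std::cout << "stop LR assignment\n"; }

        using Handler = void (Service::*)();
        struct FunctionInfo {
            std::uint32_t id;
            Handler handler; // nullptr marks an unimplemented command
            const char* name;
        };
        static constexpr FunctionInfo table[] = {
            {126, &Service::StartLrAssignmentMode, "StartLrAssignmentMode"},
            {127, &Service::StopLrAssignmentMode, "StopLrAssignmentMode"},
            {129, nullptr, "GetNpadHandheldActivationMode"},
        };
    };

    int main() {
        Service hid;
        hid.Dispatch(126); // implemented: runs the handler
        hid.Dispatch(129); // still a null slot, as command 129 is in the diff
    }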
diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h
index d3660cad2..28260ef1b 100644
--- a/src/core/hle/service/hid/hid.h
+++ b/src/core/hle/service/hid/hid.h
@@ -119,6 +119,9 @@ private:
119 void StopSixAxisSensor(Kernel::HLERequestContext& ctx); 119 void StopSixAxisSensor(Kernel::HLERequestContext& ctx);
120 void SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx); 120 void SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx);
121 void SetPalmaBoostMode(Kernel::HLERequestContext& ctx); 121 void SetPalmaBoostMode(Kernel::HLERequestContext& ctx);
122 void StartLrAssignmentMode(Kernel::HLERequestContext& ctx);
123 void StopLrAssignmentMode(Kernel::HLERequestContext& ctx);
124 void SwapNpadAssignment(Kernel::HLERequestContext& ctx);
122 125
123 std::shared_ptr<IAppletResource> applet_resource; 126 std::shared_ptr<IAppletResource> applet_resource;
124}; 127};
diff --git a/src/core/hle/service/ldr/ldr.cpp b/src/core/hle/service/ldr/ldr.cpp
index b839303ac..8ddad8682 100644
--- a/src/core/hle/service/ldr/ldr.cpp
+++ b/src/core/hle/service/ldr/ldr.cpp
@@ -345,14 +345,16 @@ public:
345 vm_manager 345 vm_manager
346 .MirrorMemory(*map_address, nro_address, nro_size, Kernel::MemoryState::ModuleCode) 346 .MirrorMemory(*map_address, nro_address, nro_size, Kernel::MemoryState::ModuleCode)
347 .IsSuccess()); 347 .IsSuccess());
348 ASSERT(vm_manager.UnmapRange(nro_address, nro_size).IsSuccess()); 348 ASSERT(vm_manager.ReprotectRange(nro_address, nro_size, Kernel::VMAPermission::None)
349 .IsSuccess());
349 350
350 if (bss_size > 0) { 351 if (bss_size > 0) {
351 ASSERT(vm_manager 352 ASSERT(vm_manager
352 .MirrorMemory(*map_address + nro_size, bss_address, bss_size, 353 .MirrorMemory(*map_address + nro_size, bss_address, bss_size,
353 Kernel::MemoryState::ModuleCode) 354 Kernel::MemoryState::ModuleCode)
354 .IsSuccess()); 355 .IsSuccess());
355 ASSERT(vm_manager.UnmapRange(bss_address, bss_size).IsSuccess()); 356 ASSERT(vm_manager.ReprotectRange(bss_address, bss_size, Kernel::VMAPermission::None)
357 .IsSuccess());
356 } 358 }
357 359
358 vm_manager.ReprotectRange(*map_address, header.text_size, 360 vm_manager.ReprotectRange(*map_address, header.text_size,
@@ -364,7 +366,8 @@ public:
364 366
365 Core::System::GetInstance().InvalidateCpuInstructionCaches(); 367 Core::System::GetInstance().InvalidateCpuInstructionCaches();
366 368
367 nro.insert_or_assign(*map_address, NROInfo{hash, nro_size + bss_size}); 369 nro.insert_or_assign(*map_address,
370 NROInfo{hash, nro_address, nro_size, bss_address, bss_size});
368 371
369 IPC::ResponseBuilder rb{ctx, 4}; 372 IPC::ResponseBuilder rb{ctx, 4};
370 rb.Push(RESULT_SUCCESS); 373 rb.Push(RESULT_SUCCESS);
@@ -409,9 +412,23 @@ public:
409 } 412 }
410 413
411 auto& vm_manager = Core::CurrentProcess()->VMManager(); 414 auto& vm_manager = Core::CurrentProcess()->VMManager();
412 const auto& nro_size = iter->second.size; 415 const auto& nro_info = iter->second;
413 416
414 ASSERT(vm_manager.UnmapRange(nro_address, nro_size).IsSuccess()); 417 // Unmap the mirrored memory
418 ASSERT(
419 vm_manager.UnmapRange(nro_address, nro_info.nro_size + nro_info.bss_size).IsSuccess());
420
421 // Reprotect the source memory
422 ASSERT(vm_manager
423 .ReprotectRange(nro_info.nro_address, nro_info.nro_size,
424 Kernel::VMAPermission::ReadWrite)
425 .IsSuccess());
426 if (nro_info.bss_size > 0) {
427 ASSERT(vm_manager
428 .ReprotectRange(nro_info.bss_address, nro_info.bss_size,
429 Kernel::VMAPermission::ReadWrite)
430 .IsSuccess());
431 }
415 432
416 Core::System::GetInstance().InvalidateCpuInstructionCaches(); 433 Core::System::GetInstance().InvalidateCpuInstructionCaches();
417 434
@@ -473,7 +490,10 @@ private:
473 490
474 struct NROInfo { 491 struct NROInfo {
475 SHA256Hash hash; 492 SHA256Hash hash;
476 u64 size; 493 VAddr nro_address;
494 u64 nro_size;
495 VAddr bss_address;
496 u64 bss_size;
477 }; 497 };
478 498
479 bool initialized = false; 499 bool initialized = false;
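The ldr.cpp change swaps unmap-after-mirror for reprotect-to-None, so the expanded NROInfo has to remember the source ranges and UnloadNro now reverses both steps: unmap the mirror, then restore ReadWrite on the original NRO and bss pages. A sketch of that paired lifecycle using POSIX mmap/mprotect as a stand-in for the kernel VM manager (illustrative only; the emulator goes through VMManager, not host syscalls):

    #include <sys/mman.h>

    #include <cassert>
    #include <cstddef>

    int main() {
        const std::size_t size = 4096;
        // Pages standing in for the NRO image at its original guest address.
        void* source = mmap(nullptr, size, PROT_READ | PROT_WRITE,
                            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        assert(source != MAP_FAILED);

        // Load: after mirroring elsewhere, the source becomes inaccessible so
        // the module can only be touched through the mapped mirror.
        assert(mprotect(source, size, PROT_NONE) == 0);

        // Unload: restore ReadWrite on the source, as the new UnloadNro path does
        // for both the nro range and, when present, the bss range.
        assert(mprotect(source, size, PROT_READ | PROT_WRITE) == 0);

        munmap(source, size);
        return 0;
    }

Keeping the source mapped but inaccessible is what makes the stored nro_address/bss_address fields necessary: without them, unload would have nothing to reprotect.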
diff --git a/src/core/hle/service/mii/mii.cpp b/src/core/hle/service/mii/mii.cpp
index ce84e25ed..0b3923ad9 100644
--- a/src/core/hle/service/mii/mii.cpp
+++ b/src/core/hle/service/mii/mii.cpp
@@ -48,7 +48,7 @@ public:
48 {19, nullptr, "Export"}, 48 {19, nullptr, "Export"},
49 {20, nullptr, "IsBrokenDatabaseWithClearFlag"}, 49 {20, nullptr, "IsBrokenDatabaseWithClearFlag"},
50 {21, &IDatabaseService::GetIndex, "GetIndex"}, 50 {21, &IDatabaseService::GetIndex, "GetIndex"},
51 {22, nullptr, "SetInterfaceVersion"}, 51 {22, &IDatabaseService::SetInterfaceVersion, "SetInterfaceVersion"},
52 {23, nullptr, "Convert"}, 52 {23, nullptr, "Convert"},
53 }; 53 };
54 // clang-format on 54 // clang-format on
@@ -350,8 +350,22 @@ private:
350 rb.Push(index); 350 rb.Push(index);
351 } 351 }
352 352
353 void SetInterfaceVersion(Kernel::HLERequestContext& ctx) {
354 IPC::RequestParser rp{ctx};
355 current_interface_version = rp.PopRaw<u32>();
356
357 LOG_DEBUG(Service_Mii, "called, interface_version={:08X}", current_interface_version);
358
359 UNIMPLEMENTED_IF(current_interface_version != 1);
360
361 IPC::ResponseBuilder rb{ctx, 2};
362 rb.Push(RESULT_SUCCESS);
363 }
364
353 MiiManager db; 365 MiiManager db;
354 366
367 u32 current_interface_version = 0;
368
355 // Last read offsets of Get functions 369 // Last read offsets of Get functions
356 std::array<u32, 4> offsets{}; 370 std::array<u32, 4> offsets{};
357}; 371};
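SetInterfaceVersion stores a per-session version that later mii commands can branch on; only version 1 is accepted today, hence the UNIMPLEMENTED_IF. The negotiation pattern in miniature (class and method names invented for the sketch):

    #include <cstdint>
    #include <stdexcept>

    class DatabaseSession {
    public:
        void SetInterfaceVersion(std::uint32_t version) {
            if (version != 1) {
                // Mirrors UNIMPLEMENTED_IF: only version 1 semantics exist so far.
                throw std::runtime_error("unimplemented interface version");
            }
            current_interface_version = version;
        }

        bool SupportsNewFields() const {
            // Later commands would branch on the negotiated version like this.
            return current_interface_version >= 1;
        }

    private:
        std::uint32_t current_interface_version = 0;
    };

    int main() {
        DatabaseSession session;
        session.SetInterfaceVersion(1);
        return session.SupportsNewFields() ? 0 : 1;
    }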
diff --git a/src/core/hle/service/pm/pm.cpp b/src/core/hle/service/pm/pm.cpp
index ebcc41a43..fe6b5f798 100644
--- a/src/core/hle/service/pm/pm.cpp
+++ b/src/core/hle/service/pm/pm.cpp
@@ -3,11 +3,44 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "core/hle/ipc_helpers.h" 5#include "core/hle/ipc_helpers.h"
6#include "core/hle/kernel/kernel.h"
7#include "core/hle/kernel/process.h"
6#include "core/hle/service/pm/pm.h" 8#include "core/hle/service/pm/pm.h"
7#include "core/hle/service/service.h" 9#include "core/hle/service/service.h"
8 10
9namespace Service::PM { 11namespace Service::PM {
10 12
13namespace {
14
15constexpr ResultCode ERROR_PROCESS_NOT_FOUND{ErrorModule::PM, 1};
16
17constexpr u64 NO_PROCESS_FOUND_PID{0};
18
19std::optional<Kernel::SharedPtr<Kernel::Process>> SearchProcessList(
20 const std::vector<Kernel::SharedPtr<Kernel::Process>>& process_list,
21 std::function<bool(const Kernel::SharedPtr<Kernel::Process>&)> predicate) {
22 const auto iter = std::find_if(process_list.begin(), process_list.end(), predicate);
23
24 if (iter == process_list.end()) {
25 return std::nullopt;
26 }
27
28 return *iter;
29}
30
31void GetApplicationPidGeneric(Kernel::HLERequestContext& ctx,
32 const std::vector<Kernel::SharedPtr<Kernel::Process>>& process_list) {
33 const auto process = SearchProcessList(process_list, [](const auto& process) {
34 return process->GetProcessID() == Kernel::Process::ProcessIDMin;
35 });
36
37 IPC::ResponseBuilder rb{ctx, 4};
38 rb.Push(RESULT_SUCCESS);
39 rb.Push(process.has_value() ? (*process)->GetProcessID() : NO_PROCESS_FOUND_PID);
40}
41
42} // Anonymous namespace
43
11class BootMode final : public ServiceFramework<BootMode> { 44class BootMode final : public ServiceFramework<BootMode> {
12public: 45public:
13 explicit BootMode() : ServiceFramework{"pm:bm"} { 46 explicit BootMode() : ServiceFramework{"pm:bm"} {
@@ -41,14 +74,15 @@ private:
41 74
42class DebugMonitor final : public ServiceFramework<DebugMonitor> { 75class DebugMonitor final : public ServiceFramework<DebugMonitor> {
43public: 76public:
44 explicit DebugMonitor() : ServiceFramework{"pm:dmnt"} { 77 explicit DebugMonitor(const Kernel::KernelCore& kernel)
78 : ServiceFramework{"pm:dmnt"}, kernel(kernel) {
45 // clang-format off 79 // clang-format off
46 static const FunctionInfo functions[] = { 80 static const FunctionInfo functions[] = {
47 {0, nullptr, "GetDebugProcesses"}, 81 {0, nullptr, "GetDebugProcesses"},
48 {1, nullptr, "StartDebugProcess"}, 82 {1, nullptr, "StartDebugProcess"},
49 {2, nullptr, "GetTitlePid"}, 83 {2, &DebugMonitor::GetTitlePid, "GetTitlePid"},
50 {3, nullptr, "EnableDebugForTitleId"}, 84 {3, nullptr, "EnableDebugForTitleId"},
51 {4, nullptr, "GetApplicationPid"}, 85 {4, &DebugMonitor::GetApplicationPid, "GetApplicationPid"},
52 {5, nullptr, "EnableDebugForApplication"}, 86 {5, nullptr, "EnableDebugForApplication"},
53 {6, nullptr, "DisableDebug"}, 87 {6, nullptr, "DisableDebug"},
54 }; 88 };
@@ -56,21 +90,77 @@ public:
56 90
57 RegisterHandlers(functions); 91 RegisterHandlers(functions);
58 } 92 }
93
94private:
95 void GetTitlePid(Kernel::HLERequestContext& ctx) {
96 IPC::RequestParser rp{ctx};
97 const auto title_id = rp.PopRaw<u64>();
98
99 LOG_DEBUG(Service_PM, "called, title_id={:016X}", title_id);
100
101 const auto process =
102 SearchProcessList(kernel.GetProcessList(), [title_id](const auto& process) {
103 return process->GetTitleID() == title_id;
104 });
105
106 if (!process.has_value()) {
107 IPC::ResponseBuilder rb{ctx, 2};
108 rb.Push(ERROR_PROCESS_NOT_FOUND);
109 return;
110 }
111
112 IPC::ResponseBuilder rb{ctx, 4};
113 rb.Push(RESULT_SUCCESS);
114 rb.Push((*process)->GetProcessID());
115 }
116
117 void GetApplicationPid(Kernel::HLERequestContext& ctx) {
118 LOG_DEBUG(Service_PM, "called");
119 GetApplicationPidGeneric(ctx, kernel.GetProcessList());
120 }
121
122 const Kernel::KernelCore& kernel;
59}; 123};
60 124
61class Info final : public ServiceFramework<Info> { 125class Info final : public ServiceFramework<Info> {
62public: 126public:
63 explicit Info() : ServiceFramework{"pm:info"} { 127 explicit Info(const std::vector<Kernel::SharedPtr<Kernel::Process>>& process_list)
128 : ServiceFramework{"pm:info"}, process_list(process_list) {
64 static const FunctionInfo functions[] = { 129 static const FunctionInfo functions[] = {
65 {0, nullptr, "GetTitleId"}, 130 {0, &Info::GetTitleId, "GetTitleId"},
66 }; 131 };
67 RegisterHandlers(functions); 132 RegisterHandlers(functions);
68 } 133 }
134
135private:
136 void GetTitleId(Kernel::HLERequestContext& ctx) {
137 IPC::RequestParser rp{ctx};
138 const auto process_id = rp.PopRaw<u64>();
139
140 LOG_DEBUG(Service_PM, "called, process_id={:016X}", process_id);
141
142 const auto process = SearchProcessList(process_list, [process_id](const auto& process) {
143 return process->GetProcessID() == process_id;
144 });
145
146 if (!process.has_value()) {
147 IPC::ResponseBuilder rb{ctx, 2};
148 rb.Push(ERROR_PROCESS_NOT_FOUND);
149 return;
150 }
151
152 IPC::ResponseBuilder rb{ctx, 4};
153 rb.Push(RESULT_SUCCESS);
154 rb.Push((*process)->GetTitleID());
155 }
156
157 const std::vector<Kernel::SharedPtr<Kernel::Process>>& process_list;
69}; 158};
70 159
71class Shell final : public ServiceFramework<Shell> { 160class Shell final : public ServiceFramework<Shell> {
72public: 161public:
73 explicit Shell() : ServiceFramework{"pm:shell"} { 162 explicit Shell(const Kernel::KernelCore& kernel)
163 : ServiceFramework{"pm:shell"}, kernel(kernel) {
74 // clang-format off 164 // clang-format off
75 static const FunctionInfo functions[] = { 165 static const FunctionInfo functions[] = {
76 {0, nullptr, "LaunchProcess"}, 166 {0, nullptr, "LaunchProcess"},
@@ -79,21 +169,31 @@ public:
79 {3, nullptr, "GetProcessEventWaiter"}, 169 {3, nullptr, "GetProcessEventWaiter"},
80 {4, nullptr, "GetProcessEventType"}, 170 {4, nullptr, "GetProcessEventType"},
81 {5, nullptr, "NotifyBootFinished"}, 171 {5, nullptr, "NotifyBootFinished"},
82 {6, nullptr, "GetApplicationPid"}, 172 {6, &Shell::GetApplicationPid, "GetApplicationPid"},
83 {7, nullptr, "BoostSystemMemoryResourceLimit"}, 173 {7, nullptr, "BoostSystemMemoryResourceLimit"},
84 {8, nullptr, "EnableAdditionalSystemThreads"}, 174 {8, nullptr, "EnableAdditionalSystemThreads"},
175 {9, nullptr, "GetUnimplementedEventHandle"},
85 }; 176 };
86 // clang-format on 177 // clang-format on
87 178
88 RegisterHandlers(functions); 179 RegisterHandlers(functions);
89 } 180 }
181
182private:
183 void GetApplicationPid(Kernel::HLERequestContext& ctx) {
184 LOG_DEBUG(Service_PM, "called");
185 GetApplicationPidGeneric(ctx, kernel.GetProcessList());
186 }
187
188 const Kernel::KernelCore& kernel;
90}; 189};
91 190
92void InstallInterfaces(SM::ServiceManager& sm) { 191void InstallInterfaces(Core::System& system) {
93 std::make_shared<BootMode>()->InstallAsService(sm); 192 std::make_shared<BootMode>()->InstallAsService(system.ServiceManager());
94 std::make_shared<DebugMonitor>()->InstallAsService(sm); 193 std::make_shared<DebugMonitor>(system.Kernel())->InstallAsService(system.ServiceManager());
95 std::make_shared<Info>()->InstallAsService(sm); 194 std::make_shared<Info>(system.Kernel().GetProcessList())
96 std::make_shared<Shell>()->InstallAsService(sm); 195 ->InstallAsService(system.ServiceManager());
196 std::make_shared<Shell>(system.Kernel())->InstallAsService(system.ServiceManager());
97} 197}
98 198
99} // namespace Service::PM 199} // namespace Service::PM
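GetTitlePid, GetTitleId, and both GetApplicationPid handlers all funnel through SearchProcessList: a std::find_if over the kernel's process list, wrapped in std::optional so callers can distinguish "not found" (mapped to ERROR_PROCESS_NOT_FOUND or the zero pid) from a live process. The pattern in isolation, with a minimal Process stand-in:

    #include <algorithm>
    #include <cstdint>
    #include <functional>
    #include <iostream>
    #include <optional>
    #include <vector>

    struct Process {
        std::uint64_t process_id;
        std::uint64_t title_id;
    };

    std::optional<Process> SearchProcessList(const std::vector<Process>& list,
                                             std::function<bool(const Process&)> pred) {
        const auto iter = std::find_if(list.begin(), list.end(), pred);
        if (iter == list.end()) {
            return std::nullopt; // caller turns this into an error or a zero pid
        }
        return *iter;
    }

    int main() {
        const std::vector<Process> list{{81, 0x0100000000010000}, {82, 0x0100000000020000}};
        const std::uint64_t wanted = 0x0100000000020000;
        const auto process = SearchProcessList(
            list, [wanted](const Process& p) { return p.title_id == wanted; });
        std::cout << (process ? process->process_id : 0) << '\n'; // prints 82
    }

Passing the predicate as a parameter is what lets one helper serve both the pid-by-title and title-by-pid lookups.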
diff --git a/src/core/hle/service/pm/pm.h b/src/core/hle/service/pm/pm.h
index cc8d3f215..852e7050c 100644
--- a/src/core/hle/service/pm/pm.h
+++ b/src/core/hle/service/pm/pm.h
@@ -4,8 +4,8 @@
4 4
5#pragma once 5#pragma once
6 6
7namespace Service::SM { 7namespace Core {
8class ServiceManager; 8class System;
9} 9}
10 10
11namespace Service::PM { 11namespace Service::PM {
@@ -16,6 +16,6 @@ enum class SystemBootMode {
16}; 16};
17 17
18/// Registers all PM services with the specified service manager. 18/// Registers all PM services with the specified service manager.
19void InstallInterfaces(SM::ServiceManager& service_manager); 19void InstallInterfaces(Core::System& system);
20 20
21} // namespace Service::PM 21} // namespace Service::PM
diff --git a/src/core/hle/service/service.cpp b/src/core/hle/service/service.cpp
index 5fc7d3cab..7eefd733f 100644
--- a/src/core/hle/service/service.cpp
+++ b/src/core/hle/service/service.cpp
@@ -195,8 +195,7 @@ ResultCode ServiceFrameworkBase::HandleSyncRequest(Kernel::HLERequestContext& co
195// Module interface 195// Module interface
196 196
197/// Initialize ServiceManager 197/// Initialize ServiceManager
198void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system, 198void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system) {
199 FileSys::VfsFilesystem& vfs) {
200 // NVFlinger needs to be accessed by several services like Vi and AppletOE so we instantiate it 199 // NVFlinger needs to be accessed by several services like Vi and AppletOE so we instantiate it
201 // here and pass it into the respective InstallInterfaces functions. 200 // here and pass it into the respective InstallInterfaces functions.
202 auto nv_flinger = std::make_shared<NVFlinger::NVFlinger>(system.CoreTiming()); 201 auto nv_flinger = std::make_shared<NVFlinger::NVFlinger>(system.CoreTiming());
@@ -206,7 +205,7 @@ void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system,
206 Account::InstallInterfaces(system); 205 Account::InstallInterfaces(system);
207 AM::InstallInterfaces(*sm, nv_flinger, system); 206 AM::InstallInterfaces(*sm, nv_flinger, system);
208 AOC::InstallInterfaces(*sm); 207 AOC::InstallInterfaces(*sm);
209 APM::InstallInterfaces(*sm); 208 APM::InstallInterfaces(system);
210 Audio::InstallInterfaces(*sm); 209 Audio::InstallInterfaces(*sm);
211 BCAT::InstallInterfaces(*sm); 210 BCAT::InstallInterfaces(*sm);
212 BPC::InstallInterfaces(*sm); 211 BPC::InstallInterfaces(*sm);
@@ -218,7 +217,7 @@ void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system,
218 EUPLD::InstallInterfaces(*sm); 217 EUPLD::InstallInterfaces(*sm);
219 Fatal::InstallInterfaces(*sm); 218 Fatal::InstallInterfaces(*sm);
220 FGM::InstallInterfaces(*sm); 219 FGM::InstallInterfaces(*sm);
221 FileSystem::InstallInterfaces(*sm, vfs); 220 FileSystem::InstallInterfaces(system);
222 Friend::InstallInterfaces(*sm); 221 Friend::InstallInterfaces(*sm);
223 Glue::InstallInterfaces(system); 222 Glue::InstallInterfaces(system);
224 GRC::InstallInterfaces(*sm); 223 GRC::InstallInterfaces(*sm);
@@ -242,7 +241,7 @@ void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system,
242 PCTL::InstallInterfaces(*sm); 241 PCTL::InstallInterfaces(*sm);
243 PCV::InstallInterfaces(*sm); 242 PCV::InstallInterfaces(*sm);
244 PlayReport::InstallInterfaces(*sm); 243 PlayReport::InstallInterfaces(*sm);
245 PM::InstallInterfaces(*sm); 244 PM::InstallInterfaces(system);
246 PSC::InstallInterfaces(*sm); 245 PSC::InstallInterfaces(*sm);
247 PSM::InstallInterfaces(*sm); 246 PSM::InstallInterfaces(*sm);
248 Set::InstallInterfaces(*sm); 247 Set::InstallInterfaces(*sm);
diff --git a/src/core/hle/service/service.h b/src/core/hle/service/service.h
index abbfe5524..c6c4bdae5 100644
--- a/src/core/hle/service/service.h
+++ b/src/core/hle/service/service.h
@@ -182,8 +182,7 @@ private:
182}; 182};
183 183
184/// Initialize ServiceManager 184/// Initialize ServiceManager
185void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system, 185void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system);
186 FileSys::VfsFilesystem& vfs);
187 186
188/// Shutdown ServiceManager 187/// Shutdown ServiceManager
189void Shutdown(); 188void Shutdown();
diff --git a/src/core/reporter.cpp b/src/core/reporter.cpp
index 6ea26fda7..5d4c3e6ea 100644
--- a/src/core/reporter.cpp
+++ b/src/core/reporter.cpp
@@ -350,6 +350,24 @@ void Reporter::SaveErrorReport(u64 title_id, ResultCode result,
350 SaveToFile(std::move(out), GetPath("error_report", title_id, timestamp)); 350 SaveToFile(std::move(out), GetPath("error_report", title_id, timestamp));
351} 351}
352 352
353void Reporter::SaveFilesystemAccessReport(Service::FileSystem::LogMode log_mode,
354 std::string log_message) const {
355 if (!IsReportingEnabled())
356 return;
357
358 const auto timestamp = GetTimestamp();
359 const auto title_id = system.CurrentProcess()->GetTitleID();
360 json out;
361
362 out["yuzu_version"] = GetYuzuVersionData();
363 out["report_common"] = GetReportCommonData(title_id, RESULT_SUCCESS, timestamp);
364
365 out["log_mode"] = fmt::format("{:08X}", static_cast<u32>(log_mode));
366 out["log_message"] = std::move(log_message);
367
368 SaveToFile(std::move(out), GetPath("filesystem_access_report", title_id, timestamp));
369}
370
353void Reporter::SaveUserReport() const { 371void Reporter::SaveUserReport() const {
354 if (!IsReportingEnabled()) { 372 if (!IsReportingEnabled()) {
355 return; 373 return;
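SaveFilesystemAccessReport assembles the report field by field into a json object before handing it to SaveToFile; the json type used throughout this file behaves like nlohmann::json. A hedged sketch of building the same shape of payload (nlohmann/json assumed available; only the two fields unique to this report are shown, the version/common blocks are omitted):

    #include <cstdint>
    #include <cstdio>
    #include <iostream>
    #include <string>

    #include <nlohmann/json.hpp>

    using json = nlohmann::json;

    json BuildFilesystemAccessReport(std::uint32_t log_mode, std::string log_message) {
        json out;
        // Zero-padded hex, matching the "{:08X}" fmt specifier in the diff.
        char mode_text[9];
        std::snprintf(mode_text, sizeof(mode_text), "%08X", log_mode);
        out["log_mode"] = mode_text;
        out["log_message"] = std::move(log_message);
        return out;
    }

    int main() {
        const auto report = BuildFilesystemAccessReport(0x3, "OpenFile ...");
        std::cout << report.dump(4) << '\n';
    }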
diff --git a/src/core/reporter.h b/src/core/reporter.h
index 4266ca550..44256de50 100644
--- a/src/core/reporter.h
+++ b/src/core/reporter.h
@@ -16,6 +16,10 @@ namespace Kernel {
16class HLERequestContext; 16class HLERequestContext;
17} // namespace Kernel 17} // namespace Kernel
18 18
19namespace Service::FileSystem {
20enum class LogMode : u32;
21}
22
19namespace Core { 23namespace Core {
20 24
21class System; 25class System;
@@ -49,6 +53,9 @@ public:
49 std::optional<std::string> custom_text_main = {}, 53 std::optional<std::string> custom_text_main = {},
50 std::optional<std::string> custom_text_detail = {}) const; 54 std::optional<std::string> custom_text_detail = {}) const;
51 55
56 void SaveFilesystemAccessReport(Service::FileSystem::LogMode log_mode,
57 std::string log_message) const;
58
52 void SaveUserReport() const; 59 void SaveUserReport() const;
53 60
54private: 61private:
diff --git a/src/core/settings.cpp b/src/core/settings.cpp
index 63aa59690..0dd1632ac 100644
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -85,7 +85,6 @@ void LogSettings() {
85 LogSetting("System_RngSeed", Settings::values.rng_seed.value_or(0)); 85 LogSetting("System_RngSeed", Settings::values.rng_seed.value_or(0));
86 LogSetting("System_CurrentUser", Settings::values.current_user); 86 LogSetting("System_CurrentUser", Settings::values.current_user);
87 LogSetting("System_LanguageIndex", Settings::values.language_index); 87 LogSetting("System_LanguageIndex", Settings::values.language_index);
88 LogSetting("Core_CpuJitEnabled", Settings::values.cpu_jit_enabled);
89 LogSetting("Core_UseMultiCore", Settings::values.use_multi_core); 88 LogSetting("Core_UseMultiCore", Settings::values.use_multi_core);
90 LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor); 89 LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor);
91 LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit); 90 LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit);
diff --git a/src/core/settings.h b/src/core/settings.h
index acf18d653..6638ce8f9 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -378,7 +378,6 @@ struct Values {
378 std::atomic_bool is_device_reload_pending{true}; 378 std::atomic_bool is_device_reload_pending{true};
379 379
380 // Core 380 // Core
381 bool cpu_jit_enabled;
382 bool use_multi_core; 381 bool use_multi_core;
383 382
384 // Data Storage 383 // Data Storage
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp
index 98f49042a..793d102d3 100644
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -168,7 +168,6 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) {
168 AddField(Telemetry::FieldType::UserConfig, "Audio_SinkId", Settings::values.sink_id); 168 AddField(Telemetry::FieldType::UserConfig, "Audio_SinkId", Settings::values.sink_id);
169 AddField(Telemetry::FieldType::UserConfig, "Audio_EnableAudioStretching", 169 AddField(Telemetry::FieldType::UserConfig, "Audio_EnableAudioStretching",
170 Settings::values.enable_audio_stretching); 170 Settings::values.enable_audio_stretching);
171 AddField(Telemetry::FieldType::UserConfig, "Core_UseCpuJit", Settings::values.cpu_jit_enabled);
172 AddField(Telemetry::FieldType::UserConfig, "Core_UseMultiCore", 171 AddField(Telemetry::FieldType::UserConfig, "Core_UseMultiCore",
173 Settings::values.use_multi_core); 172 Settings::values.use_multi_core);
174 AddField(Telemetry::FieldType::UserConfig, "Renderer_ResolutionFactor", 173 AddField(Telemetry::FieldType::UserConfig, "Renderer_ResolutionFactor",
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 6839abe71..7c18c27b3 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -1,4 +1,5 @@
1add_library(video_core STATIC 1add_library(video_core STATIC
2 buffer_cache.h
2 dma_pusher.cpp 3 dma_pusher.cpp
3 dma_pusher.h 4 dma_pusher.h
4 debug_utils/debug_utils.cpp 5 debug_utils/debug_utils.cpp
@@ -43,8 +44,6 @@ add_library(video_core STATIC
43 renderer_opengl/gl_device.h 44 renderer_opengl/gl_device.h
44 renderer_opengl/gl_framebuffer_cache.cpp 45 renderer_opengl/gl_framebuffer_cache.cpp
45 renderer_opengl/gl_framebuffer_cache.h 46 renderer_opengl/gl_framebuffer_cache.h
46 renderer_opengl/gl_global_cache.cpp
47 renderer_opengl/gl_global_cache.h
48 renderer_opengl/gl_rasterizer.cpp 47 renderer_opengl/gl_rasterizer.cpp
49 renderer_opengl/gl_rasterizer.h 48 renderer_opengl/gl_rasterizer.h
50 renderer_opengl/gl_resource_manager.cpp 49 renderer_opengl/gl_resource_manager.cpp
@@ -103,6 +102,8 @@ add_library(video_core STATIC
103 shader/decode/video.cpp 102 shader/decode/video.cpp
104 shader/decode/xmad.cpp 103 shader/decode/xmad.cpp
105 shader/decode/other.cpp 104 shader/decode/other.cpp
105 shader/control_flow.cpp
106 shader/control_flow.h
106 shader/decode.cpp 107 shader/decode.cpp
107 shader/node_helper.cpp 108 shader/node_helper.cpp
108 shader/node_helper.h 109 shader/node_helper.h
diff --git a/src/video_core/buffer_cache.h b/src/video_core/buffer_cache.h
new file mode 100644
index 000000000..6f868b8b4
--- /dev/null
+++ b/src/video_core/buffer_cache.h
@@ -0,0 +1,299 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <memory>
9#include <mutex>
10#include <unordered_map>
11#include <unordered_set>
12#include <utility>
13#include <vector>
14
15#include "common/alignment.h"
16#include "common/common_types.h"
17#include "core/core.h"
18#include "video_core/memory_manager.h"
19#include "video_core/rasterizer_cache.h"
20
21namespace VideoCore {
22class RasterizerInterface;
23}
24
25namespace VideoCommon {
26
27template <typename BufferStorageType>
28class CachedBuffer final : public RasterizerCacheObject {
29public:
30 explicit CachedBuffer(VAddr cpu_addr, u8* host_ptr)
31 : RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr} {}
32 ~CachedBuffer() override = default;
33
34 VAddr GetCpuAddr() const override {
35 return cpu_addr;
36 }
37
38 std::size_t GetSizeInBytes() const override {
39 return size;
40 }
41
42 u8* GetWritableHostPtr() const {
43 return host_ptr;
44 }
45
46 std::size_t GetSize() const {
47 return size;
48 }
49
50 std::size_t GetCapacity() const {
51 return capacity;
52 }
53
54 bool IsInternalized() const {
55 return is_internal;
56 }
57
58 const BufferStorageType& GetBuffer() const {
59 return buffer;
60 }
61
62 void SetSize(std::size_t new_size) {
63 size = new_size;
64 }
65
66 void SetInternalState(bool is_internal_) {
67 is_internal = is_internal_;
68 }
69
70 BufferStorageType ExchangeBuffer(BufferStorageType buffer_, std::size_t new_capacity) {
71 capacity = new_capacity;
72 std::swap(buffer, buffer_);
73 return buffer_;
74 }
75
76private:
77 u8* host_ptr{};
78 VAddr cpu_addr{};
79 std::size_t size{};
80 std::size_t capacity{};
81 bool is_internal{};
82 BufferStorageType buffer;
83};
84
85template <typename BufferStorageType, typename BufferType, typename StreamBuffer>
86class BufferCache : public RasterizerCache<std::shared_ptr<CachedBuffer<BufferStorageType>>> {
87public:
88 using Buffer = std::shared_ptr<CachedBuffer<BufferStorageType>>;
89 using BufferInfo = std::pair<const BufferType*, u64>;
90
91 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
92 std::unique_ptr<StreamBuffer> stream_buffer)
93 : RasterizerCache<Buffer>{rasterizer}, system{system},
94 stream_buffer{std::move(stream_buffer)}, stream_buffer_handle{
95 this->stream_buffer->GetHandle()} {}
96 ~BufferCache() = default;
97
98 void Unregister(const Buffer& entry) override {
99 std::lock_guard lock{RasterizerCache<Buffer>::mutex};
100 if (entry->IsInternalized()) {
101 internalized_entries.erase(entry->GetCacheAddr());
102 }
103 ReserveBuffer(entry);
104 RasterizerCache<Buffer>::Unregister(entry);
105 }
106
107 void TickFrame() {
108 marked_for_destruction_index =
109 (marked_for_destruction_index + 1) % marked_for_destruction_ring_buffer.size();
110 MarkedForDestruction().clear();
111 }
112
113 BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
114 bool internalize = false, bool is_written = false) {
115 std::lock_guard lock{RasterizerCache<Buffer>::mutex};
116
117 auto& memory_manager = system.GPU().MemoryManager();
118 const auto host_ptr = memory_manager.GetPointer(gpu_addr);
119 if (!host_ptr) {
120 return {GetEmptyBuffer(size), 0};
121 }
122 const auto cache_addr = ToCacheAddr(host_ptr);
123
124 // Cache management carries significant overhead, so only cache entries above a certain size.
125 // TODO: Figure out which size is the best for given games.
126 constexpr std::size_t max_stream_size = 0x800;
127 if (!internalize && size < max_stream_size &&
128 internalized_entries.find(cache_addr) == internalized_entries.end()) {
129 return StreamBufferUpload(host_ptr, size, alignment);
130 }
131
132 auto entry = RasterizerCache<Buffer>::TryGet(cache_addr);
133 if (!entry) {
134 return FixedBufferUpload(gpu_addr, host_ptr, size, internalize, is_written);
135 }
136
137 if (entry->GetSize() < size) {
138 IncreaseBufferSize(entry, size);
139 }
140 if (is_written) {
141 entry->MarkAsModified(true, *this);
142 }
143 return {ToHandle(entry->GetBuffer()), 0};
144 }
145
146 /// Uploads data from host memory. Returns the buffer where it's located and its offset.
147 BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size,
148 std::size_t alignment = 4) {
149 std::lock_guard lock{RasterizerCache<Buffer>::mutex};
150 return StreamBufferUpload(raw_pointer, size, alignment);
151 }
152
153 void Map(std::size_t max_size) {
154 std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4);
155 buffer_offset = buffer_offset_base;
156 }
157
158 /// Finishes the upload stream, returns true on bindings invalidation.
159 bool Unmap() {
160 stream_buffer->Unmap(buffer_offset - buffer_offset_base);
161 return std::exchange(invalidated, false);
162 }
163
164 virtual const BufferType* GetEmptyBuffer(std::size_t size) = 0;
165
166protected:
167 void FlushObjectInner(const Buffer& entry) override {
168 DownloadBufferData(entry->GetBuffer(), 0, entry->GetSize(), entry->GetWritableHostPtr());
169 }
170
171 virtual BufferStorageType CreateBuffer(std::size_t size) = 0;
172
173 virtual const BufferType* ToHandle(const BufferStorageType& storage) = 0;
174
175 virtual void UploadBufferData(const BufferStorageType& buffer, std::size_t offset,
176 std::size_t size, const u8* data) = 0;
177
178 virtual void DownloadBufferData(const BufferStorageType& buffer, std::size_t offset,
179 std::size_t size, u8* data) = 0;
180
181 virtual void CopyBufferData(const BufferStorageType& src, const BufferStorageType& dst,
182 std::size_t src_offset, std::size_t dst_offset,
183 std::size_t size) = 0;
184
185private:
186 BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size,
187 std::size_t alignment) {
188 AlignBuffer(alignment);
189 const std::size_t uploaded_offset = buffer_offset;
190 std::memcpy(buffer_ptr, raw_pointer, size);
191
192 buffer_ptr += size;
193 buffer_offset += size;
194 return {&stream_buffer_handle, uploaded_offset};
195 }
196
197 BufferInfo FixedBufferUpload(GPUVAddr gpu_addr, u8* host_ptr, std::size_t size,
198 bool internalize, bool is_written) {
199 auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
200 const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
201 ASSERT(cpu_addr);
202
203 auto entry = GetUncachedBuffer(*cpu_addr, host_ptr);
204 entry->SetSize(size);
205 entry->SetInternalState(internalize);
206 RasterizerCache<Buffer>::Register(entry);
207
208 if (internalize) {
209 internalized_entries.emplace(ToCacheAddr(host_ptr));
210 }
211 if (is_written) {
212 entry->MarkAsModified(true, *this);
213 }
214
215 if (entry->GetCapacity() < size) {
216 MarkedForDestruction().push_back(entry->ExchangeBuffer(CreateBuffer(size), size));
217 }
218
219 UploadBufferData(entry->GetBuffer(), 0, size, host_ptr);
220 return {ToHandle(entry->GetBuffer()), 0};
221 }
222
223 void IncreaseBufferSize(Buffer& entry, std::size_t new_size) {
224 const std::size_t old_size = entry->GetSize();
225 if (entry->GetCapacity() < new_size) {
226 const auto& old_buffer = entry->GetBuffer();
227 auto new_buffer = CreateBuffer(new_size);
228
229 // Copy bits from the old buffer to the new buffer.
230 CopyBufferData(old_buffer, new_buffer, 0, 0, old_size);
231 MarkedForDestruction().push_back(
232 entry->ExchangeBuffer(std::move(new_buffer), new_size));
233
234 // The replaced buffer may still be in use, so mark bindings as invalidated
235 invalidated = true;
236 }
237 // Upload the new bits.
238 const std::size_t size_diff = new_size - old_size;
239 UploadBufferData(entry->GetBuffer(), old_size, size_diff, entry->GetHostPtr() + old_size);
240
241 // Update entry's size in the object and in the cache.
242 Unregister(entry);
243
244 entry->SetSize(new_size);
245 RasterizerCache<Buffer>::Register(entry);
246 }
247
248 Buffer GetUncachedBuffer(VAddr cpu_addr, u8* host_ptr) {
249 if (auto entry = TryGetReservedBuffer(host_ptr)) {
250 return entry;
251 }
252 return std::make_shared<CachedBuffer<BufferStorageType>>(cpu_addr, host_ptr);
253 }
254
255 Buffer TryGetReservedBuffer(u8* host_ptr) {
256 const auto it = buffer_reserve.find(ToCacheAddr(host_ptr));
257 if (it == buffer_reserve.end()) {
258 return {};
259 }
260 auto& reserve = it->second;
261 auto entry = reserve.back();
262 reserve.pop_back();
263 return entry;
264 }
265
266 void ReserveBuffer(Buffer entry) {
267 buffer_reserve[entry->GetCacheAddr()].push_back(std::move(entry));
268 }
269
270 void AlignBuffer(std::size_t alignment) {
271 // Align the offset, not the mapped pointer
272 const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment);
273 buffer_ptr += offset_aligned - buffer_offset;
274 buffer_offset = offset_aligned;
275 }
276
277 std::vector<BufferStorageType>& MarkedForDestruction() {
278 return marked_for_destruction_ring_buffer[marked_for_destruction_index];
279 }
280
281 Core::System& system;
282
283 std::unique_ptr<StreamBuffer> stream_buffer;
284 BufferType stream_buffer_handle{};
285
286 bool invalidated = false;
287
288 u8* buffer_ptr = nullptr;
289 u64 buffer_offset = 0;
290 u64 buffer_offset_base = 0;
291
292 std::size_t marked_for_destruction_index = 0;
293 std::array<std::vector<BufferStorageType>, 4> marked_for_destruction_ring_buffer;
294
295 std::unordered_set<CacheAddr> internalized_entries;
296 std::unordered_map<CacheAddr, std::vector<Buffer>> buffer_reserve;
297};
298
299} // namespace VideoCommon
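Two mechanisms in this header deserve a closer look. The upload path splits on size: small, non-internalized uploads go straight into the stream buffer, while large or internalized ones get a persistent cached buffer. And TickFrame plus MarkedForDestruction implement deferred destruction: storage replaced this frame is parked in one of four per-frame garbage lists and freed only when the ring wraps, giving in-flight GPU work a few frames to finish with the old buffer. The deferred-destruction mechanism reduced to its core:

    #include <array>
    #include <cstddef>
    #include <iostream>
    #include <string>
    #include <vector>

    template <typename BufferStorage>
    class DeferredDeleter {
    public:
        // Park a replaced buffer; it stays alive for the next few frames.
        void Defer(BufferStorage buffer) {
            ring[index].push_back(std::move(buffer));
        }

        // Called once per frame: advance the ring and clear the bucket being
        // reused, which by now is ring.size() - 1 frames old.
        void TickFrame() {
            index = (index + 1) % ring.size();
            ring[index].clear();
        }

    private:
        std::size_t index = 0;
        std::array<std::vector<BufferStorage>, 4> ring;
    };

    int main() {
        DeferredDeleter<std::string> deleter; // std::string stands in for GPU storage
        deleter.Defer("old vertex buffer");
        for (int frame = 0; frame < 4; ++frame) {
            deleter.TickFrame(); // after four ticks the parked buffer is gone
        }
        std::cout << "done\n";
    }

Four buckets matches the ring size in the header; the exact depth just needs to exceed the number of frames the GPU can trail behind the CPU.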
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 3175579cc..bd036cbe8 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -22,7 +22,7 @@ void DmaPusher::DispatchCalls() {
22 MICROPROFILE_SCOPE(DispatchCalls); 22 MICROPROFILE_SCOPE(DispatchCalls);
23 23
24 // On entering GPU code, assume all memory may be touched by the ARM core. 24 // On entering GPU code, assume all memory may be touched by the ARM core.
25 gpu.Maxwell3D().dirty_flags.OnMemoryWrite(); 25 gpu.Maxwell3D().dirty.OnMemoryWrite();
26 26
27 dma_pushbuffer_subindex = 0; 27 dma_pushbuffer_subindex = 0;
28 28
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 7404a8163..08586d33c 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -37,7 +37,7 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
37 const bool is_last_call = method_call.IsLastCall(); 37 const bool is_last_call = method_call.IsLastCall();
38 upload_state.ProcessData(method_call.argument, is_last_call); 38 upload_state.ProcessData(method_call.argument, is_last_call);
39 if (is_last_call) { 39 if (is_last_call) {
40 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); 40 system.GPU().Maxwell3D().dirty.OnMemoryWrite();
41 } 41 }
42 break; 42 break;
43 } 43 }
@@ -50,13 +50,14 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
50} 50}
51 51
52void KeplerCompute::ProcessLaunch() { 52void KeplerCompute::ProcessLaunch() {
53
54 const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); 53 const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
55 memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, 54 memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
56 LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32)); 55 LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32));
57 56
58 const GPUVAddr code_loc = regs.code_loc.Address() + launch_description.program_start; 57 const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start;
59 LOG_WARNING(HW_GPU, "Compute Kernel Execute at Address 0x{:016x}, STUBBED", code_loc); 58 LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr);
59
60 rasterizer.DispatchCompute(code_addr);
60} 61}
61 62
62} // namespace Tegra::Engines 63} // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 0561f676c..44279de00 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -34,7 +34,7 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
34 const bool is_last_call = method_call.IsLastCall(); 34 const bool is_last_call = method_call.IsLastCall();
35 upload_state.ProcessData(method_call.argument, is_last_call); 35 upload_state.ProcessData(method_call.argument, is_last_call);
36 if (is_last_call) { 36 if (is_last_call) {
37 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); 37 system.GPU().Maxwell3D().dirty.OnMemoryWrite();
38 } 38 }
39 break; 39 break;
40 } 40 }
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 8755b8af4..74c46ec04 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -22,6 +22,7 @@ Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& raste
22 MemoryManager& memory_manager) 22 MemoryManager& memory_manager)
23 : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, 23 : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},
24 macro_interpreter{*this}, upload_state{memory_manager, regs.upload} { 24 macro_interpreter{*this}, upload_state{memory_manager, regs.upload} {
25 InitDirtySettings();
25 InitializeRegisterDefaults(); 26 InitializeRegisterDefaults();
26} 27}
27 28
@@ -69,6 +70,10 @@ void Maxwell3D::InitializeRegisterDefaults() {
69 regs.stencil_back_func_mask = 0xFFFFFFFF; 70 regs.stencil_back_func_mask = 0xFFFFFFFF;
70 regs.stencil_back_mask = 0xFFFFFFFF; 71 regs.stencil_back_mask = 0xFFFFFFFF;
71 72
73 regs.depth_test_func = Regs::ComparisonOp::Always;
74 regs.cull.front_face = Regs::Cull::FrontFace::CounterClockWise;
75 regs.cull.cull_face = Regs::Cull::CullFace::Back;
76
72 // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a 77 // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a
73 // register carrying a default value. Assume it's OpenGL's default (1). 78 // register carrying a default value. Assume it's OpenGL's default (1).
74 regs.point_size = 1.0f; 79 regs.point_size = 1.0f;
@@ -86,6 +91,159 @@ void Maxwell3D::InitializeRegisterDefaults() {
86 regs.rt_separate_frag_data = 1; 91 regs.rt_separate_frag_data = 1;
87} 92}
88 93
94#define DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name))
95
96void Maxwell3D::InitDirtySettings() {
97 const auto set_block = [this](const u32 start, const u32 range, const u8 position) {
98 const auto start_itr = dirty_pointers.begin() + start;
99 const auto end_itr = start_itr + range;
100 std::fill(start_itr, end_itr, position);
101 };
102 dirty.regs.fill(true);
103
104 // Init Render Targets
105 constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
106 constexpr u32 rt_start_reg = MAXWELL3D_REG_INDEX(rt);
107 constexpr u32 rt_end_reg = rt_start_reg + registers_per_rt * 8;
108 u32 rt_dirty_reg = DIRTY_REGS_POS(render_target);
109 for (u32 rt_reg = rt_start_reg; rt_reg < rt_end_reg; rt_reg += registers_per_rt) {
110 set_block(rt_reg, registers_per_rt, rt_dirty_reg);
111 rt_dirty_reg++;
112 }
113 constexpr u32 depth_buffer_flag = DIRTY_REGS_POS(depth_buffer);
114 dirty_pointers[MAXWELL3D_REG_INDEX(zeta_enable)] = depth_buffer_flag;
115 dirty_pointers[MAXWELL3D_REG_INDEX(zeta_width)] = depth_buffer_flag;
116 dirty_pointers[MAXWELL3D_REG_INDEX(zeta_height)] = depth_buffer_flag;
117 constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
118 constexpr u32 zeta_reg = MAXWELL3D_REG_INDEX(zeta);
119 set_block(zeta_reg, registers_in_zeta, depth_buffer_flag);
120
121 // Init Vertex Arrays
122 constexpr u32 vertex_array_start = MAXWELL3D_REG_INDEX(vertex_array);
123 constexpr u32 vertex_array_size = sizeof(regs.vertex_array[0]) / sizeof(u32);
124 constexpr u32 vertex_array_end = vertex_array_start + vertex_array_size * Regs::NumVertexArrays;
125 u32 va_reg = DIRTY_REGS_POS(vertex_array);
126 u32 vi_reg = DIRTY_REGS_POS(vertex_instance);
127 for (u32 vertex_reg = vertex_array_start; vertex_reg < vertex_array_end;
128 vertex_reg += vertex_array_size) {
129 set_block(vertex_reg, 3, va_reg);
130 // The divisor register affects vertex array instancing, so it gets its own flag
131 dirty_pointers[vertex_reg + 3] = vi_reg;
132 va_reg++;
133 vi_reg++;
134 }
135 constexpr u32 vertex_limit_start = MAXWELL3D_REG_INDEX(vertex_array_limit);
136 constexpr u32 vertex_limit_size = sizeof(regs.vertex_array_limit[0]) / sizeof(u32);
137 constexpr u32 vertex_limit_end = vertex_limit_start + vertex_limit_size * Regs::NumVertexArrays;
138 va_reg = DIRTY_REGS_POS(vertex_array);
139 for (u32 vertex_reg = vertex_limit_start; vertex_reg < vertex_limit_end;
140 vertex_reg += vertex_limit_size) {
141 set_block(vertex_reg, vertex_limit_size, va_reg);
142 va_reg++;
143 }
144 constexpr u32 vertex_instance_start = MAXWELL3D_REG_INDEX(instanced_arrays);
145 constexpr u32 vertex_instance_size =
146 sizeof(regs.instanced_arrays.is_instanced[0]) / sizeof(u32);
147 constexpr u32 vertex_instance_end =
148 vertex_instance_start + vertex_instance_size * Regs::NumVertexArrays;
149 vi_reg = DIRTY_REGS_POS(vertex_instance);
150 for (u32 vertex_reg = vertex_instance_start; vertex_reg < vertex_instance_end;
151 vertex_reg += vertex_instance_size) {
152 set_block(vertex_reg, vertex_instance_size, vi_reg);
153 vi_reg++;
154 }
155 set_block(MAXWELL3D_REG_INDEX(vertex_attrib_format), regs.vertex_attrib_format.size(),
156 DIRTY_REGS_POS(vertex_attrib_format));
157
158 // Init Shaders
159 constexpr u32 shader_registers_count =
160 sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
161 set_block(MAXWELL3D_REG_INDEX(shader_config[0]), shader_registers_count,
162 DIRTY_REGS_POS(shaders));
163
164 // State
165
166 // Viewport
167 constexpr u32 viewport_dirty_reg = DIRTY_REGS_POS(viewport);
168 constexpr u32 viewport_start = MAXWELL3D_REG_INDEX(viewports);
169 constexpr u32 viewport_size = sizeof(regs.viewports) / sizeof(u32);
170 set_block(viewport_start, viewport_size, viewport_dirty_reg);
171 constexpr u32 view_volume_start = MAXWELL3D_REG_INDEX(view_volume_clip_control);
172 constexpr u32 view_volume_size = sizeof(regs.view_volume_clip_control) / sizeof(u32);
173 set_block(view_volume_start, view_volume_size, viewport_dirty_reg);
174
175 // Viewport transformation
176 constexpr u32 viewport_trans_start = MAXWELL3D_REG_INDEX(viewport_transform);
177 constexpr u32 viewport_trans_size = sizeof(regs.viewport_transform) / sizeof(u32);
178 set_block(viewport_trans_start, viewport_trans_size, DIRTY_REGS_POS(viewport_transform));
179
180 // Cullmode
181 constexpr u32 cull_mode_start = MAXWELL3D_REG_INDEX(cull);
182 constexpr u32 cull_mode_size = sizeof(regs.cull) / sizeof(u32);
183 set_block(cull_mode_start, cull_mode_size, DIRTY_REGS_POS(cull_mode));
184
185 // Screen y control
186 dirty_pointers[MAXWELL3D_REG_INDEX(screen_y_control)] = DIRTY_REGS_POS(screen_y_control);
187
188 // Primitive Restart
189 constexpr u32 primitive_restart_start = MAXWELL3D_REG_INDEX(primitive_restart);
190 constexpr u32 primitive_restart_size = sizeof(regs.primitive_restart) / sizeof(u32);
191 set_block(primitive_restart_start, primitive_restart_size, DIRTY_REGS_POS(primitive_restart));
192
193 // Depth Test
194 constexpr u32 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test);
195 dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_enable)] = depth_test_dirty_reg;
196 dirty_pointers[MAXWELL3D_REG_INDEX(depth_write_enabled)] = depth_test_dirty_reg;
197 dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_func)] = depth_test_dirty_reg;
198
199 // Stencil Test
200 constexpr u32 stencil_test_dirty_reg = DIRTY_REGS_POS(stencil_test);
201 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_enable)] = stencil_test_dirty_reg;
202 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_func)] = stencil_test_dirty_reg;
203 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_ref)] = stencil_test_dirty_reg;
204 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_mask)] = stencil_test_dirty_reg;
205 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_fail)] = stencil_test_dirty_reg;
206 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zfail)] = stencil_test_dirty_reg;
207 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zpass)] = stencil_test_dirty_reg;
208 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_mask)] = stencil_test_dirty_reg;
209 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_two_side_enable)] = stencil_test_dirty_reg;
210 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_func)] = stencil_test_dirty_reg;
211 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_ref)] = stencil_test_dirty_reg;
212 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_mask)] = stencil_test_dirty_reg;
213 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_fail)] = stencil_test_dirty_reg;
214 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zfail)] = stencil_test_dirty_reg;
215 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zpass)] = stencil_test_dirty_reg;
216 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_mask)] = stencil_test_dirty_reg;
217
218 // Color Mask
219 constexpr u32 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask);
220 dirty_pointers[MAXWELL3D_REG_INDEX(color_mask_common)] = color_mask_dirty_reg;
221 set_block(MAXWELL3D_REG_INDEX(color_mask), sizeof(regs.color_mask) / sizeof(u32),
222 color_mask_dirty_reg);
223 // Blend State
224 constexpr u32 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state);
225 set_block(MAXWELL3D_REG_INDEX(blend_color), sizeof(regs.blend_color) / sizeof(u32),
226 blend_state_dirty_reg);
227 dirty_pointers[MAXWELL3D_REG_INDEX(independent_blend_enable)] = blend_state_dirty_reg;
228 set_block(MAXWELL3D_REG_INDEX(blend), sizeof(regs.blend) / sizeof(u32), blend_state_dirty_reg);
229 set_block(MAXWELL3D_REG_INDEX(independent_blend), sizeof(regs.independent_blend) / sizeof(u32),
230 blend_state_dirty_reg);
231
232 // Scissor State
233 constexpr u32 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test);
234 set_block(MAXWELL3D_REG_INDEX(scissor_test), sizeof(regs.scissor_test) / sizeof(u32),
235 scissor_test_dirty_reg);
236
237 // Polygon Offset
238 constexpr u32 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset);
239 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_fill_enable)] = polygon_offset_dirty_reg;
240 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_line_enable)] = polygon_offset_dirty_reg;
241 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_point_enable)] = polygon_offset_dirty_reg;
242 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_units)] = polygon_offset_dirty_reg;
243 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_factor)] = polygon_offset_dirty_reg;
244 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg;
245}
246
89void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { 247void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
90 // Reset the current macro. 248 // Reset the current macro.
91 executing_macro = 0; 249 executing_macro = 0;
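The big change in maxwell_3d.cpp is how register writes mark state dirty. Instead of a chain of range comparisons on every write, InitDirtySettings precomputes dirty_pointers, a table mapping each register index to the position of its dirty flag, so CallMethod does one lookup and one store. The technique in miniature (register ranges and flag names invented for the sketch):

    #include <array>
    #include <cstdint>
    #include <iostream>

    constexpr std::size_t NUM_REGS = 16;

    struct DirtyFlags {
        bool render_targets = false;
        bool depth_buffer = false;
        bool viewport = false;
    };

    // Built once at startup: register index -> which flag a write dirties.
    enum class Flag : std::uint8_t { None, RenderTargets, DepthBuffer, Viewport };
    std::array<Flag, NUM_REGS> dirty_pointers{};

    void InitDirtySettings() {
        // Pretend registers 0..7 are render targets, 8..9 the depth buffer,
        // and 10 the viewport; everything else stays Flag::None.
        for (std::size_t reg = 0; reg < 8; ++reg) {
            dirty_pointers[reg] = Flag::RenderTargets;
        }
        dirty_pointers[8] = dirty_pointers[9] = Flag::DepthBuffer;
        dirty_pointers[10] = Flag::Viewport;
    }

    void WriteRegister(DirtyFlags& dirty, std::size_t reg) {
        switch (dirty_pointers[reg]) { // O(1) instead of a chain of range checks
        case Flag::RenderTargets: dirty.render_targets = true; break;
        case Flag::DepthBuffer:   dirty.depth_buffer = true;   break;
        case Flag::Viewport:      dirty.viewport = true;       break;
        case Flag::None:          break;
        }
    }

    int main() {
        InitDirtySettings();
        DirtyFlags dirty;
        WriteRegister(dirty, 9);
        std::cout << dirty.depth_buffer << '\n'; // 1: depth state needs re-sync
    }

The real table stores a u8 position into a DirtyRegs struct rather than an enum, which is also what lets CallMethod fold ranges of flags (vertex arrays, render targets) into aggregate bits with simple comparisons.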
@@ -108,6 +266,14 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
108 266
109 const u32 method = method_call.method; 267 const u32 method = method_call.method;
110 268
269 if (method == cb_data_state.current) {
270 regs.reg_array[method] = method_call.argument;
271 ProcessCBData(method_call.argument);
272 return;
273 } else if (cb_data_state.current != null_cb_data) {
274 FinishCBData();
275 }
276
111 // It is an error to write to a register other than the current macro's ARG register before it 277 // It is an error to write to a register other than the current macro's ARG register before it
112 // has finished execution. 278 // has finished execution.
113 if (executing_macro != 0) { 279 if (executing_macro != 0) {
@@ -143,49 +309,19 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
143 309
144 if (regs.reg_array[method] != method_call.argument) { 310 if (regs.reg_array[method] != method_call.argument) {
145 regs.reg_array[method] = method_call.argument; 311 regs.reg_array[method] = method_call.argument;
146 // Color buffers 312 const std::size_t dirty_reg = dirty_pointers[method];
147 constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt); 313 if (dirty_reg) {
148 constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); 314 dirty.regs[dirty_reg] = true;
149 if (method >= first_rt_reg && 315 if (dirty_reg >= DIRTY_REGS_POS(vertex_array) &&
150 method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) { 316 dirty_reg < DIRTY_REGS_POS(vertex_array_buffers)) {
151 const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt; 317 dirty.vertex_array_buffers = true;
152 dirty_flags.color_buffer.set(rt_index); 318 } else if (dirty_reg >= DIRTY_REGS_POS(vertex_instance) &&
153 } 319 dirty_reg < DIRTY_REGS_POS(vertex_instances)) {
154 320 dirty.vertex_instances = true;
155 // Zeta buffer 321 } else if (dirty_reg >= DIRTY_REGS_POS(render_target) &&
156 constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32); 322 dirty_reg < DIRTY_REGS_POS(render_settings)) {
157 if (method == MAXWELL3D_REG_INDEX(zeta_enable) || 323 dirty.render_settings = true;
158 method == MAXWELL3D_REG_INDEX(zeta_width) || 324 }
159 method == MAXWELL3D_REG_INDEX(zeta_height) ||
160 (method >= MAXWELL3D_REG_INDEX(zeta) &&
161 method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
162 dirty_flags.zeta_buffer = true;
163 }
164
165 // Shader
166 constexpr u32 shader_registers_count =
167 sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
168 if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
169 method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
170 dirty_flags.shaders = true;
171 }
172
173 // Vertex format
174 if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
175 method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
176 dirty_flags.vertex_attrib_format = true;
177 }
178
179 // Vertex buffer
180 if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
181 method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * Regs::NumVertexArrays) {
182 dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
183 } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
184 method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * Regs::NumVertexArrays) {
185 dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
186 } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
187 method < MAXWELL3D_REG_INDEX(instanced_arrays) + Regs::NumVertexArrays) {
188 dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays));
189 } 325 }
190 } 326 }
191 327
@@ -214,7 +350,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
214 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]): 350 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]):
215 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]): 351 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]):
216 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): { 352 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): {
217 ProcessCBData(method_call.argument); 353 StartCBData(method);
218 break; 354 break;
219 } 355 }
220 case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): { 356 case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): {
@@ -249,6 +385,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
249 ProcessQueryGet(); 385 ProcessQueryGet();
250 break; 386 break;
251 } 387 }
388 case MAXWELL3D_REG_INDEX(condition.mode): {
389 ProcessQueryCondition();
390 break;
391 }
252 case MAXWELL3D_REG_INDEX(sync_info): { 392 case MAXWELL3D_REG_INDEX(sync_info): {
253 ProcessSyncPoint(); 393 ProcessSyncPoint();
254 break; 394 break;
@@ -261,7 +401,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
261 const bool is_last_call = method_call.IsLastCall(); 401 const bool is_last_call = method_call.IsLastCall();
262 upload_state.ProcessData(method_call.argument, is_last_call); 402 upload_state.ProcessData(method_call.argument, is_last_call);
263 if (is_last_call) { 403 if (is_last_call) {
264 dirty_flags.OnMemoryWrite(); 404 dirty.OnMemoryWrite();
265 } 405 }
266 break; 406 break;
267 } 407 }
@@ -302,6 +442,7 @@ void Maxwell3D::ProcessQueryGet() {
302 result = regs.query.query_sequence; 442 result = regs.query.query_sequence;
303 break; 443 break;
304 default: 444 default:
445 result = 1;
305 UNIMPLEMENTED_MSG("Unimplemented query select type {}", 446 UNIMPLEMENTED_MSG("Unimplemented query select type {}",
306 static_cast<u32>(regs.query.query_get.select.Value())); 447 static_cast<u32>(regs.query.query_get.select.Value()));
307 } 448 }
@@ -333,7 +474,6 @@ void Maxwell3D::ProcessQueryGet() {
333 query_result.timestamp = system.CoreTiming().GetTicks(); 474 query_result.timestamp = system.CoreTiming().GetTicks();
334 memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); 475 memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
335 } 476 }
336 dirty_flags.OnMemoryWrite();
337 break; 477 break;
338 } 478 }
339 default: 479 default:
@@ -342,6 +482,45 @@ void Maxwell3D::ProcessQueryGet() {
342 } 482 }
343} 483}
344 484
485void Maxwell3D::ProcessQueryCondition() {
486 const GPUVAddr condition_address{regs.condition.Address()};
487 switch (regs.condition.mode) {
488 case Regs::ConditionMode::Always: {
489 execute_on = true;
490 break;
491 }
492 case Regs::ConditionMode::Never: {
493 execute_on = false;
494 break;
495 }
496 case Regs::ConditionMode::ResNonZero: {
497 Regs::QueryCompare cmp;
498 memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
499 execute_on = cmp.initial_sequence != 0U && cmp.initial_mode != 0U;
500 break;
501 }
502 case Regs::ConditionMode::Equal: {
503 Regs::QueryCompare cmp;
504 memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
505 execute_on =
506 cmp.initial_sequence == cmp.current_sequence && cmp.initial_mode == cmp.current_mode;
507 break;
508 }
509 case Regs::ConditionMode::NotEqual: {
510 Regs::QueryCompare cmp;
511 memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
512 execute_on =
513 cmp.initial_sequence != cmp.current_sequence || cmp.initial_mode != cmp.current_mode;
514 break;
515 }
516 default: {
 517        UNIMPLEMENTED_MSG("Unimplemented condition mode!");
518 execute_on = true;
519 break;
520 }
521 }
522}
523
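
A self-contained distillation of the conditional-rendering predicate introduced above (the QueryCompare layout is copied from the header hunk further down; every other name is a stand-in, not the emulator's types):

#include <cassert>
#include <cstdint>

enum class ConditionMode : uint32_t { Never = 0, Always = 1, ResNonZero = 2, Equal = 3, NotEqual = 4 };

// Field layout taken from the QueryCompare struct added to maxwell_3d.h.
struct QueryCompare {
    uint32_t initial_sequence;
    uint32_t initial_mode;
    uint32_t unknown1;
    uint32_t unknown2;
    uint32_t current_sequence;
    uint32_t current_mode;
};

// Returns whether subsequent draws should execute, given the compare block
// the engine reads back from the condition address.
bool ShouldExecute(ConditionMode mode, const QueryCompare& cmp) {
    switch (mode) {
    case ConditionMode::Always:
        return true;
    case ConditionMode::Never:
        return false;
    case ConditionMode::ResNonZero:
        return cmp.initial_sequence != 0 && cmp.initial_mode != 0;
    case ConditionMode::Equal:
        return cmp.initial_sequence == cmp.current_sequence && cmp.initial_mode == cmp.current_mode;
    case ConditionMode::NotEqual:
        return cmp.initial_sequence != cmp.current_sequence || cmp.initial_mode != cmp.current_mode;
    default:
        return true; // Unknown modes fall back to executing, as in the diff.
    }
}

int main() {
    const QueryCompare cmp{1, 1, 0, 0, 1, 1};
    assert(ShouldExecute(ConditionMode::Equal, cmp));
    assert(!ShouldExecute(ConditionMode::NotEqual, cmp));
}
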
345void Maxwell3D::ProcessSyncPoint() { 524void Maxwell3D::ProcessSyncPoint() {
346 const u32 sync_point = regs.sync_info.sync_point.Value(); 525 const u32 sync_point = regs.sync_info.sync_point.Value();
347 const u32 increment = regs.sync_info.increment.Value(); 526 const u32 increment = regs.sync_info.increment.Value();
@@ -405,23 +584,39 @@ void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
405} 584}
406 585
407void Maxwell3D::ProcessCBData(u32 value) { 586void Maxwell3D::ProcessCBData(u32 value) {
587 const u32 id = cb_data_state.id;
588 cb_data_state.buffer[id][cb_data_state.counter] = value;
589 // Increment the current buffer position.
590 regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
591 cb_data_state.counter++;
592}
593
594void Maxwell3D::StartCBData(u32 method) {
595 constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]);
596 cb_data_state.start_pos = regs.const_buffer.cb_pos;
597 cb_data_state.id = method - first_cb_data;
598 cb_data_state.current = method;
599 cb_data_state.counter = 0;
600 ProcessCBData(regs.const_buffer.cb_data[cb_data_state.id]);
601}
602
603void Maxwell3D::FinishCBData() {
408 // Write the input value to the current const buffer at the current position. 604 // Write the input value to the current const buffer at the current position.
409 const GPUVAddr buffer_address = regs.const_buffer.BufferAddress(); 605 const GPUVAddr buffer_address = regs.const_buffer.BufferAddress();
410 ASSERT(buffer_address != 0); 606 ASSERT(buffer_address != 0);
411 607
412 // Don't allow writing past the end of the buffer. 608 // Don't allow writing past the end of the buffer.
413 ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size); 609 ASSERT(regs.const_buffer.cb_pos <= regs.const_buffer.cb_size);
414
415 const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos};
416 610
417 u8* ptr{memory_manager.GetPointer(address)}; 611 const GPUVAddr address{buffer_address + cb_data_state.start_pos};
418 rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32)); 612 const std::size_t size = regs.const_buffer.cb_pos - cb_data_state.start_pos;
419 memory_manager.Write<u32>(address, value);
420 613
421 dirty_flags.OnMemoryWrite(); 614 const u32 id = cb_data_state.id;
615 memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size);
616 dirty.OnMemoryWrite();
422 617
423 // Increment the current buffer position. 618 cb_data_state.id = null_cb_data;
424 regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; 619 cb_data_state.current = null_cb_data;
425} 620}
426 621
427Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { 622Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
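
The rewritten CB_DATA path above batches a burst of cb_data writes in a staging buffer and commits them with a single block write when the burst ends, instead of one guest-memory write plus one cache invalidation per word. A minimal model of that batching, with hypothetical names wherever the diff does not show the real types:

#include <array>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

// Stand-ins; the field names mirror cb_data_state in the diff.
struct CBDataState {
    std::array<uint32_t, 0x4000> buffer{};
    uint32_t start_pos = 0; // cb_pos when the burst began
    uint32_t counter = 0;   // words accumulated so far
};

std::vector<uint8_t> guest_memory(0x10000); // placeholder for GPU-visible memory
uint32_t cb_pos = 0;                        // mirrors regs.const_buffer.cb_pos

void StartBurst(CBDataState& s) {
    s.start_pos = cb_pos;
    s.counter = 0;
}

void PushWord(CBDataState& s, uint32_t value) {
    s.buffer[s.counter++] = value;
    cb_pos += 4; // cb_pos auto-increments once per word, as in ProcessCBData
}

void FinishBurst(CBDataState& s, uint32_t buffer_address) {
    // One block write (and so one cache invalidation) for the whole burst.
    const std::size_t size = cb_pos - s.start_pos;
    std::memcpy(guest_memory.data() + buffer_address + s.start_pos, s.buffer.data(), size);
}

int main() {
    CBDataState state;
    StartBurst(state);
    for (uint32_t i = 0; i < 4; ++i) {
        PushWord(state, i);
    }
    FinishBurst(state, 0x100);
}
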
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 13e314944..1ee982b76 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -67,6 +67,7 @@ public:
67 static constexpr std::size_t MaxShaderStage = 5; 67 static constexpr std::size_t MaxShaderStage = 5;
68 // Maximum number of const buffers per shader stage. 68 // Maximum number of const buffers per shader stage.
69 static constexpr std::size_t MaxConstBuffers = 18; 69 static constexpr std::size_t MaxConstBuffers = 18;
70 static constexpr std::size_t MaxConstBufferSize = 0x10000;
70 71
71 enum class QueryMode : u32 { 72 enum class QueryMode : u32 {
72 Write = 0, 73 Write = 0,
@@ -89,6 +90,20 @@ public:
89 90
90 enum class QuerySelect : u32 { 91 enum class QuerySelect : u32 {
91 Zero = 0, 92 Zero = 0,
93 TimeElapsed = 2,
94 TransformFeedbackPrimitivesGenerated = 11,
95 PrimitivesGenerated = 18,
96 SamplesPassed = 21,
97 TransformFeedbackUnknown = 26,
98 };
99
100 struct QueryCompare {
101 u32 initial_sequence;
102 u32 initial_mode;
103 u32 unknown1;
104 u32 unknown2;
105 u32 current_sequence;
106 u32 current_mode;
92 }; 107 };
93 108
94 enum class QuerySyncCondition : u32 { 109 enum class QuerySyncCondition : u32 {
@@ -96,6 +111,14 @@ public:
96 GreaterThan = 1, 111 GreaterThan = 1,
97 }; 112 };
98 113
114 enum class ConditionMode : u32 {
115 Never = 0,
116 Always = 1,
117 ResNonZero = 2,
118 Equal = 3,
119 NotEqual = 4,
120 };
121
99 enum class ShaderProgram : u32 { 122 enum class ShaderProgram : u32 {
100 VertexA = 0, 123 VertexA = 0,
101 VertexB = 1, 124 VertexB = 1,
@@ -814,7 +837,18 @@ public:
814 BitField<4, 1, u32> alpha_to_one; 837 BitField<4, 1, u32> alpha_to_one;
815 } multisample_control; 838 } multisample_control;
816 839
817 INSERT_PADDING_WORDS(0x7); 840 INSERT_PADDING_WORDS(0x4);
841
842 struct {
843 u32 address_high;
844 u32 address_low;
845 ConditionMode mode;
846
847 GPUVAddr Address() const {
848 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
849 address_low);
850 }
851 } condition;
818 852
819 struct { 853 struct {
820 u32 tsc_address_high; 854 u32 tsc_address_high;
@@ -1123,23 +1157,77 @@ public:
1123 1157
1124 State state{}; 1158 State state{};
1125 1159
1126 struct DirtyFlags { 1160 struct DirtyRegs {
1127 std::bitset<8> color_buffer{0xFF}; 1161 static constexpr std::size_t NUM_REGS = 256;
1128 std::bitset<32> vertex_array{0xFFFFFFFF}; 1162 union {
1163 struct {
1164 bool null_dirty;
1165
1166 // Vertex Attributes
1167 bool vertex_attrib_format;
1168
1169 // Vertex Arrays
1170 std::array<bool, 32> vertex_array;
1171
1172 bool vertex_array_buffers;
1173
1174 // Vertex Instances
1175 std::array<bool, 32> vertex_instance;
1129 1176
1130 bool vertex_attrib_format = true; 1177 bool vertex_instances;
1131 bool zeta_buffer = true; 1178
1132 bool shaders = true; 1179 // Render Targets
1180 std::array<bool, 8> render_target;
1181 bool depth_buffer;
1182
1183 bool render_settings;
1184
1185 // Shaders
1186 bool shaders;
1187
1188 // Rasterizer State
1189 bool viewport;
1190 bool clip_coefficient;
1191 bool cull_mode;
1192 bool primitive_restart;
1193 bool depth_test;
1194 bool stencil_test;
1195 bool blend_state;
1196 bool scissor_test;
1197 bool transform_feedback;
1198 bool color_mask;
1199 bool polygon_offset;
1200
1201 // Complementary
1202 bool viewport_transform;
1203 bool screen_y_control;
1204
1205 bool memory_general;
1206 };
1207 std::array<bool, NUM_REGS> regs;
1208 };
1209
1210 void ResetVertexArrays() {
1211 vertex_array.fill(true);
1212 vertex_array_buffers = true;
1213 }
1214
1215 void ResetRenderTargets() {
1216 depth_buffer = true;
1217 render_target.fill(true);
1218 render_settings = true;
1219 }
1133 1220
1134 void OnMemoryWrite() { 1221 void OnMemoryWrite() {
1135 zeta_buffer = true;
1136 shaders = true; 1222 shaders = true;
1137 color_buffer.set(); 1223 memory_general = true;
1138 vertex_array.set(); 1224 ResetRenderTargets();
1225 ResetVertexArrays();
1139 } 1226 }
1140 };
1141 1227
1142 DirtyFlags dirty_flags; 1228 } dirty{};
1229
1230 std::array<u8, Regs::NUM_REGS> dirty_pointers{};
1143 1231
1144 /// Reads a register value located at the input method address 1232 /// Reads a register value located at the input method address
1145 u32 GetRegisterValue(u32 method) const; 1233 u32 GetRegisterValue(u32 method) const;
@@ -1168,6 +1256,10 @@ public:
1168 return macro_memory; 1256 return macro_memory;
1169 } 1257 }
1170 1258
1259 bool ShouldExecute() const {
1260 return execute_on;
1261 }
1262
1171private: 1263private:
1172 void InitializeRegisterDefaults(); 1264 void InitializeRegisterDefaults();
1173 1265
@@ -1191,14 +1283,27 @@ private:
1191 /// Interpreter for the macro codes uploaded to the GPU. 1283 /// Interpreter for the macro codes uploaded to the GPU.
1192 MacroInterpreter macro_interpreter; 1284 MacroInterpreter macro_interpreter;
1193 1285
1286 static constexpr u32 null_cb_data = 0xFFFFFFFF;
1287 struct {
1288 std::array<std::array<u32, 0x4000>, 16> buffer;
1289 u32 current{null_cb_data};
1290 u32 id{null_cb_data};
1291 u32 start_pos{};
1292 u32 counter{};
1293 } cb_data_state;
1294
1194 Upload::State upload_state; 1295 Upload::State upload_state;
1195 1296
1297 bool execute_on{true};
1298
1196 /// Retrieves information about a specific TIC entry from the TIC buffer. 1299 /// Retrieves information about a specific TIC entry from the TIC buffer.
1197 Texture::TICEntry GetTICEntry(u32 tic_index) const; 1300 Texture::TICEntry GetTICEntry(u32 tic_index) const;
1198 1301
1199 /// Retrieves information about a specific TSC entry from the TSC buffer. 1302 /// Retrieves information about a specific TSC entry from the TSC buffer.
1200 Texture::TSCEntry GetTSCEntry(u32 tsc_index) const; 1303 Texture::TSCEntry GetTSCEntry(u32 tsc_index) const;
1201 1304
1305 void InitDirtySettings();
1306
1202 /** 1307 /**
1203 * Call a macro on this engine. 1308 * Call a macro on this engine.
1204 * @param method Method to call 1309 * @param method Method to call
@@ -1218,11 +1323,16 @@ private:
1218 /// Handles a write to the QUERY_GET register. 1323 /// Handles a write to the QUERY_GET register.
1219 void ProcessQueryGet(); 1324 void ProcessQueryGet();
1220 1325
1326    /// Handles a write to the condition register (conditional rendering).
1327 void ProcessQueryCondition();
1328
1221 /// Handles writes to syncing register. 1329 /// Handles writes to syncing register.
1222 void ProcessSyncPoint(); 1330 void ProcessSyncPoint();
1223 1331
1224 /// Handles a write to the CB_DATA[i] register. 1332 /// Handles a write to the CB_DATA[i] register.
1333 void StartCBData(u32 method);
1225 void ProcessCBData(u32 value); 1334 void ProcessCBData(u32 value);
1335 void FinishCBData();
1226 1336
1227 /// Handles a write to the CB_BIND register. 1337 /// Handles a write to the CB_BIND register.
1228 void ProcessCBBind(Regs::ShaderStage stage); 1338 void ProcessCBBind(Regs::ShaderStage stage);
@@ -1289,6 +1399,7 @@ ASSERT_REG_POSITION(clip_distance_enabled, 0x544);
1289ASSERT_REG_POSITION(point_size, 0x546); 1399ASSERT_REG_POSITION(point_size, 0x546);
1290ASSERT_REG_POSITION(zeta_enable, 0x54E); 1400ASSERT_REG_POSITION(zeta_enable, 0x54E);
1291ASSERT_REG_POSITION(multisample_control, 0x54F); 1401ASSERT_REG_POSITION(multisample_control, 0x54F);
1402ASSERT_REG_POSITION(condition, 0x554);
1292ASSERT_REG_POSITION(tsc, 0x557); 1403ASSERT_REG_POSITION(tsc, 0x557);
1293ASSERT_REG_POSITION(polygon_offset_factor, 0x55b); 1404ASSERT_REG_POSITION(polygon_offset_factor, 0x55b);
1294ASSERT_REG_POSITION(tic, 0x55D); 1405ASSERT_REG_POSITION(tic, 0x55D);
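
The DirtyRegs rework replaces the hand-written per-register range checks with a precomputed table: every engine register maps to at most one dirty slot, and related slots are aggregated (any vertex_array slot also raises vertex_array_buffers, as the CallMethod hunk above shows). A reduced, compilable model of the mechanism; the sizes and slot numbers are stand-ins, since the real table is filled in by InitDirtySettings():

#include <array>
#include <cstddef>
#include <cstdint>

constexpr std::size_t NUM_REGS = 0x600; // stand-in for the engine's Regs::NUM_REGS
constexpr std::size_t NUM_DIRTY = 256;  // stand-in for DirtyRegs::NUM_REGS

std::array<uint8_t, NUM_REGS> dirty_pointers{}; // slot 0 means "tracks nothing"
std::array<bool, NUM_DIRTY> dirty_flags{};

void WriteRegister(std::array<uint32_t, NUM_REGS>& regs, uint32_t method, uint32_t value) {
    if (regs[method] == value) {
        return; // writes that do not change the register never dirty anything
    }
    regs[method] = value;
    if (const uint8_t slot = dirty_pointers[method]; slot != 0) {
        dirty_flags[slot] = true;
    }
}

int main() {
    std::array<uint32_t, NUM_REGS> regs{};
    dirty_pointers[0x200] = 7; // hypothetical: register 0x200 feeds dirty slot 7
    WriteRegister(regs, 0x200, 1);
    return dirty_flags[7] ? 0 : 1;
}
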
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index afb9578d0..a28c04473 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -38,7 +38,7 @@ void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) {
38} 38}
39 39
40void MaxwellDMA::HandleCopy() { 40void MaxwellDMA::HandleCopy() {
41 LOG_WARNING(HW_GPU, "Requested a DMA copy"); 41 LOG_TRACE(HW_GPU, "Requested a DMA copy");
42 42
43 const GPUVAddr source = regs.src_address.Address(); 43 const GPUVAddr source = regs.src_address.Address();
44 const GPUVAddr dest = regs.dst_address.Address(); 44 const GPUVAddr dest = regs.dst_address.Address();
@@ -58,7 +58,7 @@ void MaxwellDMA::HandleCopy() {
58 } 58 }
59 59
60 // All copies here update the main memory, so mark all rasterizer states as invalid. 60 // All copies here update the main memory, so mark all rasterizer states as invalid.
61 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); 61 system.GPU().Maxwell3D().dirty.OnMemoryWrite();
62 62
63 if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { 63 if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
64 // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D 64 // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 404d4f5aa..8520a0143 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -78,7 +78,7 @@ union Attribute {
78 constexpr explicit Attribute(u64 value) : value(value) {} 78 constexpr explicit Attribute(u64 value) : value(value) {}
79 79
80 enum class Index : u64 { 80 enum class Index : u64 {
81 PointSize = 6, 81 LayerViewportPointSize = 6,
82 Position = 7, 82 Position = 7,
83 Attribute_0 = 8, 83 Attribute_0 = 8,
84 Attribute_31 = 39, 84 Attribute_31 = 39,
@@ -931,8 +931,6 @@ union Instruction {
931 } csetp; 931 } csetp;
932 932
933 union { 933 union {
934 BitField<35, 4, PredCondition> cond;
935 BitField<49, 1, u64> h_and;
936 BitField<6, 1, u64> ftz; 934 BitField<6, 1, u64> ftz;
937 BitField<45, 2, PredOperation> op; 935 BitField<45, 2, PredOperation> op;
938 BitField<3, 3, u64> pred3; 936 BitField<3, 3, u64> pred3;
@@ -940,9 +938,21 @@ union Instruction {
940 BitField<43, 1, u64> negate_a; 938 BitField<43, 1, u64> negate_a;
941 BitField<44, 1, u64> abs_a; 939 BitField<44, 1, u64> abs_a;
942 BitField<47, 2, HalfType> type_a; 940 BitField<47, 2, HalfType> type_a;
943 BitField<31, 1, u64> negate_b; 941 union {
944 BitField<30, 1, u64> abs_b; 942 BitField<35, 4, PredCondition> cond;
945 BitField<28, 2, HalfType> type_b; 943 BitField<49, 1, u64> h_and;
944 BitField<31, 1, u64> negate_b;
945 BitField<30, 1, u64> abs_b;
946 BitField<28, 2, HalfType> type_b;
947 } reg;
948 union {
949 BitField<56, 1, u64> negate_b;
950 BitField<54, 1, u64> abs_b;
951 } cbuf;
952 union {
953 BitField<49, 4, PredCondition> cond;
954 BitField<53, 1, u64> h_and;
955 } cbuf_and_imm;
946 BitField<42, 1, u64> neg_pred; 956 BitField<42, 1, u64> neg_pred;
947 BitField<39, 3, u64> pred39; 957 BitField<39, 3, u64> pred39;
948 } hsetp2; 958 } hsetp2;
@@ -1278,6 +1288,7 @@ union Instruction {
1278 union { 1288 union {
1279 BitField<49, 1, u64> nodep_flag; 1289 BitField<49, 1, u64> nodep_flag;
1280 BitField<53, 4, u64> texture_info; 1290 BitField<53, 4, u64> texture_info;
1291 BitField<59, 1, u64> fp32_flag;
1281 1292
1282 TextureType GetTextureType() const { 1293 TextureType GetTextureType() const {
1283 // The TLDS instruction has a weird encoding for the texture type. 1294 // The TLDS instruction has a weird encoding for the texture type.
@@ -1368,6 +1379,20 @@ union Instruction {
1368 } bra; 1379 } bra;
1369 1380
1370 union { 1381 union {
1382 BitField<20, 24, u64> target;
1383 BitField<5, 1, u64> constant_buffer;
1384
1385 s32 GetBranchExtend() const {
1386 // Sign extend the branch target offset
1387 u32 mask = 1U << (24 - 1);
1388 u32 value = static_cast<u32>(target);
1389 // The branch offset is relative to the next instruction and is stored in bytes, so
1390 // divide it by the size of an instruction and add 1 to it.
1391 return static_cast<s32>((value ^ mask) - mask) / sizeof(Instruction) + 1;
1392 }
1393 } brx;
1394
1395 union {
1371 BitField<39, 1, u64> emit; // EmitVertex 1396 BitField<39, 1, u64> emit; // EmitVertex
1372 BitField<40, 1, u64> cut; // EndPrimitive 1397 BitField<40, 1, u64> cut; // EndPrimitive
1373 } out; 1398 } out;
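
GetBranchExtend above sign-extends the 24-bit BRX target with the classic (value ^ mask) - mask trick. A standalone demonstration, plus a caution about the division that follows it in the diff:

#include <cassert>
#include <cstdint>

// XORing with the sign-bit mask flips the sign bit; subtracting the mask
// restores it while propagating it through the upper bits.
int32_t SignExtend24(uint32_t raw) {
    const uint32_t mask = 1U << 23;
    return static_cast<int32_t>(((raw & 0xFFFFFF) ^ mask) - mask);
}

int main() {
    assert(SignExtend24(0x000010) == 16);
    assert(SignExtend24(0xFFFFF0) == -16);
    // Caution: in the diff, the extended s32 is then divided by
    // sizeof(Instruction), a size_t, which on typical 64-bit targets promotes
    // the dividend to an unsigned type; a cast such as
    // static_cast<s32>(sizeof(Instruction)) would be needed for negative
    // offsets to divide as intended.
}
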
@@ -1464,6 +1489,7 @@ public:
1464 BFE_IMM, 1489 BFE_IMM,
1465 BFI_IMM_R, 1490 BFI_IMM_R,
1466 BRA, 1491 BRA,
1492 BRX,
1467 PBK, 1493 PBK,
1468 LD_A, 1494 LD_A,
1469 LD_L, 1495 LD_L,
@@ -1532,7 +1558,9 @@ public:
1532 HFMA2_RC, 1558 HFMA2_RC,
1533 HFMA2_RR, 1559 HFMA2_RR,
1534 HFMA2_IMM_R, 1560 HFMA2_IMM_R,
1561 HSETP2_C,
1535 HSETP2_R, 1562 HSETP2_R,
1563 HSETP2_IMM,
1536 HSET2_R, 1564 HSET2_R,
1537 POPC_C, 1565 POPC_C,
1538 POPC_R, 1566 POPC_R,
@@ -1738,6 +1766,7 @@ private:
1738 INST("111000101001----", Id::SSY, Type::Flow, "SSY"), 1766 INST("111000101001----", Id::SSY, Type::Flow, "SSY"),
1739 INST("111000101010----", Id::PBK, Type::Flow, "PBK"), 1767 INST("111000101010----", Id::PBK, Type::Flow, "PBK"),
1740 INST("111000100100----", Id::BRA, Type::Flow, "BRA"), 1768 INST("111000100100----", Id::BRA, Type::Flow, "BRA"),
1769 INST("111000100101----", Id::BRX, Type::Flow, "BRX"),
1741 INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"), 1770 INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"),
1742 INST("111000110100---", Id::BRK, Type::Flow, "BRK"), 1771 INST("111000110100---", Id::BRK, Type::Flow, "BRK"),
1743 INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), 1772 INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"),
@@ -1760,7 +1789,7 @@ private:
1760 INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"), 1789 INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"),
1761 INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"), 1790 INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"),
1762 INST("11011100--11----", Id::TLD, Type::Texture, "TLD"), 1791 INST("11011100--11----", Id::TLD, Type::Texture, "TLD"),
1763 INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"), 1792 INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"),
1764 INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), 1793 INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
1765 INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"), 1794 INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
1766 INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), 1795 INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
@@ -1814,7 +1843,9 @@ private:
1814 INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"), 1843 INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"),
1815 INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"), 1844 INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"),
1816 INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"), 1845 INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"),
1817 INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP_R"), 1846 INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"),
1847 INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"),
1848 INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"),
1818 INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), 1849 INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"),
1819 INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), 1850 INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
1820 INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), 1851 INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
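
For context on the INST(...) table the BRX and HSETP2 entries were added to: each pattern string encodes required bits ('0'/'1') and wildcards ('-') over the instruction's top bits. The sketch below shows one plausible way such a pattern compiles into a mask/expect pair; it mirrors the decoder in spirit only and is not the project's actual implementation:

#include <cassert>
#include <cstdint>

struct Matcher {
    uint16_t mask = 0;
    uint16_t expect = 0;
};

// '0'/'1' are required bits, '-' is a wildcard; patterns cover the top 16 bits.
constexpr Matcher Compile(const char* pattern) {
    Matcher m{};
    for (int i = 0; pattern[i] != '\0'; ++i) {
        m.mask = static_cast<uint16_t>(m.mask << 1);
        m.expect = static_cast<uint16_t>(m.expect << 1);
        if (pattern[i] != '-') {
            m.mask |= 1;
            m.expect |= (pattern[i] == '1') ? 1 : 0;
        }
    }
    return m;
}

int main() {
    constexpr Matcher brx = Compile("111000100101----");
    assert((0xE250 & brx.mask) == brx.expect); // 1110001001010000 decodes as BRX
    assert((0xE240 & brx.mask) != brx.expect); // ...0100---- is BRA, not BRX
}
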
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 52706505b..21007d8b2 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -31,7 +31,7 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
31 31
32GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} { 32GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} {
33 auto& rasterizer{renderer.Rasterizer()}; 33 auto& rasterizer{renderer.Rasterizer()};
34 memory_manager = std::make_unique<Tegra::MemoryManager>(rasterizer); 34 memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer);
35 dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); 35 dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
36 maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); 36 maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
37 fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); 37 fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
@@ -50,6 +50,14 @@ const Engines::Maxwell3D& GPU::Maxwell3D() const {
50 return *maxwell_3d; 50 return *maxwell_3d;
51} 51}
52 52
53Engines::KeplerCompute& GPU::KeplerCompute() {
54 return *kepler_compute;
55}
56
57const Engines::KeplerCompute& GPU::KeplerCompute() const {
58 return *kepler_compute;
59}
60
53MemoryManager& GPU::MemoryManager() { 61MemoryManager& GPU::MemoryManager() {
54 return *memory_manager; 62 return *memory_manager;
55} 63}
@@ -143,12 +151,12 @@ enum class BufferMethods {
143 NotifyIntr = 0x8, 151 NotifyIntr = 0x8,
144 WrcacheFlush = 0x9, 152 WrcacheFlush = 0x9,
145 Unk28 = 0xA, 153 Unk28 = 0xA,
146 Unk2c = 0xB, 154 UnkCacheFlush = 0xB,
147 RefCnt = 0x14, 155 RefCnt = 0x14,
148 SemaphoreAcquire = 0x1A, 156 SemaphoreAcquire = 0x1A,
149 SemaphoreRelease = 0x1B, 157 SemaphoreRelease = 0x1B,
150 Unk70 = 0x1C, 158 FenceValue = 0x1C,
151 Unk74 = 0x1D, 159 FenceAction = 0x1D,
152 Unk78 = 0x1E, 160 Unk78 = 0x1E,
153 Unk7c = 0x1F, 161 Unk7c = 0x1F,
154 Yield = 0x20, 162 Yield = 0x20,
@@ -194,6 +202,10 @@ void GPU::CallPullerMethod(const MethodCall& method_call) {
194 case BufferMethods::SemaphoreAddressLow: 202 case BufferMethods::SemaphoreAddressLow:
195 case BufferMethods::SemaphoreSequence: 203 case BufferMethods::SemaphoreSequence:
196 case BufferMethods::RefCnt: 204 case BufferMethods::RefCnt:
205 case BufferMethods::UnkCacheFlush:
206 case BufferMethods::WrcacheFlush:
207 case BufferMethods::FenceValue:
208 case BufferMethods::FenceAction:
197 break; 209 break;
198 case BufferMethods::SemaphoreTrigger: { 210 case BufferMethods::SemaphoreTrigger: {
199 ProcessSemaphoreTriggerMethod(); 211 ProcessSemaphoreTriggerMethod();
@@ -204,21 +216,11 @@ void GPU::CallPullerMethod(const MethodCall& method_call) {
204 LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented"); 216 LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
205 break; 217 break;
206 } 218 }
207 case BufferMethods::WrcacheFlush: {
208 // TODO(Kmather73): Research and implement this method.
209 LOG_ERROR(HW_GPU, "Special puller engine method WrcacheFlush not implemented");
210 break;
211 }
212 case BufferMethods::Unk28: { 219 case BufferMethods::Unk28: {
213 // TODO(Kmather73): Research and implement this method. 220 // TODO(Kmather73): Research and implement this method.
214 LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented"); 221 LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
215 break; 222 break;
216 } 223 }
217 case BufferMethods::Unk2c: {
218 // TODO(Kmather73): Research and implement this method.
219 LOG_ERROR(HW_GPU, "Special puller engine method Unk2c not implemented");
220 break;
221 }
222 case BufferMethods::SemaphoreAcquire: { 224 case BufferMethods::SemaphoreAcquire: {
223 ProcessSemaphoreAcquire(); 225 ProcessSemaphoreAcquire();
224 break; 226 break;
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index fe6628923..0055e5326 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -155,6 +155,12 @@ public:
155 /// Returns a const reference to the Maxwell3D GPU engine. 155 /// Returns a const reference to the Maxwell3D GPU engine.
156 const Engines::Maxwell3D& Maxwell3D() const; 156 const Engines::Maxwell3D& Maxwell3D() const;
157 157
158 /// Returns a reference to the KeplerCompute GPU engine.
159 Engines::KeplerCompute& KeplerCompute();
160
 161    /// Returns a const reference to the KeplerCompute GPU engine.
162 const Engines::KeplerCompute& KeplerCompute() const;
163
158 /// Returns a reference to the GPU memory manager. 164 /// Returns a reference to the GPU memory manager.
159 Tegra::MemoryManager& MemoryManager(); 165 Tegra::MemoryManager& MemoryManager();
160 166
@@ -194,7 +200,12 @@ public:
194 200
195 u32 semaphore_acquire; 201 u32 semaphore_acquire;
196 u32 semaphore_release; 202 u32 semaphore_release;
197 INSERT_PADDING_WORDS(0xE4); 203 u32 fence_value;
204 union {
205 BitField<4, 4, u32> operation;
206 BitField<8, 8, u32> id;
207 } fence_action;
208 INSERT_PADDING_WORDS(0xE2);
198 209
199 // Puller state 210 // Puller state
200 u32 acquire_mode; 211 u32 acquire_mode;
@@ -274,6 +285,8 @@ ASSERT_REG_POSITION(semaphore_trigger, 0x7);
274ASSERT_REG_POSITION(reference_count, 0x14); 285ASSERT_REG_POSITION(reference_count, 0x14);
275ASSERT_REG_POSITION(semaphore_acquire, 0x1A); 286ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
276ASSERT_REG_POSITION(semaphore_release, 0x1B); 287ASSERT_REG_POSITION(semaphore_release, 0x1B);
288ASSERT_REG_POSITION(fence_value, 0x1C);
289ASSERT_REG_POSITION(fence_action, 0x1D);
277 290
278ASSERT_REG_POSITION(acquire_mode, 0x100); 291ASSERT_REG_POSITION(acquire_mode, 0x100);
279ASSERT_REG_POSITION(acquire_source, 0x101); 292ASSERT_REG_POSITION(acquire_source, 0x101);
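
The fence_action register packs two fields into one word via BitField. Written out with explicit shifts and masks, the layout added above is:

#include <cassert>
#include <cstdint>

// operation occupies bits [4, 8) and id occupies bits [8, 16).
struct FenceAction {
    uint32_t raw;
    uint32_t Operation() const { return (raw >> 4) & 0xF; }
    uint32_t Id() const { return (raw >> 8) & 0xFF; }
};

int main() {
    const FenceAction action{(42u << 8) | (1u << 4)};
    assert(action.Operation() == 1);
    assert(action.Id() == 42);
}
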
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp
index c766ed692..9f59a2dc1 100644
--- a/src/video_core/macro_interpreter.cpp
+++ b/src/video_core/macro_interpreter.cpp
@@ -4,14 +4,18 @@
4 4
5#include "common/assert.h" 5#include "common/assert.h"
6#include "common/logging/log.h" 6#include "common/logging/log.h"
7#include "common/microprofile.h"
7#include "video_core/engines/maxwell_3d.h" 8#include "video_core/engines/maxwell_3d.h"
8#include "video_core/macro_interpreter.h" 9#include "video_core/macro_interpreter.h"
9 10
11MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192));
12
10namespace Tegra { 13namespace Tegra {
11 14
12MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} 15MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
13 16
14void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) { 17void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) {
18 MICROPROFILE_SCOPE(MacroInterp);
15 Reset(); 19 Reset();
16 registers[1] = parameters[0]; 20 registers[1] = parameters[0];
17 this->parameters = std::move(parameters); 21 this->parameters = std::move(parameters);
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 322453116..bffae940c 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -5,13 +5,17 @@
5#include "common/alignment.h" 5#include "common/alignment.h"
6#include "common/assert.h" 6#include "common/assert.h"
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8#include "core/core.h"
9#include "core/hle/kernel/process.h"
10#include "core/hle/kernel/vm_manager.h"
8#include "core/memory.h" 11#include "core/memory.h"
9#include "video_core/memory_manager.h" 12#include "video_core/memory_manager.h"
10#include "video_core/rasterizer_interface.h" 13#include "video_core/rasterizer_interface.h"
11 14
12namespace Tegra { 15namespace Tegra {
13 16
14MemoryManager::MemoryManager(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} { 17MemoryManager::MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
18 : rasterizer{rasterizer}, system{system} {
15 std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr); 19 std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr);
16 std::fill(page_table.attributes.begin(), page_table.attributes.end(), 20 std::fill(page_table.attributes.begin(), page_table.attributes.end(),
17 Common::PageType::Unmapped); 21 Common::PageType::Unmapped);
@@ -49,6 +53,11 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) {
49 const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)}; 53 const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};
50 54
51 MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr); 55 MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr);
56 ASSERT(system.CurrentProcess()
57 ->VMManager()
58 .SetMemoryAttribute(cpu_addr, size, Kernel::MemoryAttribute::DeviceMapped,
59 Kernel::MemoryAttribute::DeviceMapped)
60 .IsSuccess());
52 61
53 return gpu_addr; 62 return gpu_addr;
54} 63}
@@ -59,7 +68,11 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size)
59 const u64 aligned_size{Common::AlignUp(size, page_size)}; 68 const u64 aligned_size{Common::AlignUp(size, page_size)};
60 69
61 MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr); 70 MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr);
62 71 ASSERT(system.CurrentProcess()
72 ->VMManager()
73 .SetMemoryAttribute(cpu_addr, size, Kernel::MemoryAttribute::DeviceMapped,
74 Kernel::MemoryAttribute::DeviceMapped)
75 .IsSuccess());
63 return gpu_addr; 76 return gpu_addr;
64} 77}
65 78
@@ -68,9 +81,16 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
68 81
69 const u64 aligned_size{Common::AlignUp(size, page_size)}; 82 const u64 aligned_size{Common::AlignUp(size, page_size)};
70 const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))}; 83 const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))};
84 const auto cpu_addr = GpuToCpuAddress(gpu_addr);
85 ASSERT(cpu_addr);
71 86
72 rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size); 87 rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size);
73 UnmapRange(gpu_addr, aligned_size); 88 UnmapRange(gpu_addr, aligned_size);
89 ASSERT(system.CurrentProcess()
90 ->VMManager()
91 .SetMemoryAttribute(cpu_addr.value(), size, Kernel::MemoryAttribute::DeviceMapped,
92 Kernel::MemoryAttribute::None)
93 .IsSuccess());
74 94
75 return gpu_addr; 95 return gpu_addr;
76} 96}
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 43a84bd52..aea010087 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -14,6 +14,10 @@ namespace VideoCore {
14class RasterizerInterface; 14class RasterizerInterface;
15} 15}
16 16
17namespace Core {
18class System;
19}
20
17namespace Tegra { 21namespace Tegra {
18 22
19/** 23/**
@@ -47,7 +51,7 @@ struct VirtualMemoryArea {
47 51
48class MemoryManager final { 52class MemoryManager final {
49public: 53public:
50 explicit MemoryManager(VideoCore::RasterizerInterface& rasterizer); 54 explicit MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer);
51 ~MemoryManager(); 55 ~MemoryManager();
52 56
53 GPUVAddr AllocateSpace(u64 size, u64 align); 57 GPUVAddr AllocateSpace(u64 size, u64 align);
@@ -173,6 +177,8 @@ private:
173 Common::PageTable page_table{page_bits}; 177 Common::PageTable page_table{page_bits};
174 VMAMap vma_map; 178 VMAMap vma_map;
175 VideoCore::RasterizerInterface& rasterizer; 179 VideoCore::RasterizerInterface& rasterizer;
180
181 Core::System& system;
176}; 182};
177 183
178} // namespace Tegra 184} // namespace Tegra
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 5ee4f8e8e..9881df0d5 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -34,6 +34,9 @@ public:
34 /// Clear the current framebuffer 34 /// Clear the current framebuffer
35 virtual void Clear() = 0; 35 virtual void Clear() = 0;
36 36
37 /// Dispatches a compute shader invocation
38 virtual void DispatchCompute(GPUVAddr code_addr) = 0;
39
37 /// Notify rasterizer that all caches should be flushed to Switch memory 40 /// Notify rasterizer that all caches should be flushed to Switch memory
38 virtual void FlushAll() = 0; 41 virtual void FlushAll() = 0;
39 42
@@ -47,6 +50,9 @@ public:
47 /// and invalidated 50 /// and invalidated
48 virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; 51 virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
49 52
53 /// Notify rasterizer that a frame is about to finish
54 virtual void TickFrame() = 0;
55
50 /// Attempt to use a faster method to perform a surface copy 56 /// Attempt to use a faster method to perform a surface copy
51 virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 57 virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
52 const Tegra::Engines::Fermi2D::Regs::Surface& dst, 58 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
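
Backends must now implement the two pure-virtual hooks added above. A minimal skeleton showing the expected override shape; the class and member names here are hypothetical:

#include <cstdint>

using GPUVAddr = uint64_t;

class RasterizerInterface {
public:
    virtual ~RasterizerInterface() = default;
    virtual void DispatchCompute(GPUVAddr code_addr) = 0;
    virtual void TickFrame() = 0;
};

class NullRasterizer final : public RasterizerInterface {
public:
    void DispatchCompute(GPUVAddr code_addr) override {
        // A real backend fetches and launches the compute shader at code_addr.
        (void)code_addr;
    }
    void TickFrame() override {
        // Per-frame bookkeeping, e.g. letting stream buffers reclaim space.
    }
};

int main() {
    NullRasterizer rasterizer;
    rasterizer.DispatchCompute(0x1000);
    rasterizer.TickFrame();
}
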
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 2b9bd142e..2a9b523f5 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -2,103 +2,57 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <cstring>
6#include <memory> 5#include <memory>
7 6
8#include "common/alignment.h" 7#include <glad/glad.h>
9#include "core/core.h" 8
10#include "video_core/memory_manager.h" 9#include "common/assert.h"
11#include "video_core/renderer_opengl/gl_buffer_cache.h" 10#include "video_core/renderer_opengl/gl_buffer_cache.h"
12#include "video_core/renderer_opengl/gl_rasterizer.h" 11#include "video_core/renderer_opengl/gl_rasterizer.h"
12#include "video_core/renderer_opengl/gl_resource_manager.h"
13 13
14namespace OpenGL { 14namespace OpenGL {
15 15
16CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset, 16OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
17 std::size_t alignment, u8* host_ptr) 17 std::size_t stream_size)
18 : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset}, 18 : VideoCommon::BufferCache<OGLBuffer, GLuint, OGLStreamBuffer>{
19 alignment{alignment} {} 19 rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {}
20
21OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
22 : RasterizerCache{rasterizer}, stream_buffer(size, true) {}
23
24GLintptr OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment,
25 bool cache) {
26 std::lock_guard lock{mutex};
27 auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
28
29 // Cache management is a big overhead, so only cache entries with a given size.
30 // TODO: Figure out which size is the best for given games.
31 cache &= size >= 2048;
32
33 const auto& host_ptr{memory_manager.GetPointer(gpu_addr)};
34 if (cache) {
35 auto entry = TryGet(host_ptr);
36 if (entry) {
37 if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
38 return entry->GetOffset();
39 }
40 Unregister(entry);
41 }
42 }
43 20
44 AlignBuffer(alignment); 21OGLBufferCache::~OGLBufferCache() = default;
45 const GLintptr uploaded_offset = buffer_offset;
46 22
47 if (!host_ptr) { 23OGLBuffer OGLBufferCache::CreateBuffer(std::size_t size) {
48 return uploaded_offset; 24 OGLBuffer buffer;
49 } 25 buffer.Create();
50 26 glNamedBufferData(buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
51 std::memcpy(buffer_ptr, host_ptr, size); 27 return buffer;
52 buffer_ptr += size;
53 buffer_offset += size;
54
55 if (cache) {
56 auto entry = std::make_shared<CachedBufferEntry>(
57 *memory_manager.GpuToCpuAddress(gpu_addr), size, uploaded_offset, alignment, host_ptr);
58 Register(entry);
59 }
60
61 return uploaded_offset;
62} 28}
63 29
64GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t size, 30const GLuint* OGLBufferCache::ToHandle(const OGLBuffer& buffer) {
65 std::size_t alignment) { 31 return &buffer.handle;
66 std::lock_guard lock{mutex};
67 AlignBuffer(alignment);
68 std::memcpy(buffer_ptr, raw_pointer, size);
69 const GLintptr uploaded_offset = buffer_offset;
70
71 buffer_ptr += size;
72 buffer_offset += size;
73 return uploaded_offset;
74} 32}
75 33
76bool OGLBufferCache::Map(std::size_t max_size) { 34const GLuint* OGLBufferCache::GetEmptyBuffer(std::size_t) {
77 bool invalidate; 35 static const GLuint null_buffer = 0;
78 std::tie(buffer_ptr, buffer_offset_base, invalidate) = 36 return &null_buffer;
79 stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4);
80 buffer_offset = buffer_offset_base;
81
82 if (invalidate) {
83 InvalidateAll();
84 }
85 return invalidate;
86} 37}
87 38
88void OGLBufferCache::Unmap() { 39void OGLBufferCache::UploadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size,
89 stream_buffer.Unmap(buffer_offset - buffer_offset_base); 40 const u8* data) {
41 glNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
42 static_cast<GLsizeiptr>(size), data);
90} 43}
91 44
92GLuint OGLBufferCache::GetHandle() const { 45void OGLBufferCache::DownloadBufferData(const OGLBuffer& buffer, std::size_t offset,
93 return stream_buffer.GetHandle(); 46 std::size_t size, u8* data) {
47 glGetNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
48 static_cast<GLsizeiptr>(size), data);
94} 49}
95 50
96void OGLBufferCache::AlignBuffer(std::size_t alignment) { 51void OGLBufferCache::CopyBufferData(const OGLBuffer& src, const OGLBuffer& dst,
97 // Align the offset, not the mapped pointer 52 std::size_t src_offset, std::size_t dst_offset,
98 const GLintptr offset_aligned = 53 std::size_t size) {
99 static_cast<GLintptr>(Common::AlignUp(static_cast<std::size_t>(buffer_offset), alignment)); 54 glCopyNamedBufferSubData(src.handle, dst.handle, static_cast<GLintptr>(src_offset),
100 buffer_ptr += offset_aligned - buffer_offset; 55 static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size));
101 buffer_offset = offset_aligned;
102} 56}
103 57
104} // namespace OpenGL 58} // namespace OpenGL
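
The rewritten cache leans entirely on OpenGL 4.5 direct state access: buffers are created, allocated, and updated by handle, never bound. A small standalone helper in the same style (assumes a current GL 4.5 context and a loader such as glad):

#include <glad/glad.h>

// Create a buffer, allocate storage, and upload data purely by handle.
GLuint CreateAndFillBuffer(GLsizeiptr size, const void* data) {
    GLuint handle = 0;
    glCreateBuffers(1, &handle);
    glNamedBufferData(handle, size, nullptr, GL_DYNAMIC_DRAW); // allocate storage
    if (data != nullptr) {
        glNamedBufferSubData(handle, 0, size, data);           // partial update
    }
    return handle;
}
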
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index f2347581b..8c8ac4038 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -4,80 +4,44 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <cstddef>
8#include <memory> 7#include <memory>
9#include <tuple>
10 8
11#include "common/common_types.h" 9#include "common/common_types.h"
10#include "video_core/buffer_cache.h"
12#include "video_core/rasterizer_cache.h" 11#include "video_core/rasterizer_cache.h"
13#include "video_core/renderer_opengl/gl_resource_manager.h" 12#include "video_core/renderer_opengl/gl_resource_manager.h"
14#include "video_core/renderer_opengl/gl_stream_buffer.h" 13#include "video_core/renderer_opengl/gl_stream_buffer.h"
15 14
15namespace Core {
16class System;
17}
18
16namespace OpenGL { 19namespace OpenGL {
17 20
21class OGLStreamBuffer;
18class RasterizerOpenGL; 22class RasterizerOpenGL;
19 23
20class CachedBufferEntry final : public RasterizerCacheObject { 24class OGLBufferCache final : public VideoCommon::BufferCache<OGLBuffer, GLuint, OGLStreamBuffer> {
21public:
22 explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset,
23 std::size_t alignment, u8* host_ptr);
24
25 VAddr GetCpuAddr() const override {
26 return cpu_addr;
27 }
28
29 std::size_t GetSizeInBytes() const override {
30 return size;
31 }
32
33 std::size_t GetSize() const {
34 return size;
35 }
36
37 GLintptr GetOffset() const {
38 return offset;
39 }
40
41 std::size_t GetAlignment() const {
42 return alignment;
43 }
44
45private:
46 VAddr cpu_addr{};
47 std::size_t size{};
48 GLintptr offset{};
49 std::size_t alignment{};
50};
51
52class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
53public: 25public:
54 explicit OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size); 26 explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
55 27 std::size_t stream_size);
56 /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been 28 ~OGLBufferCache();
57 /// allocated.
58 GLintptr UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
59 bool cache = true);
60 29
61 /// Uploads from a host memory. Returns host's buffer offset where it's been allocated. 30 const GLuint* GetEmptyBuffer(std::size_t) override;
62 GLintptr UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4);
63
64 bool Map(std::size_t max_size);
65 void Unmap();
66
67 GLuint GetHandle() const;
68 31
69protected: 32protected:
70 void AlignBuffer(std::size_t alignment); 33 OGLBuffer CreateBuffer(std::size_t size) override;
34
35 const GLuint* ToHandle(const OGLBuffer& buffer) override;
71 36
72 // We do not have to flush this cache as things in it are never modified by us. 37 void UploadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size,
73 void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {} 38 const u8* data) override;
74 39
75private: 40 void DownloadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size,
76 OGLStreamBuffer stream_buffer; 41 u8* data) override;
77 42
78 u8* buffer_ptr = nullptr; 43 void CopyBufferData(const OGLBuffer& src, const OGLBuffer& dst, std::size_t src_offset,
79 GLintptr buffer_offset = 0; 44 std::size_t dst_offset, std::size_t size) override;
80 GLintptr buffer_offset_base = 0;
81}; 45};
82 46
83} // namespace OpenGL 47} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index a48e14d2e..85424a4c9 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -24,8 +24,10 @@ T GetInteger(GLenum pname) {
24 24
25Device::Device() { 25Device::Device() {
26 uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); 26 uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
27 shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
27 max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); 28 max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
28 max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); 29 max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
30 has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
29 has_variable_aoffi = TestVariableAoffi(); 31 has_variable_aoffi = TestVariableAoffi();
30 has_component_indexing_bug = TestComponentIndexingBug(); 32 has_component_indexing_bug = TestComponentIndexingBug();
31} 33}
@@ -34,6 +36,7 @@ Device::Device(std::nullptr_t) {
34 uniform_buffer_alignment = 0; 36 uniform_buffer_alignment = 0;
35 max_vertex_attributes = 16; 37 max_vertex_attributes = 16;
36 max_varyings = 15; 38 max_varyings = 15;
39 has_vertex_viewport_layer = true;
37 has_variable_aoffi = true; 40 has_variable_aoffi = true;
38 has_component_indexing_bug = false; 41 has_component_indexing_bug = false;
39} 42}
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 8c8c93760..dc883722d 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -18,6 +18,10 @@ public:
18 return uniform_buffer_alignment; 18 return uniform_buffer_alignment;
19 } 19 }
20 20
21 std::size_t GetShaderStorageBufferAlignment() const {
22 return shader_storage_alignment;
23 }
24
21 u32 GetMaxVertexAttributes() const { 25 u32 GetMaxVertexAttributes() const {
22 return max_vertex_attributes; 26 return max_vertex_attributes;
23 } 27 }
@@ -26,6 +30,10 @@ public:
26 return max_varyings; 30 return max_varyings;
27 } 31 }
28 32
33 bool HasVertexViewportLayer() const {
34 return has_vertex_viewport_layer;
35 }
36
29 bool HasVariableAoffi() const { 37 bool HasVariableAoffi() const {
30 return has_variable_aoffi; 38 return has_variable_aoffi;
31 } 39 }
@@ -39,8 +47,10 @@ private:
39 static bool TestComponentIndexingBug(); 47 static bool TestComponentIndexingBug();
40 48
41 std::size_t uniform_buffer_alignment{}; 49 std::size_t uniform_buffer_alignment{};
50 std::size_t shader_storage_alignment{};
42 u32 max_vertex_attributes{}; 51 u32 max_vertex_attributes{};
43 u32 max_varyings{}; 52 u32 max_varyings{};
53 bool has_vertex_viewport_layer{};
44 bool has_variable_aoffi{}; 54 bool has_variable_aoffi{};
45 bool has_component_indexing_bug{}; 55 bool has_component_indexing_bug{};
46}; 56};
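
The new shader_storage_alignment query reuses the GetInteger<T> helper visible at the top of the .cpp hunk. A plausible definition of that helper, shown as an assumption since its body lies outside the diff:

#include <cstddef>

#include <glad/glad.h>

template <typename T>
T GetInteger(GLenum pname) {
    GLint value = 0;
    glGetIntegerv(pname, &value);
    return static_cast<T>(value);
}

// e.g.: const auto alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
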
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp
deleted file mode 100644
index d5e385151..000000000
--- a/src/video_core/renderer_opengl/gl_global_cache.cpp
+++ /dev/null
@@ -1,102 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <glad/glad.h>
6
7#include "common/logging/log.h"
8#include "core/core.h"
9#include "video_core/memory_manager.h"
10#include "video_core/renderer_opengl/gl_global_cache.h"
11#include "video_core/renderer_opengl/gl_rasterizer.h"
12#include "video_core/renderer_opengl/gl_shader_decompiler.h"
13#include "video_core/renderer_opengl/utils.h"
14
15namespace OpenGL {
16
17CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size)
18 : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, host_ptr{host_ptr}, size{size},
19 max_size{max_size} {
20 buffer.Create();
21 LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory");
22}
23
24CachedGlobalRegion::~CachedGlobalRegion() = default;
25
26void CachedGlobalRegion::Reload(u32 size_) {
27 size = size_;
28 if (size > max_size) {
29 size = max_size;
30 LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the supported size {}!", size_,
31 max_size);
32 }
33 glNamedBufferData(buffer.handle, size, host_ptr, GL_STREAM_DRAW);
34}
35
36void CachedGlobalRegion::Flush() {
37 LOG_DEBUG(Render_OpenGL, "Flushing {} bytes to CPU memory address 0x{:16}", size, cpu_addr);
38 glGetNamedBufferSubData(buffer.handle, 0, static_cast<GLsizeiptr>(size), host_ptr);
39}
40
41GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const {
42 const auto search{reserve.find(addr)};
43 if (search == reserve.end()) {
44 return {};
45 }
46 return search->second;
47}
48
49GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr,
50 u32 size) {
51 GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)};
52 if (!region) {
53 // No reserved surface available, create a new one and reserve it
54 auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
55 const auto cpu_addr{memory_manager.GpuToCpuAddress(addr)};
56 ASSERT(cpu_addr);
57
58 region = std::make_shared<CachedGlobalRegion>(*cpu_addr, host_ptr, size, max_ssbo_size);
59 ReserveGlobalRegion(region);
60 }
61 region->Reload(size);
62 return region;
63}
64
65void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) {
66 reserve.insert_or_assign(region->GetCacheAddr(), std::move(region));
67}
68
69GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
70 : RasterizerCache{rasterizer} {
71 GLint max_ssbo_size_;
72 glGetIntegerv(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &max_ssbo_size_);
73 max_ssbo_size = static_cast<u32>(max_ssbo_size_);
74}
75
76GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
77 const GLShader::GlobalMemoryEntry& global_region,
78 Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) {
79 std::lock_guard lock{mutex};
80
81 auto& gpu{Core::System::GetInstance().GPU()};
82 auto& memory_manager{gpu.MemoryManager()};
83 const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]};
84 const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address +
85 global_region.GetCbufOffset()};
86 const auto actual_addr{memory_manager.Read<u64>(addr)};
87 const auto size{memory_manager.Read<u32>(addr + 8)};
88
89 // Look up global region in the cache based on address
90 const auto& host_ptr{memory_manager.GetPointer(actual_addr)};
91 GlobalRegion region{TryGet(host_ptr)};
92
93 if (!region) {
94 // No global region found - create a new one
95 region = GetUncachedGlobalRegion(actual_addr, host_ptr, size);
96 Register(region);
97 }
98
99 return region;
100}
101
102} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h
deleted file mode 100644
index 2d467a240..000000000
--- a/src/video_core/renderer_opengl/gl_global_cache.h
+++ /dev/null
@@ -1,82 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <unordered_map>
9
10#include <glad/glad.h>
11
12#include "common/assert.h"
13#include "common/common_types.h"
14#include "video_core/engines/maxwell_3d.h"
15#include "video_core/rasterizer_cache.h"
16#include "video_core/renderer_opengl/gl_resource_manager.h"
17
18namespace OpenGL {
19
20namespace GLShader {
21class GlobalMemoryEntry;
22}
23
24class RasterizerOpenGL;
25class CachedGlobalRegion;
26using GlobalRegion = std::shared_ptr<CachedGlobalRegion>;
27
28class CachedGlobalRegion final : public RasterizerCacheObject {
29public:
30 explicit CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size);
31 ~CachedGlobalRegion();
32
33 VAddr GetCpuAddr() const override {
34 return cpu_addr;
35 }
36
37 std::size_t GetSizeInBytes() const override {
38 return size;
39 }
40
41 /// Gets the GL program handle for the buffer
42 GLuint GetBufferHandle() const {
43 return buffer.handle;
44 }
45
46 /// Reloads the global region from guest memory
47 void Reload(u32 size_);
48
49 void Flush();
50
51private:
52 VAddr cpu_addr{};
53 u8* host_ptr{};
54 u32 size{};
55 u32 max_size{};
56
57 OGLBuffer buffer;
58};
59
60class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> {
61public:
62 explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer);
63
64 /// Gets the current specified shader stage program
65 GlobalRegion GetGlobalRegion(const GLShader::GlobalMemoryEntry& descriptor,
66 Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);
67
68protected:
69 void FlushObjectInner(const GlobalRegion& object) override {
70 object->Flush();
71 }
72
73private:
74 GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const;
75 GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, u32 size);
76 void ReserveGlobalRegion(GlobalRegion region);
77
78 std::unordered_map<CacheAddr, GlobalRegion> reserve;
79 u32 max_ssbo_size{};
80};
81
82} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index f45a3c5ef..c28ae795c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -4,6 +4,7 @@
4 4
5#include <algorithm> 5#include <algorithm>
6#include <array> 6#include <array>
7#include <bitset>
7#include <memory> 8#include <memory>
8#include <string> 9#include <string>
9#include <string_view> 10#include <string_view>
@@ -19,7 +20,9 @@
19#include "core/core.h" 20#include "core/core.h"
20#include "core/hle/kernel/process.h" 21#include "core/hle/kernel/process.h"
21#include "core/settings.h" 22#include "core/settings.h"
23#include "video_core/engines/kepler_compute.h"
22#include "video_core/engines/maxwell_3d.h" 24#include "video_core/engines/maxwell_3d.h"
25#include "video_core/memory_manager.h"
23#include "video_core/renderer_opengl/gl_rasterizer.h" 26#include "video_core/renderer_opengl/gl_rasterizer.h"
24#include "video_core/renderer_opengl/gl_shader_cache.h" 27#include "video_core/renderer_opengl/gl_shader_cache.h"
25#include "video_core/renderer_opengl/gl_shader_gen.h" 28#include "video_core/renderer_opengl/gl_shader_gen.h"
@@ -80,16 +83,31 @@ struct DrawParameters {
80 } 83 }
81}; 84};
82 85
86static std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
87 const GLShader::ConstBufferEntry& entry) {
88 if (!entry.IsIndirect()) {
89 return entry.GetSize();
90 }
91
92 if (buffer.size > Maxwell::MaxConstBufferSize) {
93 LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size,
94 Maxwell::MaxConstBufferSize);
95 return Maxwell::MaxConstBufferSize;
96 }
97
98 return buffer.size;
99}
100
83RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, 101RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
84 ScreenInfo& info) 102 ScreenInfo& info)
85 : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device}, 103 : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device},
86 global_cache{*this}, system{system}, screen_info{info}, 104 system{system}, screen_info{info}, buffer_cache{*this, system, STREAM_BUFFER_SIZE} {
87 buffer_cache(*this, STREAM_BUFFER_SIZE) {
88 OpenGLState::ApplyDefaultState(); 105 OpenGLState::ApplyDefaultState();
89 106
90 shader_program_manager = std::make_unique<GLShader::ProgramManager>(); 107 shader_program_manager = std::make_unique<GLShader::ProgramManager>();
91 state.draw.shader_program = 0; 108 state.draw.shader_program = 0;
92 state.Apply(); 109 state.Apply();
110 clear_framebuffer.Create();
93 111
94 LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here"); 112 LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here");
95 CheckExtensions(); 113 CheckExtensions();
@@ -109,10 +127,10 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
109 auto& gpu = system.GPU().Maxwell3D(); 127 auto& gpu = system.GPU().Maxwell3D();
110 const auto& regs = gpu.regs; 128 const auto& regs = gpu.regs;
111 129
112 if (!gpu.dirty_flags.vertex_attrib_format) { 130 if (!gpu.dirty.vertex_attrib_format) {
113 return state.draw.vertex_array; 131 return state.draw.vertex_array;
114 } 132 }
115 gpu.dirty_flags.vertex_attrib_format = false; 133 gpu.dirty.vertex_attrib_format = false;
116 134
117 MICROPROFILE_SCOPE(OpenGL_VAO); 135 MICROPROFILE_SCOPE(OpenGL_VAO);
118 136
@@ -129,8 +147,6 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
     state.draw.vertex_array = vao;
     state.ApplyVertexArrayState();
 
-    glVertexArrayElementBuffer(vao, buffer_cache.GetHandle());
-
     // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL.
     // Enables the first 16 vertex attributes always, as we don't know which ones are actually
     // used until shader time. Note, Tegra technically supports 32, but we're capping this to 16
@@ -168,7 +184,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
     }
 
     // Rebinding the VAO invalidates the vertex buffer bindings.
-    gpu.dirty_flags.vertex_array.set();
+    gpu.dirty.ResetVertexArrays();
 
     state.draw.vertex_array = vao_entry.handle;
     return vao_entry.handle;
@@ -176,17 +192,20 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
 
 void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
     auto& gpu = system.GPU().Maxwell3D();
-    const auto& regs = gpu.regs;
-
-    if (gpu.dirty_flags.vertex_array.none())
+    if (!gpu.dirty.vertex_array_buffers)
         return;
+    gpu.dirty.vertex_array_buffers = false;
+
+    const auto& regs = gpu.regs;
 
     MICROPROFILE_SCOPE(OpenGL_VB);
 
     // Upload all guest vertex arrays sequentially to our buffer
     for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
-        if (!gpu.dirty_flags.vertex_array[index])
+        if (!gpu.dirty.vertex_array[index])
             continue;
+        gpu.dirty.vertex_array[index] = false;
+        gpu.dirty.vertex_instance[index] = false;
 
         const auto& vertex_array = regs.vertex_array[index];
         if (!vertex_array.IsEnabled())
@@ -197,11 +216,11 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
 
         ASSERT(end > start);
         const u64 size = end - start + 1;
-        const GLintptr vertex_buffer_offset = buffer_cache.UploadMemory(start, size);
+        const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size);
 
         // Bind the vertex array to the buffer at the current offset.
-        glVertexArrayVertexBuffer(vao, index, buffer_cache.GetHandle(), vertex_buffer_offset,
-                                  vertex_array.stride);
+        vertex_array_pushbuffer.SetVertexBuffer(index, vertex_buffer, vertex_buffer_offset,
+                                                vertex_array.stride);
 
         if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) {
             // Enable vertex buffer instancing with the specified divisor.
@@ -211,11 +230,47 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
             glVertexArrayBindingDivisor(vao, index, 0);
         }
     }
+}
+
+void RasterizerOpenGL::SetupVertexInstances(GLuint vao) {
+    auto& gpu = system.GPU().Maxwell3D();
+
+    if (!gpu.dirty.vertex_instances)
+        return;
+    gpu.dirty.vertex_instances = false;
+
+    const auto& regs = gpu.regs;
+    // Upload all guest vertex arrays sequentially to our buffer
+    for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
+        if (!gpu.dirty.vertex_instance[index])
+            continue;
+
+        gpu.dirty.vertex_instance[index] = false;
+
+        if (regs.instanced_arrays.IsInstancingEnabled(index) &&
+            regs.vertex_array[index].divisor != 0) {
+            // Enable vertex buffer instancing with the specified divisor.
+            glVertexArrayBindingDivisor(vao, index, regs.vertex_array[index].divisor);
+        } else {
+            // Disable the vertex buffer instancing.
+            glVertexArrayBindingDivisor(vao, index, 0);
+        }
+    }
+}
 
-    gpu.dirty_flags.vertex_array.reset();
+GLintptr RasterizerOpenGL::SetupIndexBuffer() {
+    if (accelerate_draw != AccelDraw::Indexed) {
+        return 0;
+    }
+    MICROPROFILE_SCOPE(OpenGL_Index);
+    const auto& regs = system.GPU().Maxwell3D().regs;
+    const std::size_t size = CalculateIndexBufferSize();
+    const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
+    vertex_array_pushbuffer.SetIndexBuffer(buffer);
+    return offset;
 }
 
-DrawParameters RasterizerOpenGL::SetupDraw() {
+DrawParameters RasterizerOpenGL::SetupDraw(GLintptr index_buffer_offset) {
     const auto& gpu = system.GPU().Maxwell3D();
     const auto& regs = gpu.regs;
     const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
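The SetupVertexBuffer/SetupVertexInstances split above follows a two-level dirty-tracking pattern: one coarse flag decides whether the pass runs at all, and per-array flags decide which bindings get refreshed, each flag being cleared as soon as it is acknowledged. A condensed sketch of that shape (names are illustrative, not yuzu's):

#include <array>
#include <cstddef>

struct VertexDirtyState {
    static constexpr std::size_t NumVertexArrays = 32;
    bool any_vertex_buffer = true;                  // coarse flag: skip the whole pass
    std::array<bool, NumVertexArrays> vertex_buffer{}; // fine flags: one per binding

    template <typename F>
    void ForEachDirty(F&& upload) {
        if (!any_vertex_buffer) {
            return; // nothing changed since the last draw
        }
        any_vertex_buffer = false;
        for (std::size_t index = 0; index < NumVertexArrays; ++index) {
            if (!vertex_buffer[index]) {
                continue;
            }
            vertex_buffer[index] = false; // acknowledge before doing the work
            upload(index);
        }
    }
};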
@@ -227,11 +282,9 @@ DrawParameters RasterizerOpenGL::SetupDraw() {
     params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology);
 
     if (is_indexed) {
-        MICROPROFILE_SCOPE(OpenGL_Index);
         params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format);
         params.count = regs.index_array.count;
-        params.index_buffer_offset =
-            buffer_cache.UploadMemory(regs.index_array.IndexStart(), CalculateIndexBufferSize());
+        params.index_buffer_offset = index_buffer_offset;
         params.base_vertex = static_cast<GLint>(regs.vb_element_base);
     } else {
         params.count = regs.vertex_buffer.count;
@@ -247,10 +300,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
     BaseBindings base_bindings;
     std::array<bool, Maxwell::NumClipDistances> clip_distances{};
 
-    // Prepare packed bindings
-    bind_ubo_pushbuffer.Setup(base_bindings.cbuf);
-    bind_ssbo_pushbuffer.Setup(base_bindings.gmem);
-
     for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
         const auto& shader_config = gpu.regs.shader_config[index];
         const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)};
@@ -271,18 +320,17 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
 
         GLShader::MaxwellUniformData ubo{};
         ubo.SetFromRegs(gpu, stage);
-        const GLintptr offset =
+        const auto [buffer, offset] =
             buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
 
         // Bind the emulation info buffer
-        bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset,
-                                 static_cast<GLsizeiptr>(sizeof(ubo)));
+        bind_ubo_pushbuffer.Push(buffer, offset, static_cast<GLsizeiptr>(sizeof(ubo)));
 
         Shader shader{shader_cache.GetStageProgram(program)};
 
-        const auto stage_enum{static_cast<Maxwell::ShaderStage>(stage)};
+        const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
         SetupDrawConstBuffers(stage_enum, shader);
-        SetupGlobalRegions(stage_enum, shader);
+        SetupDrawGlobalMemory(stage_enum, shader);
         const auto texture_buffer_usage{SetupTextures(stage_enum, shader, base_bindings)};
 
         const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage};
@@ -321,12 +369,9 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
         base_bindings = next_bindings;
     }
 
-    bind_ubo_pushbuffer.Bind();
-    bind_ssbo_pushbuffer.Bind();
-
     SyncClipEnabled(clip_distances);
 
-    gpu.dirty_flags.shaders = false;
+    gpu.dirty.shaders = false;
 }
 
 std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
@@ -409,13 +454,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
 
     const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents,
                                                  single_color_target};
-    if (fb_config_state == current_framebuffer_config_state &&
-        gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) {
+    if (fb_config_state == current_framebuffer_config_state && !gpu.dirty.render_settings) {
         // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or
         // single color targets). This is done because the guest registers may not change but the
         // host framebuffer may contain different attachments
         return current_depth_stencil_usage;
     }
+    gpu.dirty.render_settings = false;
     current_framebuffer_config_state = fb_config_state;
 
     texture_cache.GuardRenderTargets(true);
@@ -504,13 +549,71 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
     return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable};
 }
 
+void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb,
+                                                 bool using_depth_fb, bool using_stencil_fb) {
+    auto& gpu = system.GPU().Maxwell3D();
+    const auto& regs = gpu.regs;
+
+    texture_cache.GuardRenderTargets(true);
+    View color_surface{};
+    if (using_color_fb) {
+        color_surface = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT, false);
+    }
+    View depth_surface{};
+    if (using_depth_fb || using_stencil_fb) {
+        depth_surface = texture_cache.GetDepthBufferSurface(false);
+    }
+    texture_cache.GuardRenderTargets(false);
+
+    current_state.draw.draw_framebuffer = clear_framebuffer.handle;
+    current_state.ApplyFramebufferState();
+
+    if (color_surface) {
+        color_surface->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER);
+    } else {
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+    }
+
+    if (depth_surface) {
+        const auto& params = depth_surface->GetSurfaceParams();
+        switch (params.type) {
+        case VideoCore::Surface::SurfaceType::Depth: {
+            depth_surface->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
+            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+            break;
+        }
+        case VideoCore::Surface::SurfaceType::DepthStencil: {
+            depth_surface->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
+            break;
+        }
+        default: { UNIMPLEMENTED(); }
+        }
+    } else {
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
+                               0);
+    }
+}
+
 void RasterizerOpenGL::Clear() {
-    const auto& regs = system.GPU().Maxwell3D().regs;
+    const auto& maxwell3d = system.GPU().Maxwell3D();
+
+    if (!maxwell3d.ShouldExecute()) {
+        return;
+    }
+
+    const auto& regs = maxwell3d.regs;
     bool use_color{};
     bool use_depth{};
     bool use_stencil{};
 
-    OpenGLState clear_state;
+    OpenGLState prev_state{OpenGLState::GetCurState()};
+    SCOPE_EXIT({
+        prev_state.AllDirty();
+        prev_state.Apply();
+    });
+
+    OpenGLState clear_state{OpenGLState::GetCurState()};
+    clear_state.SetDefaultViewports();
     if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
         regs.clear_buffers.A) {
         use_color = true;
@@ -530,6 +633,7 @@ void RasterizerOpenGL::Clear() {
         // true.
         clear_state.depth.test_enabled = true;
         clear_state.depth.test_func = GL_ALWAYS;
+        clear_state.depth.write_mask = GL_TRUE;
     }
     if (regs.clear_buffers.S) {
         ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!");
@@ -566,8 +670,9 @@ void RasterizerOpenGL::Clear() {
         return;
     }
 
-    const auto [clear_depth, clear_stencil] = ConfigureFramebuffers(
-        clear_state, use_color, use_depth || use_stencil, false, regs.clear_buffers.RT.Value());
+    ConfigureClearFramebuffer(clear_state, use_color, use_depth, use_stencil);
+
+    SyncViewport(clear_state);
     if (regs.clear_flags.scissor) {
         SyncScissorTest(clear_state);
     }
@@ -576,21 +681,18 @@ void RasterizerOpenGL::Clear() {
         clear_state.EmulateViewportWithScissor();
     }
 
-    clear_state.ApplyColorMask();
-    clear_state.ApplyDepth();
-    clear_state.ApplyStencilTest();
-    clear_state.ApplyViewport();
-    clear_state.ApplyFramebufferState();
+    clear_state.AllDirty();
+    clear_state.Apply();
 
     if (use_color) {
-        glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
+        glClearBufferfv(GL_COLOR, 0, regs.clear_color);
     }
 
-    if (clear_depth && clear_stencil) {
+    if (use_depth && use_stencil) {
         glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil);
-    } else if (clear_depth) {
+    } else if (use_depth) {
         glClearBufferfv(GL_DEPTH, 0, &regs.clear_depth);
-    } else if (clear_stencil) {
+    } else if (use_stencil) {
         glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);
     }
 }
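Clear() now snapshots the live OpenGL state and restores it through a scope-exit guard, so every early return re-applies the previous state. A minimal sketch of the idiom, assuming a hypothetical GLStateSnapshot type in place of yuzu's OpenGLState and SCOPE_EXIT:

#include <utility>

// Generic scope guard: runs the stored callable when the scope unwinds.
template <typename F>
class ScopeExit {
public:
    explicit ScopeExit(F func) : func_{std::move(func)} {}
    ~ScopeExit() {
        func_();
    }

private:
    F func_;
};

struct GLStateSnapshot {
    // ... fixed-function state fields would live here ...
    void Apply() const {} // re-emit the captured state to the driver
};

void ClearWithRestoredState(GLStateSnapshot current) {
    const GLStateSnapshot prev = current;         // save the incoming state
    ScopeExit restore{[&prev] { prev.Apply(); }}; // restore on any exit path
    // ... mutate `current` and issue glClearBuffer* calls here ...
}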
@@ -601,6 +703,11 @@ void RasterizerOpenGL::DrawArrays() {
 
     MICROPROFILE_SCOPE(OpenGL_Drawing);
     auto& gpu = system.GPU().Maxwell3D();
+
+    if (!gpu.ShouldExecute()) {
+        return;
+    }
+
     const auto& regs = gpu.regs;
 
     SyncColorMask();
@@ -634,26 +741,47 @@ void RasterizerOpenGL::DrawArrays() {
                       Maxwell::MaxShaderStage;
 
     // Add space for at least 18 constant buffers
-    buffer_size +=
-        Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment());
+    buffer_size += Maxwell::MaxConstBuffers *
+                   (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
 
-    const bool invalidate = buffer_cache.Map(buffer_size);
-    if (invalidate) {
-        // As all cached buffers are invalidated, we need to recheck their state.
-        gpu.dirty_flags.vertex_array.set();
-    }
+    // Prepare the vertex array.
+    buffer_cache.Map(buffer_size);
 
+    // Prepare vertex array format.
     const GLuint vao = SetupVertexFormat();
+    vertex_array_pushbuffer.Setup(vao);
+
+    // Upload vertex and index data.
     SetupVertexBuffer(vao);
+    SetupVertexInstances(vao);
+    const GLintptr index_buffer_offset = SetupIndexBuffer();
+
+    // Setup draw parameters. It will automatically choose what glDraw* method to use.
+    const DrawParameters params = SetupDraw(index_buffer_offset);
+
+    // Prepare packed bindings.
+    bind_ubo_pushbuffer.Setup(0);
+    bind_ssbo_pushbuffer.Setup(0);
 
-    DrawParameters params = SetupDraw();
+    // Setup shaders and their used resources.
     texture_cache.GuardSamplers(true);
     SetupShaders(params.primitive_mode);
     texture_cache.GuardSamplers(false);
 
     ConfigureFramebuffers(state);
 
-    buffer_cache.Unmap();
+    // Signal the buffer cache that we are not going to upload more things.
+    const bool invalidate = buffer_cache.Unmap();
+
+    // Now that we are no longer uploading data, we can safely bind the buffers to OpenGL.
+    vertex_array_pushbuffer.Bind();
+    bind_ubo_pushbuffer.Bind();
+    bind_ssbo_pushbuffer.Bind();
+
+    if (invalidate) {
+        // As all cached buffers are invalidated, we need to recheck their state.
+        gpu.dirty.ResetVertexArrays();
+    }
 
     shader_program_manager->ApplyTo(state);
     state.Apply();
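The reordered DrawArrays depends on the push-buffer pattern: while the stream buffer is mapped, the Setup* calls only record bindings; the actual GL binding calls are issued in a batch after Unmap, once the final buffer handles and offsets are stable. A sketch of that record-then-bind structure (a simplification; yuzu's BindBuffersRangePushBuffer is more specialized):

#include <cstddef>
#include <cstdint>
#include <vector>

using GLuint = std::uint32_t;   // stand-ins for the GL typedefs
using GLintptr = std::intptr_t;

class BindingPushBuffer {
public:
    // Called while the stream buffer is still mapped: record only.
    void Push(GLuint buffer, GLintptr offset, std::size_t size) {
        pending.push_back({buffer, offset, size});
    }

    // Called after Unmap: replay every recorded binding through the given
    // callable, e.g. a wrapper around glBindBufferRange.
    template <typename BindFn>
    void Bind(BindFn&& gl_bind_range) {
        for (const auto& b : pending) {
            gl_bind_range(b.buffer, b.offset, b.size);
        }
        pending.clear();
    }

private:
    struct Entry {
        GLuint buffer;
        GLintptr offset;
        std::size_t size;
    };
    std::vector<Entry> pending;
};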
@@ -665,6 +793,46 @@ void RasterizerOpenGL::DrawArrays() {
     params.DispatchDraw();
 
     accelerate_draw = AccelDraw::Disabled;
+    gpu.dirty.memory_general = false;
+}
+
+void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
+    if (!GLAD_GL_ARB_compute_variable_group_size) {
+        LOG_ERROR(Render_OpenGL, "Compute is currently not supported on this device due to the "
+                                 "lack of GL_ARB_compute_variable_group_size");
+        return;
+    }
+
+    auto kernel = shader_cache.GetComputeKernel(code_addr);
+    const auto [program, next_bindings] = kernel->GetProgramHandle({});
+    state.draw.shader_program = program;
+    state.draw.program_pipeline = 0;
+
+    const std::size_t buffer_size =
+        Tegra::Engines::KeplerCompute::NumConstBuffers *
+        (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
+    buffer_cache.Map(buffer_size);
+
+    bind_ubo_pushbuffer.Setup(0);
+    bind_ssbo_pushbuffer.Setup(0);
+
+    SetupComputeConstBuffers(kernel);
+    SetupComputeGlobalMemory(kernel);
+
+    // TODO(Rodrigo): Bind images and samplers
+
+    buffer_cache.Unmap();
+
+    bind_ubo_pushbuffer.Bind();
+    bind_ssbo_pushbuffer.Bind();
+
+    state.ApplyShaderProgram();
+    state.ApplyProgramPipeline();
+
+    const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
+    glDispatchComputeGroupSizeARB(launch_desc.grid_dim_x, launch_desc.grid_dim_y,
+                                  launch_desc.grid_dim_z, launch_desc.block_dim_x,
+                                  launch_desc.block_dim_y, launch_desc.block_dim_z);
 }
 
 void RasterizerOpenGL::FlushAll() {}
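DispatchCompute is gated on GL_ARB_compute_variable_group_size because the guest supplies both grid and block dimensions at launch time; glDispatchComputeGroupSizeARB takes the work-group size as runtime arguments, whereas core glDispatchCompute requires it to be baked into the shader source. A hedged sketch of the same guard, with a hypothetical capability flag and a dispatch callable standing in for the GLAD symbols:

#include <cstdio>

struct LaunchDesc {
    unsigned grid_x, grid_y, grid_z;    // number of work groups
    unsigned block_x, block_y, block_z; // threads per work group
};

template <typename DispatchFn>
bool TryDispatch(bool has_variable_group_size, const LaunchDesc& d, DispatchFn&& dispatch) {
    if (!has_variable_group_size) {
        // Mirrors the early return in the hunk above.
        std::fprintf(stderr, "compute skipped: GL_ARB_compute_variable_group_size missing\n");
        return false;
    }
    dispatch(d.grid_x, d.grid_y, d.grid_z, d.block_x, d.block_y, d.block_z);
    return true;
}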
@@ -675,7 +843,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
         return;
     }
     texture_cache.FlushRegion(addr, size);
-    global_cache.FlushRegion(addr, size);
+    buffer_cache.FlushRegion(addr, size);
 }
 
 void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
@@ -685,7 +853,6 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
     }
     texture_cache.InvalidateRegion(addr, size);
     shader_cache.InvalidateRegion(addr, size);
-    global_cache.InvalidateRegion(addr, size);
     buffer_cache.InvalidateRegion(addr, size);
 }
 
@@ -696,6 +863,10 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
     InvalidateRegion(addr, size);
 }
 
+void RasterizerOpenGL::TickFrame() {
+    buffer_cache.TickFrame();
+}
+
 bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
                                              const Tegra::Engines::Fermi2D::Regs::Surface& dst,
                                              const Tegra::Engines::Fermi2D::Config& copy_config) {
@@ -737,14 +908,25 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
 void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
                                              const Shader& shader) {
     MICROPROFILE_SCOPE(OpenGL_UBO);
-    const auto stage_index = static_cast<std::size_t>(stage);
-    const auto& shader_stage = system.GPU().Maxwell3D().state.shader_stages[stage_index];
-    const auto& entries = shader->GetShaderEntries().const_buffers;
-
-    // Upload only the enabled buffers from the 16 constbuffers of each shader stage
-    for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
-        const auto& entry = entries[bindpoint];
-        SetupConstBuffer(shader_stage.const_buffers[entry.GetIndex()], entry);
+    const auto& stages = system.GPU().Maxwell3D().state.shader_stages;
+    const auto& shader_stage = stages[static_cast<std::size_t>(stage)];
+    for (const auto& entry : shader->GetShaderEntries().const_buffers) {
+        const auto& buffer = shader_stage.const_buffers[entry.GetIndex()];
+        SetupConstBuffer(buffer, entry);
+    }
+}
+
+void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
+    MICROPROFILE_SCOPE(OpenGL_UBO);
+    const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
+    for (const auto& entry : kernel->GetShaderEntries().const_buffers) {
+        const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
+        const std::bitset<8> mask = launch_desc.memory_config.const_buffer_enable_mask.Value();
+        Tegra::Engines::ConstBufferInfo buffer;
+        buffer.address = config.Address();
+        buffer.size = config.size;
+        buffer.enabled = mask[entry.GetIndex()];
+        SetupConstBuffer(buffer, entry);
     }
 }
 
@@ -752,49 +934,52 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b
                                         const GLShader::ConstBufferEntry& entry) {
     if (!buffer.enabled) {
         // Set values to zero to unbind buffers
-        bind_ubo_pushbuffer.Push(0, 0, 0);
+        bind_ubo_pushbuffer.Push(buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float));
         return;
     }
 
-    std::size_t size;
-    if (entry.IsIndirect()) {
-        // Buffer is accessed indirectly, so upload the entire thing
-        size = buffer.size;
-
-        if (size > MaxConstbufferSize) {
-            LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size,
-                        MaxConstbufferSize);
-            size = MaxConstbufferSize;
-        }
-    } else {
-        // Buffer is accessed directly, upload just what we use
-        size = entry.GetSize();
-    }
-
     // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140
     // UBO alignment requirements.
-    size = Common::AlignUp(size, sizeof(GLvec4));
-    ASSERT_MSG(size <= MaxConstbufferSize, "Constant buffer is too big");
+    const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
 
-    const std::size_t alignment = device.GetUniformBufferAlignment();
-    const GLintptr offset = buffer_cache.UploadMemory(buffer.address, size, alignment);
-    bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset, size);
+    const auto alignment = device.GetUniformBufferAlignment();
+    const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment);
+    bind_ubo_pushbuffer.Push(cbuf, offset, size);
 }
 
-void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
-                                          const Shader& shader) {
-    const auto& entries = shader->GetShaderEntries().global_memory_entries;
-    for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
-        const auto& entry{entries[bindpoint]};
-        const auto& region{global_cache.GetGlobalRegion(entry, stage)};
-        if (entry.IsWritten()) {
-            region->MarkAsModified(true, global_cache);
-        }
-        bind_ssbo_pushbuffer.Push(region->GetBufferHandle(), 0,
-                                  static_cast<GLsizeiptr>(region->GetSizeInBytes()));
+void RasterizerOpenGL::SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
+                                             const Shader& shader) {
+    auto& gpu{system.GPU()};
+    auto& memory_manager{gpu.MemoryManager()};
+    const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]};
+    for (const auto& entry : shader->GetShaderEntries().global_memory_entries) {
+        const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()};
+        const auto gpu_addr{memory_manager.Read<u64>(addr)};
+        const auto size{memory_manager.Read<u32>(addr + 8)};
+        SetupGlobalMemory(entry, gpu_addr, size);
+    }
+}
+
+void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
+    auto& gpu{system.GPU()};
+    auto& memory_manager{gpu.MemoryManager()};
+    const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
+    for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) {
+        const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
+        const auto gpu_addr{memory_manager.Read<u64>(addr)};
+        const auto size{memory_manager.Read<u32>(addr + 8)};
+        SetupGlobalMemory(entry, gpu_addr, size);
     }
 }
 
+void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry,
+                                         GPUVAddr gpu_addr, std::size_t size) {
+    const auto alignment{device.GetShaderStorageBufferAlignment()};
+    const auto [ssbo, buffer_offset] =
+        buffer_cache.UploadMemory(gpu_addr, size, alignment, true, entry.IsWritten());
+    bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size));
+}
+
 TextureBufferUsage RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader,
                                                    BaseBindings base_bindings) {
     MICROPROFILE_SCOPE(OpenGL_Texture);
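Both SetupDrawGlobalMemory and SetupComputeGlobalMemory above decode the same small descriptor out of a constant buffer: a 64-bit GPU address at offset 0 followed by a 32-bit size at offset 8. A sketch of that layout; the struct name is illustrative, the offsets come directly from the Read<u64>(addr) and Read<u32>(addr + 8) calls above:

#include <cstdint>

// Descriptor read from guest memory for each global memory entry.
struct GlobalMemoryDescriptor {
    std::uint64_t gpu_addr; // bytes 0..7: base GPU virtual address of the region
    std::uint32_t size;     // bytes 8..11: size of the region in bytes
};

static_assert(sizeof(std::uint64_t) == 8, "address field spans the first 8 bytes");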
@@ -883,10 +1068,11 @@ void RasterizerOpenGL::SyncClipCoef() {
 }
 
 void RasterizerOpenGL::SyncCullMode() {
-    const auto& regs = system.GPU().Maxwell3D().regs;
+    auto& maxwell3d = system.GPU().Maxwell3D();
 
-    state.cull.enabled = regs.cull.enabled != 0;
+    const auto& regs = maxwell3d.regs;
 
+    state.cull.enabled = regs.cull.enabled != 0;
     if (state.cull.enabled) {
         state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
         state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face);
@@ -919,16 +1105,21 @@ void RasterizerOpenGL::SyncDepthTestState() {
     state.depth.test_enabled = regs.depth_test_enable != 0;
     state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE;
 
-    if (!state.depth.test_enabled)
+    if (!state.depth.test_enabled) {
         return;
+    }
 
     state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func);
 }
 
 void RasterizerOpenGL::SyncStencilTestState() {
-    const auto& regs = system.GPU().Maxwell3D().regs;
-    state.stencil.test_enabled = regs.stencil_enable != 0;
+    auto& maxwell3d = system.GPU().Maxwell3D();
+    if (!maxwell3d.dirty.stencil_test) {
+        return;
+    }
+    const auto& regs = maxwell3d.regs;
 
+    state.stencil.test_enabled = regs.stencil_enable != 0;
     if (!regs.stencil_enable) {
         return;
     }
@@ -957,10 +1148,17 @@ void RasterizerOpenGL::SyncStencilTestState() {
         state.stencil.back.action_depth_fail = GL_KEEP;
         state.stencil.back.action_depth_pass = GL_KEEP;
     }
+    state.MarkDirtyStencilState();
+    maxwell3d.dirty.stencil_test = false;
 }
 
 void RasterizerOpenGL::SyncColorMask() {
-    const auto& regs = system.GPU().Maxwell3D().regs;
+    auto& maxwell3d = system.GPU().Maxwell3D();
+    if (!maxwell3d.dirty.color_mask) {
+        return;
+    }
+    const auto& regs = maxwell3d.regs;
+
     const std::size_t count =
         regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1;
     for (std::size_t i = 0; i < count; i++) {
@@ -971,6 +1169,9 @@ void RasterizerOpenGL::SyncColorMask() {
         dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE;
         dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE;
     }
+
+    state.MarkDirtyColorMask();
+    maxwell3d.dirty.color_mask = false;
 }
 
 void RasterizerOpenGL::SyncMultiSampleState() {
@@ -985,7 +1186,11 @@ void RasterizerOpenGL::SyncFragmentColorClampState() {
 }
 
 void RasterizerOpenGL::SyncBlendState() {
-    const auto& regs = system.GPU().Maxwell3D().regs;
+    auto& maxwell3d = system.GPU().Maxwell3D();
+    if (!maxwell3d.dirty.blend_state) {
+        return;
+    }
+    const auto& regs = maxwell3d.regs;
 
     state.blend_color.red = regs.blend_color.r;
     state.blend_color.green = regs.blend_color.g;
@@ -1008,6 +1213,8 @@ void RasterizerOpenGL::SyncBlendState() {
         for (std::size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
             state.blend[i].enabled = false;
         }
+        maxwell3d.dirty.blend_state = false;
+        state.MarkDirtyBlendState();
         return;
     }
 
@@ -1024,6 +1231,9 @@ void RasterizerOpenGL::SyncBlendState() {
         blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a);
         blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a);
     }
+
+    state.MarkDirtyBlendState();
+    maxwell3d.dirty.blend_state = false;
 }
 
 void RasterizerOpenGL::SyncLogicOpState() {
@@ -1075,13 +1285,21 @@ void RasterizerOpenGL::SyncPointState() {
 }
 
 void RasterizerOpenGL::SyncPolygonOffset() {
-    const auto& regs = system.GPU().Maxwell3D().regs;
+    auto& maxwell3d = system.GPU().Maxwell3D();
+    if (!maxwell3d.dirty.polygon_offset) {
+        return;
+    }
+    const auto& regs = maxwell3d.regs;
+
     state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0;
     state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0;
     state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0;
     state.polygon_offset.units = regs.polygon_offset_units;
     state.polygon_offset.factor = regs.polygon_offset_factor;
     state.polygon_offset.clamp = regs.polygon_offset_clamp;
+
+    state.MarkDirtyPolygonOffset();
+    maxwell3d.dirty.polygon_offset = false;
 }
 
 void RasterizerOpenGL::SyncAlphaTest() {
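Every Sync* function rewritten in this file now follows one skeleton: return early while the register group is clean, translate the guest registers into host state, then mark the matching host state group dirty and acknowledge the guest flag. A condensed sketch of that skeleton (names are illustrative, not yuzu's):

struct Maxwell3DDirty {
    bool blend_state = true; // one flag per register group in practice
};

template <typename TranslateFn, typename MarkFn>
void SyncGroup(Maxwell3DDirty& dirty, bool Maxwell3DDirty::*flag, TranslateFn&& translate,
               MarkFn&& mark_host_dirty) {
    if (!(dirty.*flag)) {
        return;          // registers unchanged since the last sync
    }
    translate();         // regs -> OpenGLState fields
    mark_host_dirty();   // force OpenGLState::Apply to re-emit this group
    dirty.*flag = false; // acknowledge the guest-side flag
}

SyncBlendState, SyncColorMask, SyncStencilTestState and SyncPolygonOffset above are all instances of this shape, each with a different register group and MarkDirty* call.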
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index bf67e3a70..8b123c48d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -24,7 +24,6 @@
 #include "video_core/renderer_opengl/gl_buffer_cache.h"
 #include "video_core/renderer_opengl/gl_device.h"
 #include "video_core/renderer_opengl/gl_framebuffer_cache.h"
-#include "video_core/renderer_opengl/gl_global_cache.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_sampler_cache.h"
 #include "video_core/renderer_opengl/gl_shader_cache.h"
@@ -59,10 +58,12 @@ public:
 
     void DrawArrays() override;
     void Clear() override;
+    void DispatchCompute(GPUVAddr code_addr) override;
     void FlushAll() override;
     void FlushRegion(CacheAddr addr, u64 size) override;
     void InvalidateRegion(CacheAddr addr, u64 size) override;
     void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
+    void TickFrame() override;
     bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
                                const Tegra::Engines::Fermi2D::Regs::Surface& dst,
                                const Tegra::Engines::Fermi2D::Config& copy_config) override;
@@ -73,11 +74,6 @@ public:
     void LoadDiskResources(const std::atomic_bool& stop_loading,
                            const VideoCore::DiskResourceLoadCallback& callback) override;
 
-    /// Maximum supported size that a constbuffer can have in bytes.
-    static constexpr std::size_t MaxConstbufferSize = 0x10000;
-    static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0,
-                  "The maximum size of a constbuffer must be a multiple of the size of GLvec4");
-
 private:
     struct FramebufferConfigState {
         bool using_color_fb{};
@@ -98,30 +94,45 @@ private:
 
     /**
      * Configures the color and depth framebuffer states.
-     * @param must_reconfigure If true, tells the framebuffer to skip the cache and reconfigure
-     *        again. Used by the texture cache to solve texception conflicts
-     * @param use_color_fb If true, configure color framebuffers.
+     *
+     * @param current_state The current OpenGL state.
+     * @param using_color_fb If true, configure color framebuffers.
      * @param using_depth_fb If true, configure the depth/stencil framebuffer.
-     * @param preserve_contents If true, tries to preserve data from a previously used framebuffer.
+     * @param preserve_contents If true, tries to preserve data from a previously used
+     *        framebuffer.
      * @param single_color_target Specifies if a single color buffer target should be used.
+     *
      * @returns If depth (first) or stencil (second) are being stored in the bound zeta texture
      *          (requires using_depth_fb to be true)
      */
     std::pair<bool, bool> ConfigureFramebuffers(
-        OpenGLState& current_state, bool use_color_fb = true, bool using_depth_fb = true,
+        OpenGLState& current_state, bool using_color_fb = true, bool using_depth_fb = true,
         bool preserve_contents = true, std::optional<std::size_t> single_color_target = {});
 
+    void ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb,
+                                   bool using_depth_fb, bool using_stencil_fb);
+
     /// Configures the current constbuffers to use for the draw command.
     void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
                                const Shader& shader);
 
+    /// Configures the current constbuffers to use for the kernel invocation.
+    void SetupComputeConstBuffers(const Shader& kernel);
+
     /// Configures a constant buffer.
     void SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer,
                           const GLShader::ConstBufferEntry& entry);
 
     /// Configures the current global memory entries to use for the draw command.
-    void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
-                            const Shader& shader);
+    void SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
+                               const Shader& shader);
+
+    /// Configures the current global memory entries to use for the kernel invocation.
+    void SetupComputeGlobalMemory(const Shader& kernel);
+
+    /// Configures a global memory buffer.
+    void SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
+                           std::size_t size);
 
     /// Configures the current textures to use for the draw command. Returns shaders texture buffer
     /// usage.
@@ -189,7 +200,6 @@ private:
 
     TextureCacheOpenGL texture_cache;
     ShaderCacheOpenGL shader_cache;
-    GlobalRegionCacheOpenGL global_cache;
     SamplerCacheOpenGL sampler_cache;
     FramebufferCacheOpenGL framebuffer_cache;
 
@@ -208,6 +218,7 @@ private:
     static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
     OGLBufferCache buffer_cache;
 
+    VertexArrayPushBuffer vertex_array_pushbuffer;
     BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
     BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
 
@@ -219,14 +230,19 @@ private:
     GLuint SetupVertexFormat();
 
     void SetupVertexBuffer(GLuint vao);
+    void SetupVertexInstances(GLuint vao);
 
-    DrawParameters SetupDraw();
+    GLintptr SetupIndexBuffer();
+
+    DrawParameters SetupDraw(GLintptr index_buffer_offset);
 
     void SetupShaders(GLenum primitive_mode);
 
     enum class AccelDraw { Disabled, Arrays, Indexed };
     AccelDraw accelerate_draw = AccelDraw::Disabled;
 
+    OGLFramebuffer clear_framebuffer;
+
     using CachedPageMap = boost::icl::interval_map<u64, int>;
     CachedPageMap cached_pages;
 };
diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.h b/src/video_core/renderer_opengl/gl_sampler_cache.h
index defbc2d81..34ee37f00 100644
--- a/src/video_core/renderer_opengl/gl_sampler_cache.h
+++ b/src/video_core/renderer_opengl/gl_sampler_cache.h
@@ -17,9 +17,9 @@ public:
     ~SamplerCacheOpenGL();
 
 protected:
-    OGLSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const;
+    OGLSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override;
 
-    GLuint ToSamplerType(const OGLSampler& sampler) const;
+    GLuint ToSamplerType(const OGLSampler& sampler) const override;
 };
 
 } // namespace OpenGL
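The gl_sampler_cache.h change is purely defensive: marking the two virtuals override turns any future signature drift against the base class into a compile-time error instead of a silently unused overload. A minimal illustration:

struct SamplerCacheBase {
    virtual ~SamplerCacheBase() = default;
    virtual unsigned ToSamplerType(int sampler) const = 0;
};

struct SamplerCacheImpl : SamplerCacheBase {
    // Without `override`, changing the base signature would leave this as an
    // unrelated overload; with it, the compiler rejects the mismatch.
    unsigned ToSamplerType(int sampler) const override {
        return static_cast<unsigned>(sampler);
    }
};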
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index f9b2b03a0..1c90facc3 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -23,13 +23,13 @@ namespace OpenGL {
 
 using VideoCommon::Shader::ProgramCode;
 
-// One UBO is always reserved for emulation values
-constexpr u32 RESERVED_UBOS = 1;
+// One UBO is always reserved for emulation values on staged shaders
+constexpr u32 STAGE_RESERVED_UBOS = 1;
 
 struct UnspecializedShader {
     std::string code;
     GLShader::ShaderEntries entries;
-    Maxwell::ShaderProgram program_type;
+    ProgramType program_type;
 };
 
 namespace {
@@ -55,15 +55,17 @@ ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr g
 }
 
 /// Gets the shader type from a Maxwell program type
-constexpr GLenum GetShaderType(Maxwell::ShaderProgram program_type) {
+constexpr GLenum GetShaderType(ProgramType program_type) {
     switch (program_type) {
-    case Maxwell::ShaderProgram::VertexA:
-    case Maxwell::ShaderProgram::VertexB:
+    case ProgramType::VertexA:
+    case ProgramType::VertexB:
         return GL_VERTEX_SHADER;
-    case Maxwell::ShaderProgram::Geometry:
+    case ProgramType::Geometry:
         return GL_GEOMETRY_SHADER;
-    case Maxwell::ShaderProgram::Fragment:
+    case ProgramType::Fragment:
         return GL_FRAGMENT_SHADER;
+    case ProgramType::Compute:
+        return GL_COMPUTE_SHADER;
     default:
         return GL_NONE;
     }
@@ -100,6 +102,25 @@ constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLen
     }
 }
 
+ProgramType GetProgramType(Maxwell::ShaderProgram program) {
+    switch (program) {
+    case Maxwell::ShaderProgram::VertexA:
+        return ProgramType::VertexA;
+    case Maxwell::ShaderProgram::VertexB:
+        return ProgramType::VertexB;
+    case Maxwell::ShaderProgram::TesselationControl:
+        return ProgramType::TessellationControl;
+    case Maxwell::ShaderProgram::TesselationEval:
+        return ProgramType::TessellationEval;
+    case Maxwell::ShaderProgram::Geometry:
+        return ProgramType::Geometry;
+    case Maxwell::ShaderProgram::Fragment:
+        return ProgramType::Fragment;
+    }
+    UNREACHABLE();
+    return {};
+}
+
 /// Calculates the size of a program stream
 std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
     constexpr std::size_t start_offset = 10;
@@ -128,11 +149,13 @@ std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
 }
 
 /// Hashes one (or two) program streams
-u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& code,
-                        const ProgramCode& code_b) {
-    u64 unique_identifier =
-        Common::CityHash64(reinterpret_cast<const char*>(code.data()), CalculateProgramSize(code));
-    if (program_type != Maxwell::ShaderProgram::VertexA) {
+u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code,
+                        const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) {
+    if (size_a == 0) {
+        size_a = CalculateProgramSize(code);
+    }
+    u64 unique_identifier = Common::CityHash64(reinterpret_cast<const char*>(code.data()), size_a);
+    if (program_type != ProgramType::VertexA) {
         return unique_identifier;
     }
     // VertexA programs include two programs
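GetUniqueIdentifier hashes the primary code stream over its computed size and, for VertexA+VertexB pairs, folds the second stream's hash in boost::hash_combine-style. A self-contained sketch of the scheme, with std::hash standing in for Common::CityHash64 (the mixing constant is boost's):

#include <cstddef>
#include <cstdint>
#include <functional>
#include <vector>

// Hash the first `size_words` words of a code stream.
std::uint64_t HashStream(const std::vector<std::uint64_t>& code, std::size_t size_words) {
    std::size_t seed = 0;
    for (std::size_t i = 0; i < size_words; ++i) {
        seed ^= std::hash<std::uint64_t>{}(code[i]) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
    }
    return static_cast<std::uint64_t>(seed);
}

std::uint64_t UniqueIdentifier(const std::vector<std::uint64_t>& a,
                               const std::vector<std::uint64_t>& b, bool is_vertex_a) {
    const std::uint64_t id_a = HashStream(a, a.size());
    if (!is_vertex_a) {
        return id_a; // single-stream programs use the first hash directly
    }
    // VertexA pairs: combine both stream hashes into one identifier.
    std::size_t seed = static_cast<std::size_t>(id_a);
    const std::uint64_t id_b = HashStream(b, b.size());
    seed ^= static_cast<std::size_t>(id_b) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
    return static_cast<std::uint64_t>(seed);
}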
@@ -140,50 +163,67 @@ u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode&
     std::size_t seed = 0;
     boost::hash_combine(seed, unique_identifier);
 
-    const u64 identifier_b = Common::CityHash64(reinterpret_cast<const char*>(code_b.data()),
-                                                CalculateProgramSize(code_b));
+    if (size_b == 0) {
+        size_b = CalculateProgramSize(code_b);
+    }
+    const u64 identifier_b =
+        Common::CityHash64(reinterpret_cast<const char*>(code_b.data()), size_b);
     boost::hash_combine(seed, identifier_b);
     return static_cast<u64>(seed);
 }
 
 /// Creates an unspecialized program from code streams
-GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgram program_type,
+GLShader::ProgramResult CreateProgram(const Device& device, ProgramType program_type,
                                       ProgramCode program_code, ProgramCode program_code_b) {
     GLShader::ShaderSetup setup(program_code);
-    if (program_type == Maxwell::ShaderProgram::VertexA) {
+    setup.program.size_a = CalculateProgramSize(program_code);
+    setup.program.size_b = 0;
+    if (program_type == ProgramType::VertexA) {
         // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders.
         // Conventional HW does not support this, so we combine VertexA and VertexB into one
         // stage here.
         setup.SetProgramB(program_code_b);
+        setup.program.size_b = CalculateProgramSize(program_code_b);
     }
-    setup.program.unique_identifier =
-        GetUniqueIdentifier(program_type, program_code, program_code_b);
+    setup.program.unique_identifier = GetUniqueIdentifier(
+        program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b);
 
     switch (program_type) {
-    case Maxwell::ShaderProgram::VertexA:
-    case Maxwell::ShaderProgram::VertexB:
+    case ProgramType::VertexA:
+    case ProgramType::VertexB:
         return GLShader::GenerateVertexShader(device, setup);
-    case Maxwell::ShaderProgram::Geometry:
+    case ProgramType::Geometry:
         return GLShader::GenerateGeometryShader(device, setup);
-    case Maxwell::ShaderProgram::Fragment:
+    case ProgramType::Fragment:
         return GLShader::GenerateFragmentShader(device, setup);
+    case ProgramType::Compute:
+        return GLShader::GenerateComputeShader(device, setup);
     default:
-        LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type));
-        UNREACHABLE();
+        UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type));
         return {};
     }
 }
 
 CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries,
-                               Maxwell::ShaderProgram program_type, const ProgramVariant& variant,
+                               ProgramType program_type, const ProgramVariant& variant,
                                bool hint_retrievable = false) {
     auto base_bindings{variant.base_bindings};
     const auto primitive_mode{variant.primitive_mode};
     const auto texture_buffer_usage{variant.texture_buffer_usage};
 
     std::string source = "#version 430 core\n"
-                         "#extension GL_ARB_separate_shader_objects : enable\n\n";
-    source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
+                         "#extension GL_ARB_separate_shader_objects : enable\n";
+    if (entries.shader_viewport_layer_array) {
+        source += "#extension GL_ARB_shader_viewport_layer_array : enable\n";
+    }
+    if (program_type == ProgramType::Compute) {
+        source += "#extension GL_ARB_compute_variable_group_size : require\n";
+    }
+    source += '\n';
+
+    if (program_type != ProgramType::Compute) {
+        source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
+    }
 
     for (const auto& cbuf : entries.const_buffers) {
         source +=
@@ -210,13 +250,16 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
         source += fmt::format("#define SAMPLER_{}_IS_BUFFER", i);
     }
 
-    if (program_type == Maxwell::ShaderProgram::Geometry) {
+    if (program_type == ProgramType::Geometry) {
         const auto [glsl_topology, debug_name, max_vertices] =
             GetPrimitiveDescription(primitive_mode);
 
         source += "layout (" + std::string(glsl_topology) + ") in;\n";
         source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n';
     }
+    if (program_type == ProgramType::Compute) {
+        source += "layout (local_size_variable) in;\n";
+    }
 
     source += code;
 
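SpecializeShader now assembles the GLSL preamble per program type; compute kernels require the variable-group-size extension together with the matching local_size_variable layout qualifier. A condensed sketch of just that branch (the extension names are the ones required above; the surrounding #define emission is omitted):

#include <string>

std::string BuildPreamble(bool is_compute) {
    std::string source = "#version 430 core\n"
                         "#extension GL_ARB_separate_shader_objects : enable\n";
    if (is_compute) {
        // Variable group size lets the host pick the block size at dispatch
        // time, matching glDispatchComputeGroupSizeARB.
        source += "#extension GL_ARB_compute_variable_group_size : require\n";
        source += "layout (local_size_variable) in;\n";
    }
    source += '\n';
    return source;
}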
@@ -244,7 +287,7 @@ std::set<GLenum> GetSupportedFormats() {
 
 } // Anonymous namespace
 
-CachedShader::CachedShader(const ShaderParameters& params, Maxwell::ShaderProgram program_type,
+CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type,
                            GLShader::ProgramResult result)
     : RasterizerCacheObject{params.host_ptr}, host_ptr{params.host_ptr}, cpu_addr{params.cpu_addr},
       unique_identifier{params.unique_identifier}, program_type{program_type},
@@ -257,29 +300,50 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
                                           ProgramCode&& program_code_b) {
     const auto code_size{CalculateProgramSize(program_code)};
     const auto code_size_b{CalculateProgramSize(program_code_b)};
-    auto result{CreateProgram(params.device, program_type, program_code, program_code_b)};
+    auto result{
+        CreateProgram(params.device, GetProgramType(program_type), program_code, program_code_b)};
     if (result.first.empty()) {
         // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now
         return {};
     }
 
     params.disk_cache.SaveRaw(ShaderDiskCacheRaw(
-        params.unique_identifier, program_type, static_cast<u32>(code_size / sizeof(u64)),
-        static_cast<u32>(code_size_b / sizeof(u64)), std::move(program_code),
-        std::move(program_code_b)));
+        params.unique_identifier, GetProgramType(program_type),
+        static_cast<u32>(code_size / sizeof(u64)), static_cast<u32>(code_size_b / sizeof(u64)),
+        std::move(program_code), std::move(program_code_b)));
 
-    return std::shared_ptr<CachedShader>(new CachedShader(params, program_type, std::move(result)));
+    return std::shared_ptr<CachedShader>(
+        new CachedShader(params, GetProgramType(program_type), std::move(result)));
 }
 
 Shader CachedShader::CreateStageFromCache(const ShaderParameters& params,
                                           Maxwell::ShaderProgram program_type,
                                           GLShader::ProgramResult result) {
-    return std::shared_ptr<CachedShader>(new CachedShader(params, program_type, std::move(result)));
+    return std::shared_ptr<CachedShader>(
+        new CachedShader(params, GetProgramType(program_type), std::move(result)));
+}
+
+Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code) {
+    auto result{CreateProgram(params.device, ProgramType::Compute, code, {})};
+
+    const auto code_size{CalculateProgramSize(code)};
+    params.disk_cache.SaveRaw(ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute,
+                                                 static_cast<u32>(code_size / sizeof(u64)), 0,
+                                                 std::move(code), {}));
+
+    return std::shared_ptr<CachedShader>(
+        new CachedShader(params, ProgramType::Compute, std::move(result)));
+}
+
+Shader CachedShader::CreateKernelFromCache(const ShaderParameters& params,
+                                           GLShader::ProgramResult result) {
+    return std::shared_ptr<CachedShader>(
+        new CachedShader(params, ProgramType::Compute, std::move(result)));
 }
 
 std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) {
     GLuint handle{};
-    if (program_type == Maxwell::ShaderProgram::Geometry) {
+    if (program_type == ProgramType::Geometry) {
         handle = GetGeometryShader(variant);
     } else {
         const auto [entry, is_cache_miss] = programs.try_emplace(variant);
@@ -297,8 +361,11 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVar
297 handle = program->handle; 361 handle = program->handle;
298 } 362 }
299 363
300 auto base_bindings{variant.base_bindings}; 364 auto base_bindings = variant.base_bindings;
301 base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + RESERVED_UBOS; 365 base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size());
366 if (program_type != ProgramType::Compute) {
367 base_bindings.cbuf += STAGE_RESERVED_UBOS;
368 }
302 base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size()); 369 base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size());
303 base_bindings.sampler += static_cast<u32>(entries.samplers.size()); 370 base_bindings.sampler += static_cast<u32>(entries.samplers.size());
304 371
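
The binding arithmetic above accumulates a running base for the next stage: each program advances the constant buffer, global memory and sampler offsets by what it consumed, and graphics stages additionally skip STAGE_RESERVED_UBOS while compute does not. A sketch of that computation; the reserved count of 1 is an assumption for illustration, the real constant is defined elsewhere in the file:

#include <cstdint>

struct BaseBindings {
    std::uint32_t cbuf = 0;
    std::uint32_t gmem = 0;
    std::uint32_t sampler = 0;
};

// Assumption: one emulation UBO reserved per graphics stage.
constexpr std::uint32_t STAGE_RESERVED_UBOS = 1;

BaseBindings AdvanceBindings(BaseBindings base, std::uint32_t cbufs, std::uint32_t gmems,
                             std::uint32_t samplers, bool is_compute) {
    base.cbuf += cbufs + (is_compute ? 0 : STAGE_RESERVED_UBOS);
    base.gmem += gmems;
    base.sampler += samplers;
    return base; // handed to the next stage as its starting bindings
}
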
@@ -561,7 +628,7 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia
561} 628}
562 629
563Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { 630Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
564 if (!system.GPU().Maxwell3D().dirty_flags.shaders) { 631 if (!system.GPU().Maxwell3D().dirty.shaders) {
565 return last_shaders[static_cast<std::size_t>(program)]; 632 return last_shaders[static_cast<std::size_t>(program)];
566 } 633 }
567 634
@@ -578,13 +645,15 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
578 // No shader found - create a new one 645 // No shader found - create a new one
579 ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; 646 ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)};
580 ProgramCode program_code_b; 647 ProgramCode program_code_b;
581 if (program == Maxwell::ShaderProgram::VertexA) { 648 const bool is_program_a{program == Maxwell::ShaderProgram::VertexA};
649 if (is_program_a) {
582 const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; 650 const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)};
583 program_code_b = GetShaderCode(memory_manager, program_addr_b, 651 program_code_b = GetShaderCode(memory_manager, program_addr_b,
584 memory_manager.GetPointer(program_addr_b)); 652 memory_manager.GetPointer(program_addr_b));
585 } 653 }
586 654
587 const auto unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); 655 const auto unique_identifier =
656 GetUniqueIdentifier(GetProgramType(program), program_code, program_code_b);
588 const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; 657 const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)};
589 const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, 658 const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr,
590 host_ptr, unique_identifier}; 659 host_ptr, unique_identifier};
@@ -601,4 +670,30 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
601 return last_shaders[static_cast<std::size_t>(program)] = shader; 670 return last_shaders[static_cast<std::size_t>(program)] = shader;
602} 671}
603 672
673Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
674 auto& memory_manager{system.GPU().MemoryManager()};
675 const auto host_ptr{memory_manager.GetPointer(code_addr)};
676 auto kernel = TryGet(host_ptr);
677 if (kernel) {
678 return kernel;
679 }
680
681 // No kernel found - create a new one
682 auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
683 const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})};
684 const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)};
685 const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr,
686 host_ptr, unique_identifier};
687
688 const auto found = precompiled_shaders.find(unique_identifier);
689 if (found == precompiled_shaders.end()) {
690 kernel = CachedShader::CreateKernelFromMemory(params, std::move(code));
691 } else {
692 kernel = CachedShader::CreateKernelFromCache(params, found->second);
693 }
694
695 Register(kernel);
696 return kernel;
697}
698
604} // namespace OpenGL 699} // namespace OpenGL
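
GetComputeKernel mirrors the stage path's three-tier lookup: first the live cache (TryGet on the host pointer), then the precompiled shaders keyed by the code hash, and only then a fresh translation from guest memory, with the result registered either way. The same flow as a stand-alone sketch; all types are stand-ins and the live cache is keyed by the hash here for brevity:

#include <cstdint>
#include <memory>
#include <unordered_map>

struct Kernel {
    std::uint64_t unique_identifier;
};
using KernelPtr = std::shared_ptr<Kernel>;

struct KernelCacheSketch {
    std::unordered_map<std::uint64_t, KernelPtr> live;        // TryGet()/Register()
    std::unordered_map<std::uint64_t, KernelPtr> precompiled; // loaded from disk

    KernelPtr Get(std::uint64_t id) {
        if (const auto it = live.find(id); it != live.end()) {
            return it->second; // hot path: kernel already translated
        }
        const auto pre = precompiled.find(id);
        KernelPtr kernel = pre != precompiled.end()
                               ? pre->second                           // CreateKernelFromCache
                               : std::make_shared<Kernel>(Kernel{id}); // CreateKernelFromMemory
        live.emplace(id, kernel); // Register()
        return kernel;
    }
};
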
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index bbb53cdf4..a3106a0ff 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -61,6 +61,11 @@ public:
61 Maxwell::ShaderProgram program_type, 61 Maxwell::ShaderProgram program_type,
62 GLShader::ProgramResult result); 62 GLShader::ProgramResult result);
63 63
64 static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code);
65
66 static Shader CreateKernelFromCache(const ShaderParameters& params,
67 GLShader::ProgramResult result);
68
64 VAddr GetCpuAddr() const override { 69 VAddr GetCpuAddr() const override {
65 return cpu_addr; 70 return cpu_addr;
66 } 71 }
@@ -78,7 +83,7 @@ public:
78 std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant); 83 std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant);
79 84
80private: 85private:
81 explicit CachedShader(const ShaderParameters& params, Maxwell::ShaderProgram program_type, 86 explicit CachedShader(const ShaderParameters& params, ProgramType program_type,
82 GLShader::ProgramResult result); 87 GLShader::ProgramResult result);
83 88
84 // Geometry programs. These are needed because GLSL needs an input topology but it's not 89 // Geometry programs. These are needed because GLSL needs an input topology but it's not
@@ -104,7 +109,7 @@ private:
104 u8* host_ptr{}; 109 u8* host_ptr{};
105 VAddr cpu_addr{}; 110 VAddr cpu_addr{};
106 u64 unique_identifier{}; 111 u64 unique_identifier{};
107 Maxwell::ShaderProgram program_type{}; 112 ProgramType program_type{};
108 ShaderDiskCacheOpenGL& disk_cache; 113 ShaderDiskCacheOpenGL& disk_cache;
109 const PrecompiledPrograms& precompiled_programs; 114 const PrecompiledPrograms& precompiled_programs;
110 115
@@ -132,6 +137,9 @@ public:
132 /// Gets the current specified shader stage program 137 /// Gets the current specified shader stage program
133 Shader GetStageProgram(Maxwell::ShaderProgram program); 138 Shader GetStageProgram(Maxwell::ShaderProgram program);
134 139
 140 /// Gets a compute kernel at the given address
141 Shader GetComputeKernel(GPUVAddr code_addr);
142
135protected: 143protected:
136 // We do not have to flush this cache as things in it are never modified by us. 144 // We do not have to flush this cache as things in it are never modified by us.
137 void FlushObjectInner(const Shader& object) override {} 145 void FlushObjectInner(const Shader& object) override {}
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 5f2f1510c..ffe26b241 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -14,6 +14,7 @@
14#include "common/alignment.h" 14#include "common/alignment.h"
15#include "common/assert.h" 15#include "common/assert.h"
16#include "common/common_types.h" 16#include "common/common_types.h"
17#include "common/logging/log.h"
17#include "video_core/engines/maxwell_3d.h" 18#include "video_core/engines/maxwell_3d.h"
18#include "video_core/renderer_opengl/gl_device.h" 19#include "video_core/renderer_opengl/gl_device.h"
19#include "video_core/renderer_opengl/gl_rasterizer.h" 20#include "video_core/renderer_opengl/gl_rasterizer.h"
@@ -36,7 +37,6 @@ using namespace std::string_literals;
36using namespace VideoCommon::Shader; 37using namespace VideoCommon::Shader;
37 38
38using Maxwell = Tegra::Engines::Maxwell3D::Regs; 39using Maxwell = Tegra::Engines::Maxwell3D::Regs;
39using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage;
40using Operation = const OperationNode&; 40using Operation = const OperationNode&;
41 41
42enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; 42enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
@@ -46,7 +46,7 @@ using TextureArgument = std::pair<Type, Node>;
46using TextureIR = std::variant<TextureAoffi, TextureArgument>; 46using TextureIR = std::variant<TextureAoffi, TextureArgument>;
47 47
48constexpr u32 MAX_CONSTBUFFER_ELEMENTS = 48constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
49 static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); 49 static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float));
50 50
51class ShaderWriter { 51class ShaderWriter {
52public: 52public:
@@ -161,9 +161,13 @@ std::string FlowStackTopName(MetaStackClass stack) {
161 return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); 161 return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
162} 162}
163 163
164constexpr bool IsVertexShader(ProgramType stage) {
165 return stage == ProgramType::VertexA || stage == ProgramType::VertexB;
166}
167
164class GLSLDecompiler final { 168class GLSLDecompiler final {
165public: 169public:
166 explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderStage stage, 170 explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ProgramType stage,
167 std::string suffix) 171 std::string suffix)
168 : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} 172 : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {}
169 173
@@ -191,10 +195,12 @@ public:
191 195
192 // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems 196 // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems
193 // unlikely that shaders will use 20 nested SSYs and PBKs. 197 // unlikely that shaders will use 20 nested SSYs and PBKs.
194 constexpr u32 FLOW_STACK_SIZE = 20; 198 if (!ir.IsFlowStackDisabled()) {
195 for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { 199 constexpr u32 FLOW_STACK_SIZE = 20;
196 code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); 200 for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) {
197 code.AddLine("uint {} = 0u;", FlowStackTopName(stack)); 201 code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE);
202 code.AddLine("uint {} = 0u;", FlowStackTopName(stack));
203 }
198 } 204 }
199 205
200 code.AddLine("while (true) {{"); 206 code.AddLine("while (true) {{");
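
For context, the AddLine calls above emit a dispatch harness of roughly the following shape; when the IR proves the control flow is structured (IsFlowStackDisabled), the SSY/PBK stack declarations are skipped entirely. The identifiers are inferred from FlowStackName/FlowStackTopName and the 10u entry point from PROGRAM_OFFSET, so treat this as an approximation rather than a verbatim dump:

// Inferred shape of the generated GLSL, stored here as a C++ raw string:
constexpr const char* kControlFlowHarness = R"(
uint jmp_to = 10u;
uint ssy_flow_stack[20]; uint ssy_flow_stack_top = 0u; // omitted when the
uint pbk_flow_stack[20]; uint pbk_flow_stack_top = 0u; // flow stack is disabled
while (true) {
    switch (jmp_to) {
    case 0xau: // decompiled block: Branch and BranchIndirect assign jmp_to
               // and break back to this switch
        return;
    }
}
)";
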
@@ -244,24 +250,22 @@ public:
244 usage.is_read, usage.is_written); 250 usage.is_read, usage.is_written);
245 } 251 }
246 entries.clip_distances = ir.GetClipDistances(); 252 entries.clip_distances = ir.GetClipDistances();
253 entries.shader_viewport_layer_array =
254 IsVertexShader(stage) && (ir.UsesLayer() || ir.UsesViewportIndex());
247 entries.shader_length = ir.GetLength(); 255 entries.shader_length = ir.GetLength();
248 return entries; 256 return entries;
249 } 257 }
250 258
251private: 259private:
252 using OperationDecompilerFn = std::string (GLSLDecompiler::*)(Operation);
253 using OperationDecompilersArray =
254 std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>;
255
256 void DeclareVertex() { 260 void DeclareVertex() {
257 if (stage != ShaderStage::Vertex) 261 if (!IsVertexShader(stage))
258 return; 262 return;
259 263
260 DeclareVertexRedeclarations(); 264 DeclareVertexRedeclarations();
261 } 265 }
262 266
263 void DeclareGeometry() { 267 void DeclareGeometry() {
264 if (stage != ShaderStage::Geometry) { 268 if (stage != ProgramType::Geometry) {
265 return; 269 return;
266 } 270 }
267 271
@@ -280,22 +284,35 @@ private:
280 } 284 }
281 285
282 void DeclareVertexRedeclarations() { 286 void DeclareVertexRedeclarations() {
283 bool clip_distances_declared = false;
284
285 code.AddLine("out gl_PerVertex {{"); 287 code.AddLine("out gl_PerVertex {{");
286 ++code.scope; 288 ++code.scope;
287 289
288 code.AddLine("vec4 gl_Position;"); 290 code.AddLine("vec4 gl_Position;");
289 291
290 for (const auto o : ir.GetOutputAttributes()) { 292 for (const auto attribute : ir.GetOutputAttributes()) {
291 if (o == Attribute::Index::PointSize) 293 if (attribute == Attribute::Index::ClipDistances0123 ||
292 code.AddLine("float gl_PointSize;"); 294 attribute == Attribute::Index::ClipDistances4567) {
293 if (!clip_distances_declared && (o == Attribute::Index::ClipDistances0123 ||
294 o == Attribute::Index::ClipDistances4567)) {
295 code.AddLine("float gl_ClipDistance[];"); 295 code.AddLine("float gl_ClipDistance[];");
296 clip_distances_declared = true; 296 break;
297 } 297 }
298 } 298 }
299 if (!IsVertexShader(stage) || device.HasVertexViewportLayer()) {
300 if (ir.UsesLayer()) {
301 code.AddLine("int gl_Layer;");
302 }
303 if (ir.UsesViewportIndex()) {
304 code.AddLine("int gl_ViewportIndex;");
305 }
306 } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && IsVertexShader(stage) &&
307 !device.HasVertexViewportLayer()) {
308 LOG_ERROR(
309 Render_OpenGL,
310 "GL_ARB_shader_viewport_layer_array is not available and its required by a shader");
311 }
312
313 if (ir.UsesPointSize()) {
314 code.AddLine("float gl_PointSize;");
315 }
299 316
300 --code.scope; 317 --code.scope;
301 code.AddLine("}};"); 318 code.AddLine("}};");
@@ -323,11 +340,16 @@ private:
323 } 340 }
324 341
325 void DeclareLocalMemory() { 342 void DeclareLocalMemory() {
326 if (const u64 local_memory_size = header.GetLocalMemorySize(); local_memory_size > 0) { 343 // TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at
327 const auto element_count = Common::AlignUp(local_memory_size, 4) / 4; 344 // specialization time.
328 code.AddLine("float {}[{}];", GetLocalMemory(), element_count); 345 const u64 local_memory_size =
329 code.AddNewLine(); 346 stage == ProgramType::Compute ? 0x400 : header.GetLocalMemorySize();
347 if (local_memory_size == 0) {
348 return;
330 } 349 }
350 const auto element_count = Common::AlignUp(local_memory_size, 4) / 4;
351 code.AddLine("float {}[{}];", GetLocalMemory(), element_count);
352 code.AddNewLine();
331 } 353 }
332 354
333 void DeclareInternalFlags() { 355 void DeclareInternalFlags() {
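
The element count above turns a byte size into whole floats, and the compute path is pinned to a 0x400-byte stub until the TODO about reading the real size at specialization time is resolved. The arithmetic spelled out, assuming the usual round-up semantics for Common::AlignUp:

#include <cstdint>

// Round value up to the next multiple of align (align > 0).
constexpr std::uint64_t AlignUp(std::uint64_t value, std::uint64_t align) {
    return (value + align - 1) / align * align;
}

static_assert(AlignUp(0x400, 4) / 4 == 256); // compute stub: 1024 bytes -> 256 floats
static_assert(AlignUp(10, 4) / 4 == 3);      // a partial trailing word still gets a slot
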
@@ -381,12 +403,12 @@ private:
381 const u32 location{GetGenericAttributeIndex(index)}; 403 const u32 location{GetGenericAttributeIndex(index)};
382 404
383 std::string name{GetInputAttribute(index)}; 405 std::string name{GetInputAttribute(index)};
384 if (stage == ShaderStage::Geometry) { 406 if (stage == ProgramType::Geometry) {
385 name = "gs_" + name + "[]"; 407 name = "gs_" + name + "[]";
386 } 408 }
387 409
388 std::string suffix; 410 std::string suffix;
389 if (stage == ShaderStage::Fragment) { 411 if (stage == ProgramType::Fragment) {
390 const auto input_mode{header.ps.GetAttributeUse(location)}; 412 const auto input_mode{header.ps.GetAttributeUse(location)};
391 if (skip_unused && input_mode == AttributeUse::Unused) { 413 if (skip_unused && input_mode == AttributeUse::Unused) {
392 return; 414 return;
@@ -398,7 +420,7 @@ private:
398 } 420 }
399 421
400 void DeclareOutputAttributes() { 422 void DeclareOutputAttributes() {
401 if (ir.HasPhysicalAttributes() && stage != ShaderStage::Fragment) { 423 if (ir.HasPhysicalAttributes() && stage != ProgramType::Fragment) {
402 for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) { 424 for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) {
403 DeclareOutputAttribute(ToGenericAttribute(i)); 425 DeclareOutputAttribute(ToGenericAttribute(i));
404 } 426 }
@@ -520,7 +542,7 @@ private:
520 constexpr u32 element_stride{4}; 542 constexpr u32 element_stride{4};
521 const u32 address{generic_base + index * generic_stride + element * element_stride}; 543 const u32 address{generic_base + index * generic_stride + element * element_stride};
522 544
523 const bool declared{stage != ShaderStage::Fragment || 545 const bool declared{stage != ProgramType::Fragment ||
524 header.ps.GetAttributeUse(index) != AttributeUse::Unused}; 546 header.ps.GetAttributeUse(index) != AttributeUse::Unused};
525 const std::string value{declared ? ReadAttribute(attribute, element) : "0"}; 547 const std::string value{declared ? ReadAttribute(attribute, element) : "0"};
526 code.AddLine("case 0x{:x}: return {};", address, value); 548 code.AddLine("case 0x{:x}: return {};", address, value);
@@ -624,7 +646,7 @@ private:
624 } 646 }
625 647
626 if (const auto abuf = std::get_if<AbufNode>(&*node)) { 648 if (const auto abuf = std::get_if<AbufNode>(&*node)) {
627 UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderStage::Geometry, 649 UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ProgramType::Geometry,
628 "Physical attributes in geometry shaders are not implemented"); 650 "Physical attributes in geometry shaders are not implemented");
629 if (abuf->IsPhysicalBuffer()) { 651 if (abuf->IsPhysicalBuffer()) {
630 return fmt::format("readPhysicalAttribute(ftou({}))", 652 return fmt::format("readPhysicalAttribute(ftou({}))",
@@ -679,6 +701,9 @@ private:
679 } 701 }
680 702
681 if (const auto lmem = std::get_if<LmemNode>(&*node)) { 703 if (const auto lmem = std::get_if<LmemNode>(&*node)) {
704 if (stage == ProgramType::Compute) {
705 LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
706 }
682 return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); 707 return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress()));
683 } 708 }
684 709
@@ -708,7 +733,7 @@ private:
708 733
709 std::string ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) { 734 std::string ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) {
710 const auto GeometryPass = [&](std::string_view name) { 735 const auto GeometryPass = [&](std::string_view name) {
711 if (stage == ShaderStage::Geometry && buffer) { 736 if (stage == ProgramType::Geometry && buffer) {
712 // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games 737 // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
713 // set an 0x80000000 index for those and the shader fails to build. Find out why 738 // set an 0x80000000 index for those and the shader fails to build. Find out why
714 // this happens and what's its intent. 739 // this happens and what's its intent.
@@ -720,10 +745,10 @@ private:
720 switch (attribute) { 745 switch (attribute) {
721 case Attribute::Index::Position: 746 case Attribute::Index::Position:
722 switch (stage) { 747 switch (stage) {
723 case ShaderStage::Geometry: 748 case ProgramType::Geometry:
724 return fmt::format("gl_in[ftou({})].gl_Position{}", Visit(buffer), 749 return fmt::format("gl_in[ftou({})].gl_Position{}", Visit(buffer),
725 GetSwizzle(element)); 750 GetSwizzle(element));
726 case ShaderStage::Fragment: 751 case ProgramType::Fragment:
727 return element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)); 752 return element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element));
728 default: 753 default:
729 UNREACHABLE(); 754 UNREACHABLE();
@@ -744,7 +769,7 @@ private:
744 // TODO(Subv): Find out what the values are for the first two elements when inside a 769 // TODO(Subv): Find out what the values are for the first two elements when inside a
745 // vertex shader, and what's the value of the fourth element when inside a Tess Eval 770 // vertex shader, and what's the value of the fourth element when inside a Tess Eval
746 // shader. 771 // shader.
747 ASSERT(stage == ShaderStage::Vertex); 772 ASSERT(IsVertexShader(stage));
748 switch (element) { 773 switch (element) {
749 case 2: 774 case 2:
750 // Config pack's first value is instance_id. 775 // Config pack's first value is instance_id.
@@ -756,7 +781,7 @@ private:
756 return "0"; 781 return "0";
757 case Attribute::Index::FrontFacing: 782 case Attribute::Index::FrontFacing:
758 // TODO(Subv): Find out what the values are for the other elements. 783 // TODO(Subv): Find out what the values are for the other elements.
759 ASSERT(stage == ShaderStage::Fragment); 784 ASSERT(stage == ProgramType::Fragment);
760 switch (element) { 785 switch (element) {
761 case 3: 786 case 3:
762 return "itof(gl_FrontFacing ? -1 : 0)"; 787 return "itof(gl_FrontFacing ? -1 : 0)";
@@ -778,7 +803,7 @@ private:
778 return value; 803 return value;
779 } 804 }
780 // There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders 805 // There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders
781 const std::string precise = stage != ShaderStage::Fragment ? "precise " : ""; 806 const std::string precise = stage != ProgramType::Fragment ? "precise " : "";
782 807
783 const std::string temporary = code.GenerateTemporary(); 808 const std::string temporary = code.GenerateTemporary();
784 code.AddLine("{}float {} = {};", precise, temporary, value); 809 code.AddLine("{}float {} = {};", precise, temporary, value);
@@ -803,6 +828,45 @@ private:
803 return CastOperand(VisitOperand(operation, operand_index), type); 828 return CastOperand(VisitOperand(operation, operand_index), type);
804 } 829 }
805 830
831 std::optional<std::pair<std::string, bool>> GetOutputAttribute(const AbufNode* abuf) {
832 switch (const auto attribute = abuf->GetIndex()) {
833 case Attribute::Index::Position:
834 return std::make_pair("gl_Position"s + GetSwizzle(abuf->GetElement()), false);
835 case Attribute::Index::LayerViewportPointSize:
836 switch (abuf->GetElement()) {
837 case 0:
838 UNIMPLEMENTED();
839 return {};
840 case 1:
841 if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) {
842 return {};
843 }
844 return std::make_pair("gl_Layer", true);
845 case 2:
846 if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) {
847 return {};
848 }
849 return std::make_pair("gl_ViewportIndex", true);
850 case 3:
851 UNIMPLEMENTED_MSG("Requires some state changes for gl_PointSize to work in shader");
852 return std::make_pair("gl_PointSize", false);
853 }
854 return {};
855 case Attribute::Index::ClipDistances0123:
856 return std::make_pair(fmt::format("gl_ClipDistance[{}]", abuf->GetElement()), false);
857 case Attribute::Index::ClipDistances4567:
858 return std::make_pair(fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4),
859 false);
860 default:
861 if (IsGenericAttribute(attribute)) {
862 return std::make_pair(
863 GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), false);
864 }
865 UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute));
866 return {};
867 }
868 }
869
806 std::string CastOperand(const std::string& value, Type type) const { 870 std::string CastOperand(const std::string& value, Type type) const {
807 switch (type) { 871 switch (type) {
808 case Type::Bool: 872 case Type::Bool:
@@ -999,6 +1063,8 @@ private:
999 const Node& src = operation[1]; 1063 const Node& src = operation[1];
1000 1064
1001 std::string target; 1065 std::string target;
1066 bool is_integer = false;
1067
1002 if (const auto gpr = std::get_if<GprNode>(&*dest)) { 1068 if (const auto gpr = std::get_if<GprNode>(&*dest)) {
1003 if (gpr->GetIndex() == Register::ZeroIndex) { 1069 if (gpr->GetIndex() == Register::ZeroIndex) {
1004 // Writing to Register::ZeroIndex is a no op 1070 // Writing to Register::ZeroIndex is a no op
@@ -1007,27 +1073,16 @@ private:
1007 target = GetRegister(gpr->GetIndex()); 1073 target = GetRegister(gpr->GetIndex());
1008 } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { 1074 } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
1009 UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer()); 1075 UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer());
1010 1076 const auto result = GetOutputAttribute(abuf);
1011 target = [&]() -> std::string { 1077 if (!result) {
1012 switch (const auto attribute = abuf->GetIndex(); abuf->GetIndex()) { 1078 return {};
1013 case Attribute::Index::Position: 1079 }
1014 return "gl_Position"s + GetSwizzle(abuf->GetElement()); 1080 target = result->first;
1015 case Attribute::Index::PointSize: 1081 is_integer = result->second;
1016 return "gl_PointSize";
1017 case Attribute::Index::ClipDistances0123:
1018 return fmt::format("gl_ClipDistance[{}]", abuf->GetElement());
1019 case Attribute::Index::ClipDistances4567:
1020 return fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4);
1021 default:
1022 if (IsGenericAttribute(attribute)) {
1023 return GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement());
1024 }
1025 UNIMPLEMENTED_MSG("Unhandled output attribute: {}",
1026 static_cast<u32>(attribute));
1027 return "0";
1028 }
1029 }();
1030 } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { 1082 } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
1083 if (stage == ProgramType::Compute) {
1084 LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
1085 }
1031 target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); 1086 target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress()));
1032 } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { 1087 } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
1033 const std::string real = Visit(gmem->GetRealAddress()); 1088 const std::string real = Visit(gmem->GetRealAddress());
@@ -1038,7 +1093,11 @@ private:
1038 UNREACHABLE_MSG("Assign called without a proper target"); 1093 UNREACHABLE_MSG("Assign called without a proper target");
1039 } 1094 }
1040 1095
1041 code.AddLine("{} = {};", target, Visit(src)); 1096 if (is_integer) {
1097 code.AddLine("{} = ftoi({});", target, Visit(src));
1098 } else {
1099 code.AddLine("{} = {};", target, Visit(src));
1100 }
1042 return {}; 1101 return {};
1043 } 1102 }
1044 1103
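
The is_integer plumbing exists because the generated GLSL tracks every register as a float, while targets such as gl_Layer and gl_ViewportIndex are ints; the emitted ftoi is a bit cast in the floatBitsToInt style of the common declarations, not a numeric conversion. Its C++ analogue:

#include <cstdint>
#include <cstring>

// Recover the integer bit pattern stored in a float register; no rounding
// or range conversion happens, matching GLSL's floatBitsToInt.
std::int32_t ftoi(float value) {
    std::int32_t result;
    std::memcpy(&result, &value, sizeof(result));
    return result;
}
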
@@ -1351,14 +1410,10 @@ private:
1351 return fmt::format("{}[{}]", pair, VisitOperand(operation, 1, Type::Uint)); 1410 return fmt::format("{}[{}]", pair, VisitOperand(operation, 1, Type::Uint));
1352 } 1411 }
1353 1412
1354 std::string LogicalAll2(Operation operation) { 1413 std::string LogicalAnd2(Operation operation) {
1355 return GenerateUnary(operation, "all", Type::Bool, Type::Bool2); 1414 return GenerateUnary(operation, "all", Type::Bool, Type::Bool2);
1356 } 1415 }
1357 1416
1358 std::string LogicalAny2(Operation operation) {
1359 return GenerateUnary(operation, "any", Type::Bool, Type::Bool2);
1360 }
1361
1362 template <bool with_nan> 1417 template <bool with_nan>
1363 std::string GenerateHalfComparison(Operation operation, const std::string& compare_op) { 1418 std::string GenerateHalfComparison(Operation operation, const std::string& compare_op) {
1364 const std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2, 1419 const std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2,
@@ -1555,6 +1610,14 @@ private:
1555 return {}; 1610 return {};
1556 } 1611 }
1557 1612
1613 std::string BranchIndirect(Operation operation) {
1614 const std::string op_a = VisitOperand(operation, 0, Type::Uint);
1615
1616 code.AddLine("jmp_to = {};", op_a);
1617 code.AddLine("break;");
1618 return {};
1619 }
1620
1558 std::string PushFlowStack(Operation operation) { 1621 std::string PushFlowStack(Operation operation) {
1559 const auto stack = std::get<MetaStackClass>(operation.GetMeta()); 1622 const auto stack = std::get<MetaStackClass>(operation.GetMeta());
1560 const auto target = std::get_if<ImmediateNode>(&*operation[0]); 1623 const auto target = std::get_if<ImmediateNode>(&*operation[0]);
@@ -1573,7 +1636,7 @@ private:
1573 } 1636 }
1574 1637
1575 std::string Exit(Operation operation) { 1638 std::string Exit(Operation operation) {
1576 if (stage != ShaderStage::Fragment) { 1639 if (stage != ProgramType::Fragment) {
1577 code.AddLine("return;"); 1640 code.AddLine("return;");
1578 return {}; 1641 return {};
1579 } 1642 }
@@ -1624,7 +1687,7 @@ private:
1624 } 1687 }
1625 1688
1626 std::string EmitVertex(Operation operation) { 1689 std::string EmitVertex(Operation operation) {
1627 ASSERT_MSG(stage == ShaderStage::Geometry, 1690 ASSERT_MSG(stage == ProgramType::Geometry,
1628 "EmitVertex is expected to be used in a geometry shader."); 1691 "EmitVertex is expected to be used in a geometry shader.");
1629 1692
1630 // If a geometry shader is attached, it will always flip (it's the last stage before 1693 // If a geometry shader is attached, it will always flip (it's the last stage before
@@ -1635,7 +1698,7 @@ private:
1635 } 1698 }
1636 1699
1637 std::string EndPrimitive(Operation operation) { 1700 std::string EndPrimitive(Operation operation) {
1638 ASSERT_MSG(stage == ShaderStage::Geometry, 1701 ASSERT_MSG(stage == ProgramType::Geometry,
1639 "EndPrimitive is expected to be used in a geometry shader."); 1702 "EndPrimitive is expected to be used in a geometry shader.");
1640 1703
1641 code.AddLine("EndPrimitive();"); 1704 code.AddLine("EndPrimitive();");
@@ -1657,7 +1720,7 @@ private:
1657 return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')'; 1720 return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')';
1658 } 1721 }
1659 1722
1660 static constexpr OperationDecompilersArray operation_decompilers = { 1723 static constexpr std::array operation_decompilers = {
1661 &GLSLDecompiler::Assign, 1724 &GLSLDecompiler::Assign,
1662 1725
1663 &GLSLDecompiler::Select, 1726 &GLSLDecompiler::Select,
@@ -1741,8 +1804,7 @@ private:
1741 &GLSLDecompiler::LogicalXor, 1804 &GLSLDecompiler::LogicalXor,
1742 &GLSLDecompiler::LogicalNegate, 1805 &GLSLDecompiler::LogicalNegate,
1743 &GLSLDecompiler::LogicalPick2, 1806 &GLSLDecompiler::LogicalPick2,
1744 &GLSLDecompiler::LogicalAll2, 1807 &GLSLDecompiler::LogicalAnd2,
1745 &GLSLDecompiler::LogicalAny2,
1746 1808
1747 &GLSLDecompiler::LogicalLessThan<Type::Float>, 1809 &GLSLDecompiler::LogicalLessThan<Type::Float>,
1748 &GLSLDecompiler::LogicalEqual<Type::Float>, 1810 &GLSLDecompiler::LogicalEqual<Type::Float>,
@@ -1789,6 +1851,7 @@ private:
1789 &GLSLDecompiler::ImageStore, 1851 &GLSLDecompiler::ImageStore,
1790 1852
1791 &GLSLDecompiler::Branch, 1853 &GLSLDecompiler::Branch,
1854 &GLSLDecompiler::BranchIndirect,
1792 &GLSLDecompiler::PushFlowStack, 1855 &GLSLDecompiler::PushFlowStack,
1793 &GLSLDecompiler::PopFlowStack, 1856 &GLSLDecompiler::PopFlowStack,
1794 &GLSLDecompiler::Exit, 1857 &GLSLDecompiler::Exit,
@@ -1805,6 +1868,7 @@ private:
1805 &GLSLDecompiler::WorkGroupId<1>, 1868 &GLSLDecompiler::WorkGroupId<1>,
1806 &GLSLDecompiler::WorkGroupId<2>, 1869 &GLSLDecompiler::WorkGroupId<2>,
1807 }; 1870 };
1871 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
1808 1872
1809 std::string GetRegister(u32 index) const { 1873 std::string GetRegister(u32 index) const {
1810 return GetDeclarationWithSuffix(index, "gpr"); 1874 return GetDeclarationWithSuffix(index, "gpr");
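
Swapping the hand-sized OperationDecompilersArray for a deduced std::array plus a static_assert keeps the jump table honest: after removing LogicalAny2, a missing assert would have silently shifted every later OperationCode onto the wrong member function. The pattern in isolation:

#include <array>
#include <cstddef>

enum class OperationCode { Assign, Select, Amount };

struct Decompiler {
    int Assign() { return 0; }
    int Select() { return 1; }
};

// CTAD sizes the table from its initializers; the assert ties that size to
// the enum so additions and removals must stay in sync.
static constexpr std::array operation_decompilers = {
    &Decompiler::Assign,
    &Decompiler::Select,
};
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
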
@@ -1869,7 +1933,7 @@ private:
1869 } 1933 }
1870 1934
1871 u32 GetNumPhysicalInputAttributes() const { 1935 u32 GetNumPhysicalInputAttributes() const {
1872 return stage == ShaderStage::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); 1936 return IsVertexShader(stage) ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings();
1873 } 1937 }
1874 1938
1875 u32 GetNumPhysicalAttributes() const { 1939 u32 GetNumPhysicalAttributes() const {
@@ -1882,7 +1946,7 @@ private:
1882 1946
1883 const Device& device; 1947 const Device& device;
1884 const ShaderIR& ir; 1948 const ShaderIR& ir;
1885 const ShaderStage stage; 1949 const ProgramType stage;
1886 const std::string suffix; 1950 const std::string suffix;
1887 const Header header; 1951 const Header header;
1888 1952
@@ -1913,7 +1977,7 @@ std::string GetCommonDeclarations() {
1913 MAX_CONSTBUFFER_ELEMENTS); 1977 MAX_CONSTBUFFER_ELEMENTS);
1914} 1978}
1915 1979
1916ProgramResult Decompile(const Device& device, const ShaderIR& ir, Maxwell::ShaderStage stage, 1980ProgramResult Decompile(const Device& device, const ShaderIR& ir, ProgramType stage,
1917 const std::string& suffix) { 1981 const std::string& suffix) {
1918 GLSLDecompiler decompiler(device, ir, stage, suffix); 1982 GLSLDecompiler decompiler(device, ir, stage, suffix);
1919 decompiler.Decompile(); 1983 decompiler.Decompile();
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index 14d11c7fc..2ea02f5bf 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -12,14 +12,26 @@
12#include "video_core/engines/maxwell_3d.h" 12#include "video_core/engines/maxwell_3d.h"
13#include "video_core/shader/shader_ir.h" 13#include "video_core/shader/shader_ir.h"
14 14
15namespace OpenGL {
16class Device;
17}
18
19namespace VideoCommon::Shader { 15namespace VideoCommon::Shader {
20class ShaderIR; 16class ShaderIR;
21} 17}
22 18
19namespace OpenGL {
20
21class Device;
22
23enum class ProgramType : u32 {
24 VertexA = 0,
25 VertexB = 1,
26 TessellationControl = 2,
27 TessellationEval = 3,
28 Geometry = 4,
29 Fragment = 5,
30 Compute = 6
31};
32
33} // namespace OpenGL
34
23namespace OpenGL::GLShader { 35namespace OpenGL::GLShader {
24 36
25struct ShaderEntries; 37struct ShaderEntries;
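
The .cpp hunks above call a GetProgramType helper to lift Maxwell::ShaderProgram into this enum; the helper itself is outside this diff, but the mapping is presumably one-to-one, with VertexA kept distinct so the disk cache can still tell dual vertex programs apart (see HasProgramA below). A plausible sketch with a stand-in Maxwell enum, keeping the engine's Tesselation spelling:

enum class ShaderProgram {
    VertexA, VertexB, TesselationControl, TesselationEval, Geometry, Fragment,
};
enum class ProgramTypeSketch : unsigned {
    VertexA = 0, VertexB = 1, TessellationControl = 2,
    TessellationEval = 3, Geometry = 4, Fragment = 5, Compute = 6,
};

ProgramTypeSketch GetProgramType(ShaderProgram program) {
    switch (program) {
    case ShaderProgram::VertexA: return ProgramTypeSketch::VertexA;
    case ShaderProgram::VertexB: return ProgramTypeSketch::VertexB;
    case ShaderProgram::TesselationControl: return ProgramTypeSketch::TessellationControl;
    case ShaderProgram::TesselationEval: return ProgramTypeSketch::TessellationEval;
    case ShaderProgram::Geometry: return ProgramTypeSketch::Geometry;
    case ShaderProgram::Fragment: return ProgramTypeSketch::Fragment;
    }
    return ProgramTypeSketch::VertexA; // unreachable for valid inputs
}
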
@@ -78,12 +90,13 @@ struct ShaderEntries {
78 std::vector<ImageEntry> images; 90 std::vector<ImageEntry> images;
79 std::vector<GlobalMemoryEntry> global_memory_entries; 91 std::vector<GlobalMemoryEntry> global_memory_entries;
80 std::array<bool, Maxwell::NumClipDistances> clip_distances{}; 92 std::array<bool, Maxwell::NumClipDistances> clip_distances{};
93 bool shader_viewport_layer_array{};
81 std::size_t shader_length{}; 94 std::size_t shader_length{};
82}; 95};
83 96
84std::string GetCommonDeclarations(); 97std::string GetCommonDeclarations();
85 98
86ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, 99ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
87 Maxwell::ShaderStage stage, const std::string& suffix); 100 ProgramType stage, const std::string& suffix);
88 101
89} // namespace OpenGL::GLShader 102} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 10688397b..969fe9ced 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -51,7 +51,7 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() {
51 51
52} // namespace 52} // namespace
53 53
54ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type, 54ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type,
55 u32 program_code_size, u32 program_code_size_b, 55 u32 program_code_size, u32 program_code_size_b,
56 ProgramCode program_code, ProgramCode program_code_b) 56 ProgramCode program_code, ProgramCode program_code_b)
57 : unique_identifier{unique_identifier}, program_type{program_type}, 57 : unique_identifier{unique_identifier}, program_type{program_type},
@@ -373,6 +373,12 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
373 } 373 }
374 } 374 }
375 375
376 bool shader_viewport_layer_array{};
377 if (!LoadObjectFromPrecompiled(shader_viewport_layer_array)) {
378 return {};
379 }
380 entry.entries.shader_viewport_layer_array = shader_viewport_layer_array;
381
376 u64 shader_length{}; 382 u64 shader_length{};
377 if (!LoadObjectFromPrecompiled(shader_length)) { 383 if (!LoadObjectFromPrecompiled(shader_length)) {
378 return {}; 384 return {};
@@ -445,6 +451,10 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
445 } 451 }
446 } 452 }
447 453
454 if (!SaveObjectToPrecompiled(entries.shader_viewport_layer_array)) {
455 return false;
456 }
457
448 if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) { 458 if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) {
449 return false; 459 return false;
450 } 460 }
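
Because the precompiled file is a flat stream of fields, the new shader_viewport_layer_array flag has to be written and read at the same position on both paths, which is exactly what the two hunks do; an older cache file fails the read and the decompiled entry is rebuilt from scratch. The usual shape of such symmetric helpers (the real code routes through the SaveObjectToPrecompiled/LoadObjectFromPrecompiled templates):

#include <cstdio>

bool SaveBool(std::FILE* file, bool value) {
    const unsigned char byte = value ? 1 : 0;
    return std::fwrite(&byte, 1, 1, file) == 1;
}

bool LoadBool(std::FILE* file, bool& value) {
    unsigned char byte{};
    if (std::fread(&byte, 1, 1, file) != 1) {
        return false; // truncated or stale cache: caller discards the entry
    }
    value = byte != 0;
    return true;
}
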
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index 4f296dda6..cc8bbd61e 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -18,7 +18,6 @@
18#include "common/assert.h" 18#include "common/assert.h"
19#include "common/common_types.h" 19#include "common/common_types.h"
20#include "core/file_sys/vfs_vector.h" 20#include "core/file_sys/vfs_vector.h"
21#include "video_core/engines/maxwell_3d.h"
22#include "video_core/renderer_opengl/gl_shader_gen.h" 21#include "video_core/renderer_opengl/gl_shader_gen.h"
23 22
24namespace Core { 23namespace Core {
@@ -34,14 +33,11 @@ namespace OpenGL {
34struct ShaderDiskCacheUsage; 33struct ShaderDiskCacheUsage;
35struct ShaderDiskCacheDump; 34struct ShaderDiskCacheDump;
36 35
37using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>;
38
39using ProgramCode = std::vector<u64>; 36using ProgramCode = std::vector<u64>;
40using Maxwell = Tegra::Engines::Maxwell3D::Regs; 37using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>;
41
42using TextureBufferUsage = std::bitset<64>; 38using TextureBufferUsage = std::bitset<64>;
43 39
44/// Allocated bindings used by an OpenGL shader program. 40/// Allocated bindings used by an OpenGL shader program
45struct BaseBindings { 41struct BaseBindings {
46 u32 cbuf{}; 42 u32 cbuf{};
47 u32 gmem{}; 43 u32 gmem{};
@@ -126,7 +122,7 @@ namespace OpenGL {
126/// Describes how a shader is used by the guest GPU 122/// Describes how a shader is used by the guest GPU
127class ShaderDiskCacheRaw { 123class ShaderDiskCacheRaw {
128public: 124public:
129 explicit ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type, 125 explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type,
130 u32 program_code_size, u32 program_code_size_b, 126 u32 program_code_size, u32 program_code_size_b,
131 ProgramCode program_code, ProgramCode program_code_b); 127 ProgramCode program_code, ProgramCode program_code_b);
132 ShaderDiskCacheRaw(); 128 ShaderDiskCacheRaw();
@@ -141,30 +137,13 @@ public:
141 } 137 }
142 138
143 bool HasProgramA() const { 139 bool HasProgramA() const {
144 return program_type == Maxwell::ShaderProgram::VertexA; 140 return program_type == ProgramType::VertexA;
145 } 141 }
146 142
147 Maxwell::ShaderProgram GetProgramType() const { 143 ProgramType GetProgramType() const {
148 return program_type; 144 return program_type;
149 } 145 }
150 146
151 Maxwell::ShaderStage GetProgramStage() const {
152 switch (program_type) {
153 case Maxwell::ShaderProgram::VertexA:
154 case Maxwell::ShaderProgram::VertexB:
155 return Maxwell::ShaderStage::Vertex;
156 case Maxwell::ShaderProgram::TesselationControl:
157 return Maxwell::ShaderStage::TesselationControl;
158 case Maxwell::ShaderProgram::TesselationEval:
159 return Maxwell::ShaderStage::TesselationEval;
160 case Maxwell::ShaderProgram::Geometry:
161 return Maxwell::ShaderStage::Geometry;
162 case Maxwell::ShaderProgram::Fragment:
163 return Maxwell::ShaderStage::Fragment;
164 }
165 UNREACHABLE();
166 }
167
168 const ProgramCode& GetProgramCode() const { 147 const ProgramCode& GetProgramCode() const {
169 return program_code; 148 return program_code;
170 } 149 }
@@ -175,7 +154,7 @@ public:
175 154
176private: 155private:
177 u64 unique_identifier{}; 156 u64 unique_identifier{};
178 Maxwell::ShaderProgram program_type{}; 157 ProgramType program_type{};
179 u32 program_code_size{}; 158 u32 program_code_size{};
180 u32 program_code_size_b{}; 159 u32 program_code_size_b{};
181 160
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 9148629ec..3a8d9e1da 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -14,7 +14,8 @@ using Tegra::Engines::Maxwell3D;
14using VideoCommon::Shader::ProgramCode; 14using VideoCommon::Shader::ProgramCode;
15using VideoCommon::Shader::ShaderIR; 15using VideoCommon::Shader::ShaderIR;
16 16
17static constexpr u32 PROGRAM_OFFSET{10}; 17static constexpr u32 PROGRAM_OFFSET = 10;
18static constexpr u32 COMPUTE_OFFSET = 0;
18 19
19ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) { 20ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) {
20 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); 21 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
@@ -29,17 +30,15 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
29}; 30};
30 31
31)"; 32)";
32 const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
33 ProgramResult program =
34 Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex");
35 33
34 const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
35 const auto stage = setup.IsDualProgram() ? ProgramType::VertexA : ProgramType::VertexB;
36 ProgramResult program = Decompile(device, program_ir, stage, "vertex");
36 out += program.first; 37 out += program.first;
37 38
38 if (setup.IsDualProgram()) { 39 if (setup.IsDualProgram()) {
39 const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET); 40 const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b);
40 ProgramResult program_b = 41 ProgramResult program_b = Decompile(device, program_ir_b, ProgramType::VertexB, "vertex_b");
41 Decompile(device, program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b");
42
43 out += program_b.first; 42 out += program_b.first;
44 } 43 }
45 44
@@ -80,9 +79,9 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
80}; 79};
81 80
82)"; 81)";
83 const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); 82
84 ProgramResult program = 83 const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
85 Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry"); 84 ProgramResult program = Decompile(device, program_ir, ProgramType::Geometry, "geometry");
86 out += program.first; 85 out += program.first;
87 86
88 out += R"( 87 out += R"(
@@ -115,10 +114,8 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
115}; 114};
116 115
117)"; 116)";
118 const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); 117 const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
119 ProgramResult program = 118 ProgramResult program = Decompile(device, program_ir, ProgramType::Fragment, "fragment");
120 Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment");
121
122 out += program.first; 119 out += program.first;
123 120
124 out += R"( 121 out += R"(
@@ -130,4 +127,22 @@ void main() {
130 return {std::move(out), std::move(program.second)}; 127 return {std::move(out), std::move(program.second)};
131} 128}
132 129
130ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup) {
131 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
132
133 std::string out = "// Shader Unique Id: CS" + id + "\n\n";
134 out += GetCommonDeclarations();
135
136 const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a);
137 ProgramResult program = Decompile(device, program_ir, ProgramType::Compute, "compute");
138 out += program.first;
139
140 out += R"(
141void main() {
142 execute_compute();
143}
144)";
145 return {std::move(out), std::move(program.second)};
146}
147
133} // namespace OpenGL::GLShader 148} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index 0536c8a03..3833e88ab 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -27,6 +27,8 @@ struct ShaderSetup {
27 ProgramCode code; 27 ProgramCode code;
28 ProgramCode code_b; // Used for dual vertex shaders 28 ProgramCode code_b; // Used for dual vertex shaders
29 u64 unique_identifier; 29 u64 unique_identifier;
30 std::size_t size_a;
31 std::size_t size_b;
30 } program; 32 } program;
31 33
 32 /// Used in scenarios where we have dual vertex shaders 34 /// Used in scenarios where we have dual vertex shaders
@@ -52,4 +54,7 @@ ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& se
52/// Generates the GLSL fragment shader program source code for the given FS program 54/// Generates the GLSL fragment shader program source code for the given FS program
53ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup); 55ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup);
54 56
57/// Generates the GLSL compute shader program source code for the given CS program
58ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup);
59
55} // namespace OpenGL::GLShader 60} // namespace OpenGL::GLShader
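
The new size_a/size_b fields carry the real program lengths into the ShaderIR constructor calls seen in gl_shader_gen.cpp, instead of letting the IR guess where a program ends. A sketch of a caller filling them; the struct is a stand-in and the byte unit is an assumption based on the CalculateProgramSize calls in the cache:

#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

using ProgramCode = std::vector<std::uint64_t>;

struct ShaderSetupSketch {
    struct {
        ProgramCode code;
        ProgramCode code_b; // used for dual vertex shaders
        std::uint64_t unique_identifier;
        std::size_t size_a;
        std::size_t size_b;
    } program;
};

ShaderSetupSketch MakeSetup(ProgramCode code, ProgramCode code_b, std::uint64_t id) {
    ShaderSetupSketch setup;
    setup.program.size_a = code.size() * sizeof(std::uint64_t);   // assumed byte sizes
    setup.program.size_b = code_b.size() * sizeof(std::uint64_t);
    setup.program.code = std::move(code);
    setup.program.code_b = std::move(code_b);
    setup.program.unique_identifier = id;
    return setup;
}
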
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp
index 5f3fe067e..9e74eda0d 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_util.cpp
@@ -10,21 +10,25 @@
10 10
11namespace OpenGL::GLShader { 11namespace OpenGL::GLShader {
12 12
13GLuint LoadShader(const char* source, GLenum type) { 13namespace {
14 const char* debug_type; 14const char* GetStageDebugName(GLenum type) {
15 switch (type) { 15 switch (type) {
16 case GL_VERTEX_SHADER: 16 case GL_VERTEX_SHADER:
17 debug_type = "vertex"; 17 return "vertex";
18 break;
19 case GL_GEOMETRY_SHADER: 18 case GL_GEOMETRY_SHADER:
20 debug_type = "geometry"; 19 return "geometry";
21 break;
22 case GL_FRAGMENT_SHADER: 20 case GL_FRAGMENT_SHADER:
23 debug_type = "fragment"; 21 return "fragment";
24 break; 22 case GL_COMPUTE_SHADER:
25 default: 23 return "compute";
26 UNREACHABLE();
27 } 24 }
25 UNIMPLEMENTED();
26 return "unknown";
27}
28} // Anonymous namespace
29
30GLuint LoadShader(const char* source, GLenum type) {
31 const char* debug_type = GetStageDebugName(type);
28 const GLuint shader_id = glCreateShader(type); 32 const GLuint shader_id = glCreateShader(type);
29 glShaderSource(shader_id, 1, &source, nullptr); 33 glShaderSource(shader_id, 1, &source, nullptr);
30 LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); 34 LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type);
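
LoadShader continues past this hunk to compile and link; the standard follow-up for surfacing driver diagnostics uses only core GL queries and looks like this:

#include <cstddef>
#include <string>

#include <glad/glad.h>

// Returns an empty string on success, otherwise the driver's info log.
std::string GetShaderCompileLog(GLuint shader_id) {
    GLint status{};
    glGetShaderiv(shader_id, GL_COMPILE_STATUS, &status);
    if (status == GL_TRUE) {
        return {};
    }
    GLint length{};
    glGetShaderiv(shader_id, GL_INFO_LOG_LENGTH, &length);
    std::string log(static_cast<std::size_t>(length), '\0');
    glGetShaderInfoLog(shader_id, length, nullptr, log.data());
    return log;
}
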
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index d86e137ac..f4777d0b0 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -6,8 +6,11 @@
6#include <glad/glad.h> 6#include <glad/glad.h>
7#include "common/assert.h" 7#include "common/assert.h"
8#include "common/logging/log.h" 8#include "common/logging/log.h"
9#include "common/microprofile.h"
9#include "video_core/renderer_opengl/gl_state.h" 10#include "video_core/renderer_opengl/gl_state.h"
10 11
12MICROPROFILE_DEFINE(OpenGL_State, "OpenGL", "State Change", MP_RGB(192, 128, 128));
13
11namespace OpenGL { 14namespace OpenGL {
12 15
13using Maxwell = Tegra::Engines::Maxwell3D::Regs; 16using Maxwell = Tegra::Engines::Maxwell3D::Regs;
@@ -162,6 +165,25 @@ OpenGLState::OpenGLState() {
162 alpha_test.ref = 0.0f; 165 alpha_test.ref = 0.0f;
163} 166}
164 167
168void OpenGLState::SetDefaultViewports() {
169 for (auto& item : viewports) {
170 item.x = 0;
171 item.y = 0;
172 item.width = 0;
173 item.height = 0;
174 item.depth_range_near = 0.0f;
175 item.depth_range_far = 1.0f;
176 item.scissor.enabled = false;
177 item.scissor.x = 0;
178 item.scissor.y = 0;
179 item.scissor.width = 0;
180 item.scissor.height = 0;
181 }
182
183 depth_clamp.far_plane = false;
184 depth_clamp.near_plane = false;
185}
186
165void OpenGLState::ApplyDefaultState() { 187void OpenGLState::ApplyDefaultState() {
166 glEnable(GL_BLEND); 188 glEnable(GL_BLEND);
167 glDisable(GL_FRAMEBUFFER_SRGB); 189 glDisable(GL_FRAMEBUFFER_SRGB);
@@ -523,7 +545,8 @@ void OpenGLState::ApplySamplers() const {
523 } 545 }
524} 546}
525 547
526void OpenGLState::Apply() const { 548void OpenGLState::Apply() {
549 MICROPROFILE_SCOPE(OpenGL_State);
527 ApplyFramebufferState(); 550 ApplyFramebufferState();
528 ApplyVertexArrayState(); 551 ApplyVertexArrayState();
529 ApplyShaderProgram(); 552 ApplyShaderProgram();
@@ -532,19 +555,31 @@ void OpenGLState::Apply() const {
532 ApplyPointSize(); 555 ApplyPointSize();
533 ApplyFragmentColorClamp(); 556 ApplyFragmentColorClamp();
534 ApplyMultisample(); 557 ApplyMultisample();
558 if (dirty.color_mask) {
559 ApplyColorMask();
560 dirty.color_mask = false;
561 }
535 ApplyDepthClamp(); 562 ApplyDepthClamp();
536 ApplyColorMask();
537 ApplyViewport(); 563 ApplyViewport();
538 ApplyStencilTest(); 564 if (dirty.stencil_state) {
565 ApplyStencilTest();
566 dirty.stencil_state = false;
567 }
539 ApplySRgb(); 568 ApplySRgb();
540 ApplyCulling(); 569 ApplyCulling();
541 ApplyDepth(); 570 ApplyDepth();
542 ApplyPrimitiveRestart(); 571 ApplyPrimitiveRestart();
543 ApplyBlending(); 572 if (dirty.blend_state) {
573 ApplyBlending();
574 dirty.blend_state = false;
575 }
544 ApplyLogicOp(); 576 ApplyLogicOp();
545 ApplyTextures(); 577 ApplyTextures();
546 ApplySamplers(); 578 ApplySamplers();
547 ApplyPolygonOffset(); 579 if (dirty.polygon_offset) {
580 ApplyPolygonOffset();
581 dirty.polygon_offset = false;
582 }
548 ApplyAlphaTest(); 583 ApplyAlphaTest();
549} 584}
550 585
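
The Apply changes introduce per-group dirty tracking: blend, stencil, polygon offset and color mask are reapplied only when flagged, while everything else still runs unconditionally. The pattern in miniature; AllDirty() in gl_state.h below is the escape hatch for code that touched GL behind the tracker's back, which is why later hunks call state.AllDirty() before state.Apply():

// Stand-in for OpenGLState's new dirty tracking; GL calls elided.
struct StateSketch {
    struct {
        bool blend_state = true; // start dirty so the first Apply() uploads it
    } dirty;

    void ApplyBlending() { /* glEnablei / glBlendFuncSeparatei / ... */ }

    void Apply() {
        if (dirty.blend_state) {
            ApplyBlending();
            dirty.blend_state = false;
        }
        // Untracked groups (culling, depth, logic op, ...) still run every time.
    }

    void AllDirty() {
        dirty.blend_state = true;
    }
};
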
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index b0140495d..fdf9a8a12 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -195,8 +195,9 @@ public:
195 s_rgb_used = false; 195 s_rgb_used = false;
196 } 196 }
197 197
198 void SetDefaultViewports();
198 /// Apply this state as the current OpenGL state 199 /// Apply this state as the current OpenGL state
199 void Apply() const; 200 void Apply();
200 201
201 void ApplyFramebufferState() const; 202 void ApplyFramebufferState() const;
202 void ApplyVertexArrayState() const; 203 void ApplyVertexArrayState() const;
@@ -237,11 +238,41 @@ public:
 237 /// Viewport does not affect glClearBuffer so emulate viewport using scissor test 238 /// Viewport does not affect glClearBuffer so emulate viewport using scissor test
238 void EmulateViewportWithScissor(); 239 void EmulateViewportWithScissor();
239 240
241 void MarkDirtyBlendState() {
242 dirty.blend_state = true;
243 }
244
245 void MarkDirtyStencilState() {
246 dirty.stencil_state = true;
247 }
248
249 void MarkDirtyPolygonOffset() {
250 dirty.polygon_offset = true;
251 }
252
253 void MarkDirtyColorMask() {
254 dirty.color_mask = true;
255 }
256
257 void AllDirty() {
258 dirty.blend_state = true;
259 dirty.stencil_state = true;
260 dirty.polygon_offset = true;
261 dirty.color_mask = true;
262 }
263
240private: 264private:
241 static OpenGLState cur_state; 265 static OpenGLState cur_state;
242 266
 243 // Workaround for sRGB problems caused by Qt not supporting sRGB output 267 // Workaround for sRGB problems caused by Qt not supporting sRGB output
244 static bool s_rgb_used; 268 static bool s_rgb_used;
269 struct {
270 bool blend_state;
271 bool stencil_state;
272 bool viewport_state;
273 bool polygon_offset;
274 bool color_mask;
275 } dirty{};
245}; 276};
246 277
247} // namespace OpenGL 278} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 08ae1a429..408332f90 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -31,6 +31,8 @@ using VideoCore::Surface::SurfaceType;
31 31
32MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128)); 32MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128));
33MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128)); 33MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128));
34MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy",
35 MP_RGB(128, 192, 128));
34 36
35namespace { 37namespace {
36 38
@@ -135,7 +137,6 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format
135const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { 137const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
136 ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size()); 138 ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
137 const auto& format{tex_format_tuples[static_cast<std::size_t>(pixel_format)]}; 139 const auto& format{tex_format_tuples[static_cast<std::size_t>(pixel_format)]};
138 ASSERT(component_type == format.component_type);
139 return format; 140 return format;
140} 141}
141 142
@@ -483,11 +484,15 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
483 const auto& dst_params{dst_view->GetSurfaceParams()}; 484 const auto& dst_params{dst_view->GetSurfaceParams()};
484 485
485 OpenGLState prev_state{OpenGLState::GetCurState()}; 486 OpenGLState prev_state{OpenGLState::GetCurState()};
486 SCOPE_EXIT({ prev_state.Apply(); }); 487 SCOPE_EXIT({
488 prev_state.AllDirty();
489 prev_state.Apply();
490 });
487 491
488 OpenGLState state; 492 OpenGLState state;
489 state.draw.read_framebuffer = src_framebuffer.handle; 493 state.draw.read_framebuffer = src_framebuffer.handle;
490 state.draw.draw_framebuffer = dst_framebuffer.handle; 494 state.draw.draw_framebuffer = dst_framebuffer.handle;
495 state.AllDirty();
491 state.Apply(); 496 state.Apply();
492 497
493 u32 buffers{}; 498 u32 buffers{};
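
SCOPE_EXIT guarantees the snapshotted state is reapplied on every exit path of ImageBlit, and the added AllDirty() makes that reapplication unconditional because the blit just bypassed the dirty tracker. The macro's effect as a plain RAII guard, runnable on its own:

#include <cstdio>

// Minimal stand-in for the common SCOPE_EXIT macro: run a callable on scope exit.
template <typename F>
struct ScopeExit {
    F func;
    ~ScopeExit() { func(); }
};
template <typename F>
ScopeExit(F) -> ScopeExit<F>;

int main() {
    std::puts("snapshot prev_state");
    ScopeExit restore{[] { std::puts("prev_state.AllDirty(); prev_state.Apply();"); }};
    std::puts("bind scratch framebuffers and glBlitFramebuffer");
    // restore runs here, even on early return or exception
}
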
@@ -535,6 +540,7 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
535} 540}
536 541
537void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) { 542void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) {
543 MICROPROFILE_SCOPE(OpenGL_Texture_Buffer_Copy);
538 const auto& src_params = src_surface->GetSurfaceParams(); 544 const auto& src_params = src_surface->GetSurfaceParams();
539 const auto& dst_params = dst_surface->GetSurfaceParams(); 545 const auto& dst_params = dst_surface->GetSurfaceParams();
540 UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1); 546 UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index b142521ec..a05cef3b9 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -101,7 +101,6 @@ RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::Syst
101 101
102RendererOpenGL::~RendererOpenGL() = default; 102RendererOpenGL::~RendererOpenGL() = default;
103 103
104/// Swap buffers (render frame)
105void RendererOpenGL::SwapBuffers( 104void RendererOpenGL::SwapBuffers(
106 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { 105 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
107 106
@@ -109,6 +108,7 @@ void RendererOpenGL::SwapBuffers(
109 108
110 // Maintain the rasterizer's state as a priority 109 // Maintain the rasterizer's state as a priority
111 OpenGLState prev_state = OpenGLState::GetCurState(); 110 OpenGLState prev_state = OpenGLState::GetCurState();
111 state.AllDirty();
112 state.Apply(); 112 state.Apply();
113 113
114 if (framebuffer) { 114 if (framebuffer) {
@@ -130,6 +130,8 @@ void RendererOpenGL::SwapBuffers(
130 130
131 DrawScreen(render_window.GetFramebufferLayout()); 131 DrawScreen(render_window.GetFramebufferLayout());
132 132
133 rasterizer->TickFrame();
134
133 render_window.SwapBuffers(); 135 render_window.SwapBuffers();
134 } 136 }
135 137
@@ -139,6 +141,7 @@ void RendererOpenGL::SwapBuffers(
139 system.GetPerfStats().BeginSystemFrame(); 141 system.GetPerfStats().BeginSystemFrame();
140 142
141 // Restore the rasterizer state 143 // Restore the rasterizer state
144 prev_state.AllDirty();
142 prev_state.Apply(); 145 prev_state.Apply();
143} 146}
144 147
@@ -205,6 +208,7 @@ void RendererOpenGL::InitOpenGLObjects() {
205 // Link shaders and get variable locations 208 // Link shaders and get variable locations
206 shader.CreateFromSource(vertex_shader, nullptr, fragment_shader); 209 shader.CreateFromSource(vertex_shader, nullptr, fragment_shader);
207 state.draw.shader_program = shader.handle; 210 state.draw.shader_program = shader.handle;
211 state.AllDirty();
208 state.Apply(); 212 state.Apply();
209 uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix"); 213 uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix");
210 uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture"); 214 uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture");
@@ -262,7 +266,6 @@ void RendererOpenGL::CreateRasterizer() {
262 if (rasterizer) { 266 if (rasterizer) {
263 return; 267 return;
264 } 268 }
265 // Initialize sRGB Usage
266 OpenGLState::ClearsRGBUsed(); 269 OpenGLState::ClearsRGBUsed();
267 rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info); 270 rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info);
268} 271}
@@ -338,12 +341,14 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
 338         // Workaround brightness problems in SMO by enabling sRGB in the final output 341         // Workaround brightness problems in SMO by enabling sRGB in the final output
 339         // if it has been used in the frame. Needed because of this bug in Qt: QTBUG-50987 342         // if it has been used in the frame. Needed because of this bug in Qt: QTBUG-50987
340 state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed(); 343 state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed();
344 state.AllDirty();
341 state.Apply(); 345 state.Apply();
342 glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data()); 346 glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data());
343 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); 347 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
344 // Restore default state 348 // Restore default state
345 state.framebuffer_srgb.enabled = false; 349 state.framebuffer_srgb.enabled = false;
346 state.texture_units[0].texture = 0; 350 state.texture_units[0].texture = 0;
351 state.AllDirty();
347 state.Apply(); 352 state.Apply();
348 // Clear sRGB state for the next frame 353 // Clear sRGB state for the next frame
349 OpenGLState::ClearsRGBUsed(); 354 OpenGLState::ClearsRGBUsed();
@@ -388,6 +393,7 @@ void RendererOpenGL::CaptureScreenshot() {
388 GLuint old_read_fb = state.draw.read_framebuffer; 393 GLuint old_read_fb = state.draw.read_framebuffer;
389 GLuint old_draw_fb = state.draw.draw_framebuffer; 394 GLuint old_draw_fb = state.draw.draw_framebuffer;
390 state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle; 395 state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle;
396 state.AllDirty();
391 state.Apply(); 397 state.Apply();
392 398
393 Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; 399 Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
@@ -407,6 +413,7 @@ void RendererOpenGL::CaptureScreenshot() {
407 screenshot_framebuffer.Release(); 413 screenshot_framebuffer.Release();
408 state.draw.read_framebuffer = old_read_fb; 414 state.draw.read_framebuffer = old_read_fb;
409 state.draw.draw_framebuffer = old_draw_fb; 415 state.draw.draw_framebuffer = old_draw_fb;
416 state.AllDirty();
410 state.Apply(); 417 state.Apply();
411 glDeleteRenderbuffers(1, &renderbuffer); 418 glDeleteRenderbuffers(1, &renderbuffer);
412 419
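
Every AllDirty() insertion in this file serves the same contract: Apply() now consults per-section dirty flags (the booleans visible at the top of this diff) and skips clean sections, so code outside the rasterizer's tracking loop has to invalidate everything first. A simplified sketch of that mechanism, with hypothetical names:

    struct State {
        struct {
            bool enabled = false;
        } framebuffer_srgb;

        struct {
            bool framebuffer_srgb = true;   // One flag per state section.
        } dirty;

        void AllDirty() {
            dirty.framebuffer_srgb = true;
        }

        void Apply() {
            if (!dirty.framebuffer_srgb) {
                return;                     // Section unchanged: skip the GL call.
            }
            if (framebuffer_srgb.enabled) {
                glEnable(GL_FRAMEBUFFER_SRGB);
            } else {
                glDisable(GL_FRAMEBUFFER_SRGB);
            }
            dirty.framebuffer_srgb = false; // Clean until someone mutates it.
        }
    };
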
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp
index 68c36988d..c504a2c1a 100644
--- a/src/video_core/renderer_opengl/utils.cpp
+++ b/src/video_core/renderer_opengl/utils.cpp
@@ -13,29 +13,67 @@
13 13
14namespace OpenGL { 14namespace OpenGL {
15 15
16VertexArrayPushBuffer::VertexArrayPushBuffer() = default;
17
18VertexArrayPushBuffer::~VertexArrayPushBuffer() = default;
19
20void VertexArrayPushBuffer::Setup(GLuint vao_) {
21 vao = vao_;
22 index_buffer = nullptr;
23 vertex_buffers.clear();
24}
25
26void VertexArrayPushBuffer::SetIndexBuffer(const GLuint* buffer) {
27 index_buffer = buffer;
28}
29
30void VertexArrayPushBuffer::SetVertexBuffer(GLuint binding_index, const GLuint* buffer,
31 GLintptr offset, GLsizei stride) {
32 vertex_buffers.push_back(Entry{binding_index, buffer, offset, stride});
33}
34
35void VertexArrayPushBuffer::Bind() {
36 if (index_buffer) {
37 glVertexArrayElementBuffer(vao, *index_buffer);
38 }
39
40 // TODO(Rodrigo): Find a way to ARB_multi_bind this
41 for (const auto& entry : vertex_buffers) {
42 glVertexArrayVertexBuffer(vao, entry.binding_index, *entry.buffer, entry.offset,
43 entry.stride);
44 }
45}
46
16BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {} 47BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {}
17 48
18BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; 49BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default;
19 50
20void BindBuffersRangePushBuffer::Setup(GLuint first_) { 51void BindBuffersRangePushBuffer::Setup(GLuint first_) {
21 first = first_; 52 first = first_;
22 buffers.clear(); 53 buffer_pointers.clear();
23 offsets.clear(); 54 offsets.clear();
24 sizes.clear(); 55 sizes.clear();
25} 56}
26 57
27void BindBuffersRangePushBuffer::Push(GLuint buffer, GLintptr offset, GLsizeiptr size) { 58void BindBuffersRangePushBuffer::Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size) {
28 buffers.push_back(buffer); 59 buffer_pointers.push_back(buffer);
29 offsets.push_back(offset); 60 offsets.push_back(offset);
30 sizes.push_back(size); 61 sizes.push_back(size);
31} 62}
32 63
33void BindBuffersRangePushBuffer::Bind() const { 64void BindBuffersRangePushBuffer::Bind() {
34 const std::size_t count{buffers.size()}; 65 // Ensure sizes are valid.
66 const std::size_t count{buffer_pointers.size()};
35 DEBUG_ASSERT(count == offsets.size() && count == sizes.size()); 67 DEBUG_ASSERT(count == offsets.size() && count == sizes.size());
36 if (count == 0) { 68 if (count == 0) {
37 return; 69 return;
38 } 70 }
71
72 // Dereference buffers.
73 buffers.resize(count);
74 std::transform(buffer_pointers.begin(), buffer_pointers.end(), buffers.begin(),
75 [](const GLuint* pointer) { return *pointer; });
76
39 glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(), 77 glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(),
40 sizes.data()); 78 sizes.data());
41} 79}
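
Both push buffers now queue pointers to GL handles instead of handle values: when SetVertexBuffer()/Push() run, the buffer cache may not yet have committed its uploads, so the final GLuint values do not exist. Bind() dereferences at the last moment. A usage sketch (the handle pointers would come from the reworked buffer cache; the surrounding names are illustrative):

    VertexArrayPushBuffer push_buffer;
    push_buffer.Setup(vao.handle);
    push_buffer.SetIndexBuffer(index_buffer_ptr);   // const GLuint*, not GLuint
    push_buffer.SetVertexBuffer(0, vertex_buffer_ptr, vertex_offset, vertex_stride);

    // ... the buffer cache maps/unmaps its stream buffer here, which is the
    //     point where *index_buffer_ptr and *vertex_buffer_ptr become valid ...

    push_buffer.Bind();  // Dereferences every pointer and binds to the VAO.
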
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h
index 4a752f3b4..6c2b45546 100644
--- a/src/video_core/renderer_opengl/utils.h
+++ b/src/video_core/renderer_opengl/utils.h
@@ -11,20 +11,49 @@
11 11
12namespace OpenGL { 12namespace OpenGL {
13 13
14class BindBuffersRangePushBuffer { 14class VertexArrayPushBuffer final {
15public: 15public:
16 BindBuffersRangePushBuffer(GLenum target); 16 explicit VertexArrayPushBuffer();
17 ~VertexArrayPushBuffer();
18
19 void Setup(GLuint vao_);
20
21 void SetIndexBuffer(const GLuint* buffer);
22
23 void SetVertexBuffer(GLuint binding_index, const GLuint* buffer, GLintptr offset,
24 GLsizei stride);
25
26 void Bind();
27
28private:
29 struct Entry {
30 GLuint binding_index{};
31 const GLuint* buffer{};
32 GLintptr offset{};
33 GLsizei stride{};
34 };
35
36 GLuint vao{};
37 const GLuint* index_buffer{};
38 std::vector<Entry> vertex_buffers;
39};
40
41class BindBuffersRangePushBuffer final {
42public:
43 explicit BindBuffersRangePushBuffer(GLenum target);
17 ~BindBuffersRangePushBuffer(); 44 ~BindBuffersRangePushBuffer();
18 45
19 void Setup(GLuint first_); 46 void Setup(GLuint first_);
20 47
21 void Push(GLuint buffer, GLintptr offset, GLsizeiptr size); 48 void Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size);
22 49
23 void Bind() const; 50 void Bind();
24 51
25private: 52private:
26 GLenum target; 53 GLenum target{};
27 GLuint first; 54 GLuint first{};
55 std::vector<const GLuint*> buffer_pointers;
56
28 std::vector<GLuint> buffers; 57 std::vector<GLuint> buffers;
29 std::vector<GLintptr> offsets; 58 std::vector<GLintptr> offsets;
30 std::vector<GLsizeiptr> sizes; 59 std::vector<GLsizeiptr> sizes;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 02a9f5ecb..d2e9f4031 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -109,8 +109,8 @@ void VKBufferCache::Reserve(std::size_t max_size) {
109 } 109 }
110} 110}
111 111
112VKExecutionContext VKBufferCache::Send(VKExecutionContext exctx) { 112void VKBufferCache::Send() {
113 return stream_buffer->Send(exctx, buffer_offset - buffer_offset_base); 113 stream_buffer->Send(buffer_offset - buffer_offset_base);
114} 114}
115 115
116void VKBufferCache::AlignBuffer(std::size_t alignment) { 116void VKBufferCache::AlignBuffer(std::size_t alignment) {
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 3edf460df..49f13bcdc 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -77,7 +77,7 @@ public:
77 void Reserve(std::size_t max_size); 77 void Reserve(std::size_t max_size);
78 78
79 /// Ensures that the set data is sent to the device. 79 /// Ensures that the set data is sent to the device.
80 [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx); 80 void Send();
81 81
82 /// Returns the buffer cache handle. 82 /// Returns the buffer cache handle.
83 vk::Buffer GetBuffer() const { 83 vk::Buffer GetBuffer() const {
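
The Send() signature change is the buffer-cache side of the VKExecutionContext removal below: callers no longer thread a context value through and reassign it, because the scheduler owns the current fence and command buffer. Sketch of the call-site delta, assuming a renderer loop not shown in this diff:

    // Before: every call consumed and produced an execution context.
    //     exctx = buffer_cache.Send(exctx);
    // After: the scheduler is the single source of truth.
    buffer_cache.Send();
    scheduler.Flush();
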
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.h b/src/video_core/renderer_vulkan/vk_sampler_cache.h
index 771b05c73..1f73b716b 100644
--- a/src/video_core/renderer_vulkan/vk_sampler_cache.h
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.h
@@ -4,9 +4,6 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <unordered_map>
8
9#include "common/common_types.h"
10#include "video_core/renderer_vulkan/declarations.h" 7#include "video_core/renderer_vulkan/declarations.h"
11#include "video_core/sampler_cache.h" 8#include "video_core/sampler_cache.h"
12#include "video_core/textures/texture.h" 9#include "video_core/textures/texture.h"
@@ -21,9 +18,9 @@ public:
21 ~VKSamplerCache(); 18 ~VKSamplerCache();
22 19
23protected: 20protected:
24 UniqueSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const; 21 UniqueSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override;
25 22
26 vk::Sampler ToSamplerType(const UniqueSampler& sampler) const; 23 vk::Sampler ToSamplerType(const UniqueSampler& sampler) const override;
27 24
28private: 25private:
29 const VKDevice& device; 26 const VKDevice& device;
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index f1fea1871..0f8116458 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -19,23 +19,19 @@ VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_man
19 19
20VKScheduler::~VKScheduler() = default; 20VKScheduler::~VKScheduler() = default;
21 21
22VKExecutionContext VKScheduler::GetExecutionContext() const { 22void VKScheduler::Flush(bool release_fence, vk::Semaphore semaphore) {
23 return VKExecutionContext(current_fence, current_cmdbuf);
24}
25
26VKExecutionContext VKScheduler::Flush(vk::Semaphore semaphore) {
27 SubmitExecution(semaphore); 23 SubmitExecution(semaphore);
28 current_fence->Release(); 24 if (release_fence)
25 current_fence->Release();
29 AllocateNewContext(); 26 AllocateNewContext();
30 return GetExecutionContext();
31} 27}
32 28
33VKExecutionContext VKScheduler::Finish(vk::Semaphore semaphore) { 29void VKScheduler::Finish(bool release_fence, vk::Semaphore semaphore) {
34 SubmitExecution(semaphore); 30 SubmitExecution(semaphore);
35 current_fence->Wait(); 31 current_fence->Wait();
36 current_fence->Release(); 32 if (release_fence)
33 current_fence->Release();
37 AllocateNewContext(); 34 AllocateNewContext();
38 return GetExecutionContext();
39} 35}
40 36
41void VKScheduler::SubmitExecution(vk::Semaphore semaphore) { 37void VKScheduler::SubmitExecution(vk::Semaphore semaphore) {
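
The new release_fence parameter lets a caller keep the submitted fence alive past Flush()/Finish(), for example to guard a readback; the caller then has to release the fence itself. A hedged sketch of the intended use:

    // Capture the fence object (not a view) before submitting, since
    // AllocateNewContext() swaps current_fence right after submission.
    VKFence& fence = scheduler.GetFence();
    scheduler.Finish(/*release_fence=*/false);  // Waits but does not release.
    // ... read back resources that were protected by `fence` ...
    fence.Release();  // Hand the fence back to the resource manager.
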
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index cfaf5376f..0e5b49c7f 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -10,10 +10,43 @@
10namespace Vulkan { 10namespace Vulkan {
11 11
12class VKDevice; 12class VKDevice;
13class VKExecutionContext;
14class VKFence; 13class VKFence;
15class VKResourceManager; 14class VKResourceManager;
16 15
16class VKFenceView {
17public:
18 VKFenceView() = default;
19 VKFenceView(VKFence* const& fence) : fence{fence} {}
20
21 VKFence* operator->() const noexcept {
22 return fence;
23 }
24
25 operator VKFence&() const noexcept {
26 return *fence;
27 }
28
29private:
30 VKFence* const& fence;
31};
32
33class VKCommandBufferView {
34public:
35 VKCommandBufferView() = default;
36 VKCommandBufferView(const vk::CommandBuffer& cmdbuf) : cmdbuf{cmdbuf} {}
37
38 const vk::CommandBuffer* operator->() const noexcept {
39 return &cmdbuf;
40 }
41
42 operator vk::CommandBuffer() const noexcept {
43 return cmdbuf;
44 }
45
46private:
47 const vk::CommandBuffer& cmdbuf;
48};
49
17/// The scheduler abstracts command buffer and fence management with an interface that's able to do 50/// The scheduler abstracts command buffer and fence management with an interface that's able to do
18/// OpenGL-like operations on Vulkan command buffers. 51/// OpenGL-like operations on Vulkan command buffers.
19class VKScheduler { 52class VKScheduler {
@@ -21,16 +54,21 @@ public:
21 explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager); 54 explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager);
22 ~VKScheduler(); 55 ~VKScheduler();
23 56
24 /// Gets the current execution context. 57 /// Gets a reference to the current fence.
25 [[nodiscard]] VKExecutionContext GetExecutionContext() const; 58 VKFenceView GetFence() const {
59 return current_fence;
60 }
61
62 /// Gets a reference to the current command buffer.
63 VKCommandBufferView GetCommandBuffer() const {
64 return current_cmdbuf;
65 }
26 66
27 /// Sends the current execution context to the GPU. It invalidates the current execution context 67 /// Sends the current execution context to the GPU.
28 /// and returns a new one. 68 void Flush(bool release_fence = true, vk::Semaphore semaphore = nullptr);
29 VKExecutionContext Flush(vk::Semaphore semaphore = nullptr);
30 69
31 /// Sends the current execution context to the GPU and waits for it to complete. It invalidates 70 /// Sends the current execution context to the GPU and waits for it to complete.
32 /// the current execution context and returns a new one. 71 void Finish(bool release_fence = true, vk::Semaphore semaphore = nullptr);
33 VKExecutionContext Finish(vk::Semaphore semaphore = nullptr);
34 72
35private: 73private:
36 void SubmitExecution(vk::Semaphore semaphore); 74 void SubmitExecution(vk::Semaphore semaphore);
@@ -44,26 +82,4 @@ private:
44 VKFence* next_fence = nullptr; 82 VKFence* next_fence = nullptr;
45}; 83};
46 84
47class VKExecutionContext {
48 friend class VKScheduler;
49
50public:
51 VKExecutionContext() = default;
52
53 VKFence& GetFence() const {
54 return *fence;
55 }
56
57 vk::CommandBuffer GetCommandBuffer() const {
58 return cmdbuf;
59 }
60
61private:
62 explicit VKExecutionContext(VKFence* fence, vk::CommandBuffer cmdbuf)
63 : fence{fence}, cmdbuf{cmdbuf} {}
64
65 VKFence* fence{};
66 vk::CommandBuffer cmdbuf;
67};
68
69} // namespace Vulkan 85} // namespace Vulkan
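
The two view classes are what make removing VKExecutionContext safe: each stores a reference to the scheduler's own current_fence / current_cmdbuf member, so a view captured once always resolves to whatever is current, even after Flush() or Finish() allocates a new context. A small sketch of a hypothetical consumer:

    class StagingWatcher {
    public:
        explicit StagingWatcher(VKScheduler& scheduler)
            : fence{scheduler.GetFence()} {}  // Captured once, never refreshed.

        void WaitForGpu() {
            // Resolves through the scheduler's live pointer: this waits on
            // the *current* fence, not the one current at construction time.
            fence->Wait();
        }

    private:
        VKFenceView fence;
    };
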
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 97ce214b1..d267712c9 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -205,10 +205,6 @@ public:
205 } 205 }
206 206
207private: 207private:
208 using OperationDecompilerFn = Id (SPIRVDecompiler::*)(Operation);
209 using OperationDecompilersArray =
210 std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>;
211
212 static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount); 208 static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount);
213 209
214 void AllocateBindings() { 210 void AllocateBindings() {
@@ -430,20 +426,17 @@ private:
430 instance_index = DeclareBuiltIn(spv::BuiltIn::InstanceIndex, spv::StorageClass::Input, 426 instance_index = DeclareBuiltIn(spv::BuiltIn::InstanceIndex, spv::StorageClass::Input,
431 t_in_uint, "instance_index"); 427 t_in_uint, "instance_index");
432 428
433 bool is_point_size_declared = false;
434 bool is_clip_distances_declared = false; 429 bool is_clip_distances_declared = false;
435 for (const auto index : ir.GetOutputAttributes()) { 430 for (const auto index : ir.GetOutputAttributes()) {
436 if (index == Attribute::Index::PointSize) { 431 if (index == Attribute::Index::ClipDistances0123 ||
437 is_point_size_declared = true; 432 index == Attribute::Index::ClipDistances4567) {
438 } else if (index == Attribute::Index::ClipDistances0123 ||
439 index == Attribute::Index::ClipDistances4567) {
440 is_clip_distances_declared = true; 433 is_clip_distances_declared = true;
441 } 434 }
442 } 435 }
443 436
444 std::vector<Id> members; 437 std::vector<Id> members;
445 members.push_back(t_float4); 438 members.push_back(t_float4);
446 if (is_point_size_declared) { 439 if (ir.UsesPointSize()) {
447 members.push_back(t_float); 440 members.push_back(t_float);
448 } 441 }
449 if (is_clip_distances_declared) { 442 if (is_clip_distances_declared) {
@@ -466,7 +459,7 @@ private:
466 459
467 position_index = MemberDecorateBuiltIn(spv::BuiltIn::Position, "position", true); 460 position_index = MemberDecorateBuiltIn(spv::BuiltIn::Position, "position", true);
468 point_size_index = 461 point_size_index =
469 MemberDecorateBuiltIn(spv::BuiltIn::PointSize, "point_size", is_point_size_declared); 462 MemberDecorateBuiltIn(spv::BuiltIn::PointSize, "point_size", ir.UsesPointSize());
470 clip_distances_index = MemberDecorateBuiltIn(spv::BuiltIn::ClipDistance, "clip_distances", 463 clip_distances_index = MemberDecorateBuiltIn(spv::BuiltIn::ClipDistance, "clip_distances",
471 is_clip_distances_declared); 464 is_clip_distances_declared);
472 465
@@ -712,7 +705,8 @@ private:
712 case Attribute::Index::Position: 705 case Attribute::Index::Position:
713 return AccessElement(t_out_float, per_vertex, position_index, 706 return AccessElement(t_out_float, per_vertex, position_index,
714 abuf->GetElement()); 707 abuf->GetElement());
715 case Attribute::Index::PointSize: 708 case Attribute::Index::LayerViewportPointSize:
709 UNIMPLEMENTED_IF(abuf->GetElement() != 3);
716 return AccessElement(t_out_float, per_vertex, point_size_index); 710 return AccessElement(t_out_float, per_vertex, point_size_index);
717 case Attribute::Index::ClipDistances0123: 711 case Attribute::Index::ClipDistances0123:
718 return AccessElement(t_out_float, per_vertex, clip_distances_index, 712 return AccessElement(t_out_float, per_vertex, clip_distances_index,
@@ -806,12 +800,7 @@ private:
806 return {}; 800 return {};
807 } 801 }
808 802
809 Id LogicalAll2(Operation operation) { 803 Id LogicalAnd2(Operation operation) {
810 UNIMPLEMENTED();
811 return {};
812 }
813
814 Id LogicalAny2(Operation operation) {
815 UNIMPLEMENTED(); 804 UNIMPLEMENTED();
816 return {}; 805 return {};
817 } 806 }
@@ -949,6 +938,14 @@ private:
949 return {}; 938 return {};
950 } 939 }
951 940
941 Id BranchIndirect(Operation operation) {
942 const Id op_a = VisitOperand<Type::Uint>(operation, 0);
943
944 Emit(OpStore(jmp_to, op_a));
945 BranchingOp([&]() { Emit(OpBranch(continue_label)); });
946 return {};
947 }
948
952 Id PushFlowStack(Operation operation) { 949 Id PushFlowStack(Operation operation) {
953 const auto target = std::get_if<ImmediateNode>(&*operation[0]); 950 const auto target = std::get_if<ImmediateNode>(&*operation[0]);
954 ASSERT(target); 951 ASSERT(target);
@@ -1200,7 +1197,7 @@ private:
1200 return {}; 1197 return {};
1201 } 1198 }
1202 1199
1203 static constexpr OperationDecompilersArray operation_decompilers = { 1200 static constexpr std::array operation_decompilers = {
1204 &SPIRVDecompiler::Assign, 1201 &SPIRVDecompiler::Assign,
1205 1202
1206 &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float, 1203 &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float,
@@ -1285,8 +1282,7 @@ private:
1285 &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>, 1282 &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>,
1286 &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>, 1283 &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>,
1287 &SPIRVDecompiler::LogicalPick2, 1284 &SPIRVDecompiler::LogicalPick2,
1288 &SPIRVDecompiler::LogicalAll2, 1285 &SPIRVDecompiler::LogicalAnd2,
1289 &SPIRVDecompiler::LogicalAny2,
1290 1286
1291 &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>, 1287 &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>,
1292 &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>, 1288 &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>,
@@ -1334,6 +1330,7 @@ private:
1334 &SPIRVDecompiler::ImageStore, 1330 &SPIRVDecompiler::ImageStore,
1335 1331
1336 &SPIRVDecompiler::Branch, 1332 &SPIRVDecompiler::Branch,
1333 &SPIRVDecompiler::BranchIndirect,
1337 &SPIRVDecompiler::PushFlowStack, 1334 &SPIRVDecompiler::PushFlowStack,
1338 &SPIRVDecompiler::PopFlowStack, 1335 &SPIRVDecompiler::PopFlowStack,
1339 &SPIRVDecompiler::Exit, 1336 &SPIRVDecompiler::Exit,
@@ -1350,6 +1347,7 @@ private:
1350 &SPIRVDecompiler::WorkGroupId<1>, 1347 &SPIRVDecompiler::WorkGroupId<1>,
1351 &SPIRVDecompiler::WorkGroupId<2>, 1348 &SPIRVDecompiler::WorkGroupId<2>,
1352 }; 1349 };
1350 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
1353 1351
1354 const VKDevice& device; 1352 const VKDevice& device;
1355 const ShaderIR& ir; 1353 const ShaderIR& ir;
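
Replacing the hand-maintained OperationDecompilersArray alias with class template argument deduction plus a static_assert turns table/enum drift into a build error: the LogicalAll2/LogicalAny2 to LogicalAnd2 merge above is exactly the kind of change that would otherwise silently shift every later entry. The pattern in isolation:

    #include <array>
    #include <cstddef>

    enum class OperationCode { Assign, Branch, Exit, Amount };

    struct Decompiler {
        int Assign() { return 0; }
        int Branch() { return 1; }
        int Exit() { return 2; }

        // Element type and size are deduced from the initializer list.
        static constexpr std::array operation_decompilers = {
            &Decompiler::Assign,
            &Decompiler::Branch,
            &Decompiler::Exit,
        };
        // Any mismatch with the enum now fails at compile time.
        static_assert(operation_decompilers.size() ==
                      static_cast<std::size_t>(OperationCode::Amount));
    };
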
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
index 58ffa42f2..62f1427f5 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@@ -46,12 +46,12 @@ std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) {
46 return {mapped_pointer + offset, offset, invalidation_mark.has_value()}; 46 return {mapped_pointer + offset, offset, invalidation_mark.has_value()};
47} 47}
48 48
49VKExecutionContext VKStreamBuffer::Send(VKExecutionContext exctx, u64 size) { 49void VKStreamBuffer::Send(u64 size) {
50 ASSERT_MSG(size <= mapped_size, "Reserved size is too small"); 50 ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
51 51
52 if (invalidation_mark) { 52 if (invalidation_mark) {
53 // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish. 53 // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish.
54 exctx = scheduler.Flush(); 54 scheduler.Flush();
55 std::for_each(watches.begin(), watches.begin() + *invalidation_mark, 55 std::for_each(watches.begin(), watches.begin() + *invalidation_mark,
56 [&](auto& resource) { resource->Wait(); }); 56 [&](auto& resource) { resource->Wait(); });
57 invalidation_mark = std::nullopt; 57 invalidation_mark = std::nullopt;
@@ -62,11 +62,9 @@ VKExecutionContext VKStreamBuffer::Send(VKExecutionContext exctx, u64 size) {
62 ReserveWatches(WATCHES_RESERVE_CHUNK); 62 ReserveWatches(WATCHES_RESERVE_CHUNK);
63 } 63 }
64 // Add a watch for this allocation. 64 // Add a watch for this allocation.
65 watches[used_watches++]->Watch(exctx.GetFence()); 65 watches[used_watches++]->Watch(scheduler.GetFence());
66 66
67 offset += size; 67 offset += size;
68
69 return exctx;
70} 68}
71 69
72void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) { 70void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) {
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
index 69d036ccd..842e54162 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.h
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -37,7 +37,7 @@ public:
37 std::tuple<u8*, u64, bool> Reserve(u64 size); 37 std::tuple<u8*, u64, bool> Reserve(u64 size);
38 38
39 /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. 39 /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
40 [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx, u64 size); 40 void Send(u64 size);
41 41
42 vk::Buffer GetBuffer() const { 42 vk::Buffer GetBuffer() const {
43 return *buffer; 43 return *buffer;
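
The stream buffer keeps its two-phase Reserve()/Send() protocol; only the fence plumbing changed, with Send() now watching scheduler.GetFence() internally. A usage sketch under that assumption:

    void UploadToStream(VKStreamBuffer& stream, const void* data, u64 size) {
        const auto [pointer, offset, invalidated] = stream.Reserve(size);
        if (invalidated) {
            // Offsets handed out earlier are stale; the caller must rebuild
            // any cached descriptors or bindings derived from them.
        }
        std::memcpy(pointer, data, static_cast<std::size_t>(size));
        stream.Send(size);  // Adds a watch on the scheduler's current fence.
    }
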
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
new file mode 100644
index 000000000..fdcc970ff
--- /dev/null
+++ b/src/video_core/shader/control_flow.cpp
@@ -0,0 +1,476 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <list>
6#include <map>
7#include <stack>
8#include <unordered_map>
9#include <unordered_set>
10#include <vector>
11
12#include "common/assert.h"
13#include "common/common_types.h"
14#include "video_core/shader/control_flow.h"
15#include "video_core/shader/shader_ir.h"
16
17namespace VideoCommon::Shader {
18
19using Tegra::Shader::Instruction;
20using Tegra::Shader::OpCode;
21
22constexpr s32 unassigned_branch = -2;
23
24struct Query {
25 u32 address{};
26 std::stack<u32> ssy_stack{};
27 std::stack<u32> pbk_stack{};
28};
29
30struct BlockStack {
31 BlockStack() = default;
32 BlockStack(const BlockStack& b) = default;
33 BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {}
34 std::stack<u32> ssy_stack{};
35 std::stack<u32> pbk_stack{};
36};
37
38struct BlockBranchInfo {
39 Condition condition{};
40 s32 address{exit_branch};
41 bool kill{};
42 bool is_sync{};
43 bool is_brk{};
44 bool ignore{};
45};
46
47struct BlockInfo {
48 u32 start{};
49 u32 end{};
50 bool visited{};
51 BlockBranchInfo branch{};
52
53 bool IsInside(const u32 address) const {
54 return start <= address && address <= end;
55 }
56};
57
58struct CFGRebuildState {
59 explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size,
60 const u32 start)
61 : program_code{program_code}, program_size{program_size}, start{start} {}
62
63 u32 start{};
64 std::vector<BlockInfo> block_info{};
65 std::list<u32> inspect_queries{};
66 std::list<Query> queries{};
67 std::unordered_map<u32, u32> registered{};
68 std::unordered_set<u32> labels{};
69 std::map<u32, u32> ssy_labels{};
70 std::map<u32, u32> pbk_labels{};
71 std::unordered_map<u32, BlockStack> stacks{};
72 const ProgramCode& program_code;
73 const std::size_t program_size;
74};
75
76enum class BlockCollision : u32 { None, Found, Inside };
77
78std::pair<BlockCollision, u32> TryGetBlock(CFGRebuildState& state, u32 address) {
79 const auto& blocks = state.block_info;
80 for (u32 index = 0; index < blocks.size(); index++) {
81 if (blocks[index].start == address) {
82 return {BlockCollision::Found, index};
83 }
84 if (blocks[index].IsInside(address)) {
85 return {BlockCollision::Inside, index};
86 }
87 }
88 return {BlockCollision::None, -1};
89}
90
91struct ParseInfo {
92 BlockBranchInfo branch_info{};
93 u32 end_address{};
94};
95
96BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) {
97 auto& it = state.block_info.emplace_back();
98 it.start = start;
99 it.end = end;
100 const u32 index = static_cast<u32>(state.block_info.size() - 1);
101 state.registered.insert({start, index});
102 return it;
103}
104
105Pred GetPredicate(u32 index, bool negated) {
106 return static_cast<Pred>(index + (negated ? 8 : 0));
107}
108
109/**
110 * Returns whether the instruction at the specified offset is a 'sched' instruction.
111 * Sched instructions always appear before a sequence of 3 instructions.
112 */
113constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
114 constexpr u32 SchedPeriod = 4;
115 u32 absolute_offset = offset - main_offset;
116
117 return (absolute_offset % SchedPeriod) == 0;
118}
119
120enum class ParseResult : u32 {
121 ControlCaught,
122 BlockEnd,
123 AbnormalFlow,
124};
125
126std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) {
127 u32 offset = static_cast<u32>(address);
128 const u32 end_address = static_cast<u32>(state.program_size / sizeof(Instruction));
129 ParseInfo parse_info{};
130
131 const auto insert_label = [](CFGRebuildState& state, u32 address) {
132 const auto pair = state.labels.emplace(address);
133 if (pair.second) {
134 state.inspect_queries.push_back(address);
135 }
136 };
137
138 while (true) {
139 if (offset >= end_address) {
140 // ASSERT_OR_EXECUTE can't be used, as it ignores the break
141 ASSERT_MSG(false, "Shader passed the current limit!");
142 parse_info.branch_info.address = exit_branch;
143 parse_info.branch_info.ignore = false;
144 break;
145 }
146 if (state.registered.count(offset) != 0) {
147 parse_info.branch_info.address = offset;
148 parse_info.branch_info.ignore = true;
149 break;
150 }
151 if (IsSchedInstruction(offset, state.start)) {
152 offset++;
153 continue;
154 }
155 const Instruction instr = {state.program_code[offset]};
156 const auto opcode = OpCode::Decode(instr);
157 if (!opcode || opcode->get().GetType() != OpCode::Type::Flow) {
158 offset++;
159 continue;
160 }
161
162 switch (opcode->get().GetId()) {
163 case OpCode::Id::EXIT: {
164 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
165 parse_info.branch_info.condition.predicate =
166 GetPredicate(pred_index, instr.negate_pred != 0);
167 if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
168 offset++;
169 continue;
170 }
171 const ConditionCode cc = instr.flow_condition_code;
172 parse_info.branch_info.condition.cc = cc;
173 if (cc == ConditionCode::F) {
174 offset++;
175 continue;
176 }
177 parse_info.branch_info.address = exit_branch;
178 parse_info.branch_info.kill = false;
179 parse_info.branch_info.is_sync = false;
180 parse_info.branch_info.is_brk = false;
181 parse_info.branch_info.ignore = false;
182 parse_info.end_address = offset;
183
184 return {ParseResult::ControlCaught, parse_info};
185 }
186 case OpCode::Id::BRA: {
187 if (instr.bra.constant_buffer != 0) {
188 return {ParseResult::AbnormalFlow, parse_info};
189 }
190 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
191 parse_info.branch_info.condition.predicate =
192 GetPredicate(pred_index, instr.negate_pred != 0);
193 if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
194 offset++;
195 continue;
196 }
197 const ConditionCode cc = instr.flow_condition_code;
198 parse_info.branch_info.condition.cc = cc;
199 if (cc == ConditionCode::F) {
200 offset++;
201 continue;
202 }
203 const u32 branch_offset = offset + instr.bra.GetBranchTarget();
204 if (branch_offset == 0) {
205 parse_info.branch_info.address = exit_branch;
206 } else {
207 parse_info.branch_info.address = branch_offset;
208 }
209 insert_label(state, branch_offset);
210 parse_info.branch_info.kill = false;
211 parse_info.branch_info.is_sync = false;
212 parse_info.branch_info.is_brk = false;
213 parse_info.branch_info.ignore = false;
214 parse_info.end_address = offset;
215
216 return {ParseResult::ControlCaught, parse_info};
217 }
218 case OpCode::Id::SYNC: {
219 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
220 parse_info.branch_info.condition.predicate =
221 GetPredicate(pred_index, instr.negate_pred != 0);
222 if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
223 offset++;
224 continue;
225 }
226 const ConditionCode cc = instr.flow_condition_code;
227 parse_info.branch_info.condition.cc = cc;
228 if (cc == ConditionCode::F) {
229 offset++;
230 continue;
231 }
232 parse_info.branch_info.address = unassigned_branch;
233 parse_info.branch_info.kill = false;
234 parse_info.branch_info.is_sync = true;
235 parse_info.branch_info.is_brk = false;
236 parse_info.branch_info.ignore = false;
237 parse_info.end_address = offset;
238
239 return {ParseResult::ControlCaught, parse_info};
240 }
241 case OpCode::Id::BRK: {
242 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
243 parse_info.branch_info.condition.predicate =
244 GetPredicate(pred_index, instr.negate_pred != 0);
245 if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
246 offset++;
247 continue;
248 }
249 const ConditionCode cc = instr.flow_condition_code;
250 parse_info.branch_info.condition.cc = cc;
251 if (cc == ConditionCode::F) {
252 offset++;
253 continue;
254 }
255 parse_info.branch_info.address = unassigned_branch;
256 parse_info.branch_info.kill = false;
257 parse_info.branch_info.is_sync = false;
258 parse_info.branch_info.is_brk = true;
259 parse_info.branch_info.ignore = false;
260 parse_info.end_address = offset;
261
262 return {ParseResult::ControlCaught, parse_info};
263 }
264 case OpCode::Id::KIL: {
265 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
266 parse_info.branch_info.condition.predicate =
267 GetPredicate(pred_index, instr.negate_pred != 0);
268 if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
269 offset++;
270 continue;
271 }
272 const ConditionCode cc = instr.flow_condition_code;
273 parse_info.branch_info.condition.cc = cc;
274 if (cc == ConditionCode::F) {
275 offset++;
276 continue;
277 }
278 parse_info.branch_info.address = exit_branch;
279 parse_info.branch_info.kill = true;
280 parse_info.branch_info.is_sync = false;
281 parse_info.branch_info.is_brk = false;
282 parse_info.branch_info.ignore = false;
283 parse_info.end_address = offset;
284
285 return {ParseResult::ControlCaught, parse_info};
286 }
287 case OpCode::Id::SSY: {
288 const u32 target = offset + instr.bra.GetBranchTarget();
289 insert_label(state, target);
290 state.ssy_labels.emplace(offset, target);
291 break;
292 }
293 case OpCode::Id::PBK: {
294 const u32 target = offset + instr.bra.GetBranchTarget();
295 insert_label(state, target);
296 state.pbk_labels.emplace(offset, target);
297 break;
298 }
299 case OpCode::Id::BRX: {
300 return {ParseResult::AbnormalFlow, parse_info};
301 }
302 default:
303 break;
304 }
305
306 offset++;
307 }
308 parse_info.branch_info.kill = false;
309 parse_info.branch_info.is_sync = false;
310 parse_info.branch_info.is_brk = false;
311 parse_info.end_address = offset - 1;
312 return {ParseResult::BlockEnd, parse_info};
313}
314
315bool TryInspectAddress(CFGRebuildState& state) {
316 if (state.inspect_queries.empty()) {
317 return false;
318 }
319
320 const u32 address = state.inspect_queries.front();
321 state.inspect_queries.pop_front();
322 const auto [result, block_index] = TryGetBlock(state, address);
323 switch (result) {
324 case BlockCollision::Found: {
325 return true;
326 }
327 case BlockCollision::Inside: {
328 // This case is the tricky one:
330        // We need to split the block into two separate blocks
330 const u32 end = state.block_info[block_index].end;
331 BlockInfo& new_block = CreateBlockInfo(state, address, end);
332 BlockInfo& current_block = state.block_info[block_index];
333 current_block.end = address - 1;
334 new_block.branch = current_block.branch;
335 BlockBranchInfo forward_branch{};
336 forward_branch.address = address;
337 forward_branch.ignore = true;
338 current_block.branch = forward_branch;
339 return true;
340 }
341 default:
342 break;
343 }
344 const auto [parse_result, parse_info] = ParseCode(state, address);
345 if (parse_result == ParseResult::AbnormalFlow) {
346        // If the flow is abnormal, return false, aborting the CFG reconstruction
347 return false;
348 }
349
350 BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address);
351 block_info.branch = parse_info.branch_info;
352 if (parse_info.branch_info.condition.IsUnconditional()) {
353 return true;
354 }
355
356 const u32 fallthrough_address = parse_info.end_address + 1;
357 state.inspect_queries.push_front(fallthrough_address);
358 return true;
359}
360
361bool TryQuery(CFGRebuildState& state) {
362 const auto gather_labels = [](std::stack<u32>& cc, std::map<u32, u32>& labels,
363 BlockInfo& block) {
364 auto gather_start = labels.lower_bound(block.start);
365 const auto gather_end = labels.upper_bound(block.end);
366 while (gather_start != gather_end) {
367 cc.push(gather_start->second);
368 gather_start++;
369 }
370 };
371 if (state.queries.empty()) {
372 return false;
373 }
374 Query& q = state.queries.front();
375 const u32 block_index = state.registered[q.address];
376 BlockInfo& block = state.block_info[block_index];
377    // If the block has been visited, check that the stacks match; otherwise gather
378    // the ssy/pbk labels into the current stack and check whether the branch at the
379    // end of the block consumes a label. Schedule new queries accordingly.
380 if (block.visited) {
381 BlockStack& stack = state.stacks[q.address];
382 const bool all_okay = (stack.ssy_stack.size() == 0 || q.ssy_stack == stack.ssy_stack) &&
383 (stack.pbk_stack.size() == 0 || q.pbk_stack == stack.pbk_stack);
384 state.queries.pop_front();
385 return all_okay;
386 }
387 block.visited = true;
388 state.stacks[q.address] = BlockStack{q};
389 Query q2(q);
390 state.queries.pop_front();
391 gather_labels(q2.ssy_stack, state.ssy_labels, block);
392 gather_labels(q2.pbk_stack, state.pbk_labels, block);
393 if (!block.branch.condition.IsUnconditional()) {
394 q2.address = block.end + 1;
395 state.queries.push_back(q2);
396 }
397 Query conditional_query{q2};
398 if (block.branch.is_sync) {
399 if (block.branch.address == unassigned_branch) {
400 block.branch.address = conditional_query.ssy_stack.top();
401 }
402 conditional_query.ssy_stack.pop();
403 }
404 if (block.branch.is_brk) {
405 if (block.branch.address == unassigned_branch) {
406 block.branch.address = conditional_query.pbk_stack.top();
407 }
408 conditional_query.pbk_stack.pop();
409 }
410 conditional_query.address = block.branch.address;
411 state.queries.push_back(conditional_query);
412 return true;
413}
414
415std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 program_size,
416 u32 start_address) {
417 CFGRebuildState state{program_code, program_size, start_address};
418 // Inspect Code and generate blocks
419 state.labels.clear();
420 state.labels.emplace(start_address);
421 state.inspect_queries.push_back(state.start);
422 while (!state.inspect_queries.empty()) {
423 if (!TryInspectAddress(state)) {
424 return {};
425 }
426 }
427 // Decompile Stacks
428 Query start_query{};
429 start_query.address = state.start;
430 state.queries.push_back(start_query);
431 bool decompiled = true;
432 while (!state.queries.empty()) {
433 if (!TryQuery(state)) {
434 decompiled = false;
435 break;
436 }
437 }
438 // Sort and organize results
439 std::sort(state.block_info.begin(), state.block_info.end(),
440 [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; });
441 ShaderCharacteristics result_out{};
442 result_out.decompilable = decompiled;
443 result_out.start = start_address;
444 result_out.end = start_address;
445 for (auto& block : state.block_info) {
446 ShaderBlock new_block{};
447 new_block.start = block.start;
448 new_block.end = block.end;
449 new_block.ignore_branch = block.branch.ignore;
450 if (!new_block.ignore_branch) {
451 new_block.branch.cond = block.branch.condition;
452 new_block.branch.kills = block.branch.kill;
453 new_block.branch.address = block.branch.address;
454 }
455 result_out.end = std::max(result_out.end, block.end);
456 result_out.blocks.push_back(new_block);
457 }
458 if (result_out.decompilable) {
459 result_out.labels = std::move(state.labels);
460 return {result_out};
461 }
462 // If it's not decompilable, merge the unlabelled blocks together
463 auto back = result_out.blocks.begin();
464 auto next = std::next(back);
465 while (next != result_out.blocks.end()) {
466 if (state.labels.count(next->start) == 0 && next->start == back->end + 1) {
467 back->end = next->end;
468 next = result_out.blocks.erase(next);
469 continue;
470 }
471 back = next;
472 next++;
473 }
474 return {result_out};
475}
476} // namespace VideoCommon::Shader
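
The BlockCollision::Inside path in TryInspectAddress is the subtle one; a worked example of the split, purely illustrative:

    // Suppose block A = [0x10, 0x40] was already parsed and a new label lands
    // on 0x28. TryGetBlock() reports Inside, and the split produces:
    //
    //   before:  A = [0x10, 0x40], branch = <A's original branch>
    //   after:   A = [0x10, 0x27], branch = { address = 0x28, ignore = true }
    //            N = [0x28, 0x40], branch = <A's original branch>
    //
    // The ignore flag marks A's new exit as a pure fall-through, so code
    // generation emits no jump between the two halves.
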
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
new file mode 100644
index 000000000..5e8ea3271
--- /dev/null
+++ b/src/video_core/shader/control_flow.h
@@ -0,0 +1,63 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstring>
8#include <list>
9#include <optional>
10#include <unordered_set>
11
12#include "video_core/engines/shader_bytecode.h"
13#include "video_core/shader/shader_ir.h"
14
15namespace VideoCommon::Shader {
16
17using Tegra::Shader::ConditionCode;
18using Tegra::Shader::Pred;
19
20constexpr s32 exit_branch = -1;
21
22struct Condition {
23 Pred predicate{Pred::UnusedIndex};
24 ConditionCode cc{ConditionCode::T};
25
26 bool IsUnconditional() const {
27 return predicate == Pred::UnusedIndex && cc == ConditionCode::T;
28 }
29 bool operator==(const Condition& other) const {
30 return std::tie(predicate, cc) == std::tie(other.predicate, other.cc);
31 }
32};
33
34struct ShaderBlock {
35 u32 start{};
36 u32 end{};
37 bool ignore_branch{};
38 struct Branch {
39 Condition cond{};
40 bool kills{};
41 s32 address{};
42 bool operator==(const Branch& b) const {
43 return std::tie(cond, kills, address) == std::tie(b.cond, b.kills, b.address);
44 }
45 } branch{};
46 bool operator==(const ShaderBlock& sb) const {
47 return std::tie(start, end, ignore_branch, branch) ==
48 std::tie(sb.start, sb.end, sb.ignore_branch, sb.branch);
49 }
50};
51
52struct ShaderCharacteristics {
53 std::list<ShaderBlock> blocks{};
54 bool decompilable{};
55 u32 start{};
56 u32 end{};
57 std::unordered_set<u32> labels{};
58};
59
60std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 program_size,
61 u32 start_address);
62
63} // namespace VideoCommon::Shader
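
ScanFlow's contract, spelled out: an empty optional means CFG reconstruction failed outright, while decompilable == false still yields usable blocks (merged where unlabelled) but ambiguous SSY/PBK stacks. A minimal consumer sketch; the real consumer is ShaderIR::Decode in the next file:

    const auto info = ScanFlow(program_code, program_size, main_offset);
    if (!info) {
        // No CFG at all: decode one instruction per block, brute force.
    } else if (info->decompilable) {
        // Structured flow: emit real branches and drop the SSY/PBK stack.
    } else {
        // Valid blocks, ambiguous stacks: keep the classic flow stack.
    }
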
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 2c9ff28f2..afffd157f 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -11,6 +11,7 @@
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "video_core/engines/shader_bytecode.h" 12#include "video_core/engines/shader_bytecode.h"
13#include "video_core/engines/shader_header.h" 13#include "video_core/engines/shader_header.h"
14#include "video_core/shader/control_flow.h"
14#include "video_core/shader/node_helper.h" 15#include "video_core/shader/node_helper.h"
15#include "video_core/shader/shader_ir.h" 16#include "video_core/shader/shader_ir.h"
16 17
@@ -21,20 +22,6 @@ using Tegra::Shader::OpCode;
21 22
22namespace { 23namespace {
23 24
24/// Merges exit method of two parallel branches.
25constexpr ExitMethod ParallelExit(ExitMethod a, ExitMethod b) {
26 if (a == ExitMethod::Undetermined) {
27 return b;
28 }
29 if (b == ExitMethod::Undetermined) {
30 return a;
31 }
32 if (a == b) {
33 return a;
34 }
35 return ExitMethod::Conditional;
36}
37
38/** 25/**
39 * Returns whether the instruction at the specified offset is a 'sched' instruction. 26 * Returns whether the instruction at the specified offset is a 'sched' instruction.
40 * Sched instructions always appear before a sequence of 3 instructions. 27 * Sched instructions always appear before a sequence of 3 instructions.
@@ -51,85 +38,104 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
51void ShaderIR::Decode() { 38void ShaderIR::Decode() {
52 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); 39 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
53 40
54 std::set<u32> labels; 41 disable_flow_stack = false;
55 const ExitMethod exit_method = Scan(main_offset, MAX_PROGRAM_LENGTH, labels); 42 const auto info = ScanFlow(program_code, program_size, main_offset);
56 if (exit_method != ExitMethod::AlwaysEnd) { 43 if (info) {
57 UNREACHABLE_MSG("Program does not always end"); 44 const auto& shader_info = *info;
58 } 45 coverage_begin = shader_info.start;
59 46 coverage_end = shader_info.end;
60 if (labels.empty()) { 47 if (shader_info.decompilable) {
61 basic_blocks.insert({main_offset, DecodeRange(main_offset, MAX_PROGRAM_LENGTH)}); 48 disable_flow_stack = true;
49 const auto insert_block = [this](NodeBlock& nodes, u32 label) {
50 if (label == exit_branch) {
51 return;
52 }
53 basic_blocks.insert({label, nodes});
54 };
55 const auto& blocks = shader_info.blocks;
56 NodeBlock current_block;
57 u32 current_label = exit_branch;
58 for (auto& block : blocks) {
59 if (shader_info.labels.count(block.start) != 0) {
60 insert_block(current_block, current_label);
61 current_block.clear();
62 current_label = block.start;
63 }
64 if (!block.ignore_branch) {
65 DecodeRangeInner(current_block, block.start, block.end);
66 InsertControlFlow(current_block, block);
67 } else {
68 DecodeRangeInner(current_block, block.start, block.end + 1);
69 }
70 }
71 insert_block(current_block, current_label);
72 return;
73 }
 75        LOG_WARNING(HW_GPU, "Flow stack removal failed! Falling back to the old method");
 76        // We can't decompile it; fall back to the standard per-block method.
76 for (const auto& block : shader_info.blocks) {
77 basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)});
78 }
62 return; 79 return;
63 } 80 }
 82    LOG_WARNING(HW_GPU, "Flow analysis failed! Falling back to brute-force compiling");
82
 84    // Now we need to deal with an undecompilable shader: brute force it by creating
 85    // a one-instruction basic block at every address so any branch target is covered.
85 coverage_begin = main_offset;
86 const u32 shader_end = static_cast<u32>(program_size / sizeof(u64));
87 coverage_end = shader_end;
88 for (u32 label = main_offset; label < shader_end; label++) {
89 basic_blocks.insert({label, DecodeRange(label, label + 1)});
90 }
91}
64 92
65 labels.insert(main_offset); 93NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
66 94 NodeBlock basic_block;
67 for (const u32 label : labels) { 95 DecodeRangeInner(basic_block, begin, end);
68 const auto next_it = labels.lower_bound(label + 1); 96 return basic_block;
69 const u32 next_label = next_it == labels.end() ? MAX_PROGRAM_LENGTH : *next_it; 97}
70 98
71 basic_blocks.insert({label, DecodeRange(label, next_label)}); 99void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) {
100 for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) {
101 pc = DecodeInstr(bb, pc);
72 } 102 }
73} 103}
74 104
75ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) { 105void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
76 const auto [iter, inserted] = 106 const auto apply_conditions = [&](const Condition& cond, Node n) -> Node {
77 exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined); 107 Node result = n;
78 ExitMethod& exit_method = iter->second; 108 if (cond.cc != ConditionCode::T) {
79 if (!inserted) 109 result = Conditional(GetConditionCode(cond.cc), {result});
80 return exit_method;
81
82 for (u32 offset = begin; offset != end && offset != MAX_PROGRAM_LENGTH; ++offset) {
83 coverage_begin = std::min(coverage_begin, offset);
84 coverage_end = std::max(coverage_end, offset + 1);
85
86 const Instruction instr = {program_code[offset]};
87 const auto opcode = OpCode::Decode(instr);
88 if (!opcode)
89 continue;
90 switch (opcode->get().GetId()) {
91 case OpCode::Id::EXIT: {
92 // The EXIT instruction can be predicated, which means that the shader can conditionally
93 // end on this instruction. We have to consider the case where the condition is not met
94 // and check the exit method of that other basic block.
95 using Tegra::Shader::Pred;
96 if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
97 return exit_method = ExitMethod::AlwaysEnd;
98 } else {
99 const ExitMethod not_met = Scan(offset + 1, end, labels);
100 return exit_method = ParallelExit(ExitMethod::AlwaysEnd, not_met);
101 }
102 } 110 }
103 case OpCode::Id::BRA: { 111 if (cond.predicate != Pred::UnusedIndex) {
104 const u32 target = offset + instr.bra.GetBranchTarget(); 112 u32 pred = static_cast<u32>(cond.predicate);
105 labels.insert(target); 113 const bool is_neg = pred > 7;
106 const ExitMethod no_jmp = Scan(offset + 1, end, labels); 114 if (is_neg) {
107 const ExitMethod jmp = Scan(target, end, labels); 115 pred -= 8;
108 return exit_method = ParallelExit(no_jmp, jmp); 116 }
109 } 117 result = Conditional(GetPredicate(pred, is_neg), {result});
110 case OpCode::Id::SSY:
111 case OpCode::Id::PBK: {
112 // The SSY and PBK use a similar encoding as the BRA instruction.
113 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
114 "Constant buffer branching is not supported");
115 const u32 target = offset + instr.bra.GetBranchTarget();
116 labels.insert(target);
117 // Continue scanning for an exit method.
118 break;
119 } 118 }
120 default: 119 return result;
121 break; 120 };
121 if (block.branch.address < 0) {
122 if (block.branch.kills) {
123 Node n = Operation(OperationCode::Discard);
124 n = apply_conditions(block.branch.cond, n);
125 bb.push_back(n);
126 global_code.push_back(n);
127 return;
122 } 128 }
129 Node n = Operation(OperationCode::Exit);
130 n = apply_conditions(block.branch.cond, n);
131 bb.push_back(n);
132 global_code.push_back(n);
133 return;
123 } 134 }
124 return exit_method = ExitMethod::AlwaysReturn; 135 Node n = Operation(OperationCode::Branch, Immediate(block.branch.address));
125} 136 n = apply_conditions(block.branch.cond, n);
126 137 bb.push_back(n);
127NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { 138 global_code.push_back(n);
128 NodeBlock basic_block;
129 for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) {
130 pc = DecodeInstr(basic_block, pc);
131 }
132 return basic_block;
133} 139}
134 140
135u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { 141u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
@@ -140,15 +146,18 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
140 146
141 const Instruction instr = {program_code[pc]}; 147 const Instruction instr = {program_code[pc]};
142 const auto opcode = OpCode::Decode(instr); 148 const auto opcode = OpCode::Decode(instr);
149 const u32 nv_address = ConvertAddressToNvidiaSpace(pc);
143 150
144 // Decoding failure 151 // Decoding failure
145 if (!opcode) { 152 if (!opcode) {
146 UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value); 153 UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value);
154 bb.push_back(Comment(fmt::format("{:05x} Unimplemented Shader instruction (0x{:016x})",
155 nv_address, instr.value)));
147 return pc + 1; 156 return pc + 1;
148 } 157 }
149 158
150 bb.push_back( 159 bb.push_back(Comment(
151 Comment(fmt::format("{}: {} (0x{:016x})", pc, opcode->get().GetName(), instr.value))); 160 fmt::format("{:05x} {} (0x{:016x})", nv_address, opcode->get().GetName(), instr.value)));
152 161
153 using Tegra::Shader::Pred; 162 using Tegra::Shader::Pred;
154 UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute, 163 UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute,
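
InsertControlFlow wraps each generated Exit/Discard/Branch node in up to two Conditional layers. Illustratively, for a block ending in a negated-predicate branch such as `@!P3 BRA 0x80` with condition code T, apply_conditions yields:

    // cond = { predicate = 11 /* !P3: index 3 + 8 for negation */, cc = T }
    //
    //   Conditional(GetPredicate(3, /*neg=*/true),
    //               { Operation(OperationCode::Branch, Immediate(0x80)) })
    //
    // A non-trivial condition code would be applied first, ending up as the
    // inner Conditional layer, with the predicate wrapped around it.
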
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index 87d8fecaa..1473c282a 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -42,11 +42,14 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
42 case OpCode::Id::FMUL_R: 42 case OpCode::Id::FMUL_R:
43 case OpCode::Id::FMUL_IMM: { 43 case OpCode::Id::FMUL_IMM: {
44 // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. 44 // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
45 UNIMPLEMENTED_IF_MSG(instr.fmul.tab5cb8_2 != 0, "FMUL tab5cb8_2({}) is not implemented", 45 if (instr.fmul.tab5cb8_2 != 0) {
46 instr.fmul.tab5cb8_2.Value()); 46 LOG_WARNING(HW_GPU, "FMUL tab5cb8_2({}) is not implemented",
47 UNIMPLEMENTED_IF_MSG( 47 instr.fmul.tab5cb8_2.Value());
48 instr.fmul.tab5c68_0 != 1, "FMUL tab5cb8_0({}) is not implemented", 48 }
49 instr.fmul.tab5c68_0.Value()); // SMO typical sends 1 here which seems to be the default 49 if (instr.fmul.tab5c68_0 != 1) {
50 LOG_WARNING(HW_GPU, "FMUL tab5cb8_0({}) is not implemented",
51 instr.fmul.tab5c68_0.Value());
52 }
50 53
51 op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); 54 op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);
52 55
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
index 7bcf38f23..6466fc011 100644
--- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
@@ -23,7 +23,9 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) {
23 LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); 23 LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
24 } 24 }
25 } else { 25 } else {
26 UNIMPLEMENTED_IF(instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None); 26 if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None) {
27 LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
28 }
27 } 29 }
28 30
29 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a); 31 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a);
diff --git a/src/video_core/shader/decode/decode_integer_set.cpp b/src/video_core/shader/decode/decode_integer_set.cpp
deleted file mode 100644
index e69de29bb..000000000
--- a/src/video_core/shader/decode/decode_integer_set.cpp
+++ /dev/null
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp
index 29be25ca3..ca2f39e8d 100644
--- a/src/video_core/shader/decode/ffma.cpp
+++ b/src/video_core/shader/decode/ffma.cpp
@@ -18,10 +18,12 @@ u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) {
18 const auto opcode = OpCode::Decode(instr); 18 const auto opcode = OpCode::Decode(instr);
19 19
20 UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); 20 UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented");
21 UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_0 != 1, "FFMA tab5980_0({}) not implemented", 21 if (instr.ffma.tab5980_0 != 1) {
22 instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO 22 LOG_WARNING(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value());
23 UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented", 23 }
24 instr.ffma.tab5980_1.Value()); 24 if (instr.ffma.tab5980_1 != 0) {
25 LOG_WARNING(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value());
26 }
25 27
26 const Node op_a = GetRegister(instr.gpr8); 28 const Node op_a = GetRegister(instr.gpr8);
27 29
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp
index d59d15bd8..a82a6a15c 100644
--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ b/src/video_core/shader/decode/half_set_predicate.cpp
@@ -18,43 +18,56 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
18 const Instruction instr = {program_code[pc]}; 18 const Instruction instr = {program_code[pc]};
19 const auto opcode = OpCode::Decode(instr); 19 const auto opcode = OpCode::Decode(instr);
20 20
21 UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0); 21 DEBUG_ASSERT(instr.hsetp2.ftz == 0);
22 22
23 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); 23 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a);
24 op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); 24 op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);
25 25
26 Node op_b = [&]() { 26 Tegra::Shader::PredCondition cond{};
27 switch (opcode->get().GetId()) { 27 bool h_and{};
28 case OpCode::Id::HSETP2_R: 28 Node op_b{};
29 return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_a, 29 switch (opcode->get().GetId()) {
30 instr.hsetp2.negate_b); 30 case OpCode::Id::HSETP2_C:
31 default: 31 cond = instr.hsetp2.cbuf_and_imm.cond;
32 UNREACHABLE(); 32 h_and = instr.hsetp2.cbuf_and_imm.h_and;
33 return Immediate(0); 33 op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset),
34 } 34 instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b);
35 }(); 35 break;
36 op_b = UnpackHalfFloat(op_b, instr.hsetp2.type_b); 36 case OpCode::Id::HSETP2_IMM:
37 37 cond = instr.hsetp2.cbuf_and_imm.cond;
38 // We can't use the constant predicate as destination. 38 h_and = instr.hsetp2.cbuf_and_imm.h_and;
39 ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex)); 39 op_b = UnpackHalfImmediate(instr, true);
40 40 break;
41 const Node second_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0); 41 case OpCode::Id::HSETP2_R:
42 cond = instr.hsetp2.reg.cond;
43 h_and = instr.hsetp2.reg.h_and;
44 op_b =
45 UnpackHalfFloat(GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.reg.abs_b,
46 instr.hsetp2.reg.negate_b),
47 instr.hsetp2.reg.type_b);
48 break;
49 default:
50 UNREACHABLE();
51 op_b = Immediate(0);
52 }
42 53
43 const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); 54 const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op);
44 const OperationCode pair_combiner = 55 const Node pred39 = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred);
45 instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2;
46
47 const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, op_a, op_b);
48 const Node first_pred = Operation(pair_combiner, comparison);
49 56
50 // Set the primary predicate to the result of Predicate OP SecondPredicate 57 const auto Write = [&](u64 dest, Node src) {
51 const Node value = Operation(combiner, first_pred, second_pred); 58 SetPredicate(bb, dest, Operation(combiner, std::move(src), pred39));
52 SetPredicate(bb, instr.hsetp2.pred3, value); 59 };
53 60
54 if (instr.hsetp2.pred0 != static_cast<u64>(Pred::UnusedIndex)) { 61 const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b);
55 // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled 62 const u64 first = instr.hsetp2.pred0;
56 const Node negated_pred = Operation(OperationCode::LogicalNegate, first_pred); 63 const u64 second = instr.hsetp2.pred3;
57 SetPredicate(bb, instr.hsetp2.pred0, Operation(combiner, negated_pred, second_pred)); 64 if (h_and) {
65 const Node joined = Operation(OperationCode::LogicalAnd2, comparison);
66 Write(first, joined);
67 Write(second, Operation(OperationCode::LogicalNegate, joined));
68 } else {
69 Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0u)));
70 Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1u)));
58 } 71 }
59 72
60 return pc; 73 return pc;
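A way to read the new write-back logic: GetPredicateComparisonHalf yields a two-wide boolean (one result per half), and h_and decides whether both destination predicates receive the joined AND (the second negated) or each picks its own half. A host-side sketch of just that decision on plain bools, ignoring the pred39 combiner that Write applies on top:

#include <array>
#include <cassert>
#include <utility>

// comparison[0]/comparison[1] stand for the per-half results produced by
// GetPredicateComparisonHalf; the returned pair is {pred0, pred3}.
std::pair<bool, bool> Hsetp2WriteBack(std::array<bool, 2> comparison, bool h_and) {
    if (h_and) {
        const bool joined = comparison[0] && comparison[1]; // LogicalAnd2
        return {joined, !joined};
    }
    return {comparison[0], comparison[1]}; // LogicalPick2 with indices 0 and 1
}

int main() {
    assert((Hsetp2WriteBack({true, false}, true) == std::pair{false, true}));
    assert((Hsetp2WriteBack({true, false}, false) == std::pair{true, false}));
}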
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp
index c3bcf1ae9..5b44cb79c 100644
--- a/src/video_core/shader/decode/hfma2.cpp
+++ b/src/video_core/shader/decode/hfma2.cpp
@@ -22,9 +22,9 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
22 const auto opcode = OpCode::Decode(instr); 22 const auto opcode = OpCode::Decode(instr);
23 23
24 if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) { 24 if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) {
25 UNIMPLEMENTED_IF(instr.hfma2.rr.precision != HalfPrecision::None); 25 DEBUG_ASSERT(instr.hfma2.rr.precision == HalfPrecision::None);
26 } else { 26 } else {
27 UNIMPLEMENTED_IF(instr.hfma2.precision != HalfPrecision::None); 27 DEBUG_ASSERT(instr.hfma2.precision == HalfPrecision::None);
28 } 28 }
29 29
30 constexpr auto identity = HalfType::H0_H1; 30 constexpr auto identity = HalfType::H0_H1;
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index 24f022cc0..77151a24b 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -95,12 +95,8 @@ const Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::Image
95const Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, 95const Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg,
96 Tegra::Shader::ImageType type) { 96 Tegra::Shader::ImageType type) {
97 const Node image_register{GetRegister(reg)}; 97 const Node image_register{GetRegister(reg)};
98 const Node base_image{ 98 const auto [base_image, cbuf_index, cbuf_offset]{
99 TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))}; 99 TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))};
100 const auto cbuf{std::get_if<CbufNode>(&*base_image)};
101 const auto cbuf_offset_imm{std::get_if<ImmediateNode>(&*cbuf->GetOffset())};
102 const auto cbuf_offset{cbuf_offset_imm->GetValue()};
103 const auto cbuf_index{cbuf->GetIndex()};
104 const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)}; 100 const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)};
105 101
106 // If this image has already been used, return the existing mapping. 102 // If this image has already been used, return the existing mapping.
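The cbuf_key above packs the constant-buffer slot and byte offset into a single 64-bit map key, which is what lets bindless images (and samplers, in the texture.cpp hunk below) be deduplicated per (index, offset) pair. The layout in isolation:

#include <cstdint>

// Upper 32 bits: constant buffer index; lower 32 bits: byte offset.
constexpr std::uint64_t MakeCbufKey(std::uint32_t index, std::uint32_t offset) {
    return (static_cast<std::uint64_t>(index) << 32) | offset;
}
static_assert(MakeCbufKey(0x12, 0x40) == 0x0000001200000040ULL);
static_assert(MakeCbufKey(0x12, 0x40) >> 32 == 0x12);          // recover the index
static_assert((MakeCbufKey(0x12, 0x40) & 0xFFFFFFFF) == 0x40); // recover the offset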
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 80fc0ccfc..ed108bea8 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -95,10 +95,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
95 const Node op_b = 95 const Node op_b =
96 GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index); 96 GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index);
97 97
98 SetTemporal(bb, 0, op_a); 98 SetTemporary(bb, 0, op_a);
99 SetTemporal(bb, 1, op_b); 99 SetTemporary(bb, 1, op_b);
100 SetRegister(bb, instr.gpr0, GetTemporal(0)); 100 SetRegister(bb, instr.gpr0, GetTemporary(0));
101 SetRegister(bb, instr.gpr0.Value() + 1, GetTemporal(1)); 101 SetRegister(bb, instr.gpr0.Value() + 1, GetTemporary(1));
102 break; 102 break;
103 } 103 }
104 default: 104 default:
@@ -136,9 +136,9 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
136 } 136 }
137 }(); 137 }();
138 for (u32 i = 0; i < count; ++i) 138 for (u32 i = 0; i < count; ++i)
139 SetTemporal(bb, i, GetLmem(i * 4)); 139 SetTemporary(bb, i, GetLmem(i * 4));
140 for (u32 i = 0; i < count; ++i) 140 for (u32 i = 0; i < count; ++i)
141 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); 141 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
142 break; 142 break;
143 } 143 }
144 default: 144 default:
@@ -172,10 +172,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
172 Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); 172 Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
173 const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); 173 const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
174 174
175 SetTemporal(bb, i, gmem); 175 SetTemporary(bb, i, gmem);
176 } 176 }
177 for (u32 i = 0; i < count; ++i) { 177 for (u32 i = 0; i < count; ++i) {
178 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); 178 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
179 } 179 }
180 break; 180 break;
181 } 181 }
@@ -253,11 +253,11 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
253 TrackAndGetGlobalMemory(bb, instr, true); 253 TrackAndGetGlobalMemory(bb, instr, true);
254 254
255 // Encode in temporary registers like this: real_base_address, {registers_to_be_written...} 255 // Encode in temporary registers like this: real_base_address, {registers_to_be_written...}
256 SetTemporal(bb, 0, real_address_base); 256 SetTemporary(bb, 0, real_address_base);
257 257
258 const u32 count = GetUniformTypeElementsCount(type); 258 const u32 count = GetUniformTypeElementsCount(type);
259 for (u32 i = 0; i < count; ++i) { 259 for (u32 i = 0; i < count; ++i) {
260 SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i)); 260 SetTemporary(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
261 } 261 }
262 for (u32 i = 0; i < count; ++i) { 262 for (u32 i = 0; i < count; ++i) {
263 const Node it_offset = Immediate(i * 4); 263 const Node it_offset = Immediate(i * 4);
@@ -265,7 +265,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
265 Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); 265 Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
266 const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); 266 const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
267 267
268 bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1))); 268 bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporary(i + 1)));
269 } 269 }
270 break; 270 break;
271 } 271 }
@@ -297,18 +297,13 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeB
297 const auto addr_register{GetRegister(instr.gmem.gpr)}; 297 const auto addr_register{GetRegister(instr.gmem.gpr)};
298 const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; 298 const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};
299 299
300 const Node base_address{ 300 const auto [base_address, index, offset] =
301 TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))}; 301 TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
302 const auto cbuf = std::get_if<CbufNode>(&*base_address); 302 ASSERT(base_address != nullptr);
303 ASSERT(cbuf != nullptr);
304 const auto cbuf_offset_imm = std::get_if<ImmediateNode>(&*cbuf->GetOffset());
305 ASSERT(cbuf_offset_imm != nullptr);
306 const auto cbuf_offset = cbuf_offset_imm->GetValue();
307 303
308 bb.push_back( 304 bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset)));
309 Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset)));
310 305
311 const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset}; 306 const GlobalMemoryBase descriptor{index, offset};
312 const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor); 307 const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor);
313 auto& usage = entry->second; 308 auto& usage = entry->second;
314 if (is_write) { 309 if (is_write) {
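With TrackCbuf now returning a tuple, callers such as TrackAndGetGlobalMemory (and the bindless sampler/image lookups) unpack it with structured bindings instead of fishing the CbufNode and ImmediateNode out via std::get_if. A small hypothetical consumer with the same shape, names invented for illustration:

#include <cassert>
#include <tuple>

struct Node {}; // stand-in for the real shared-pointer Node type

// Mirrors the new TrackCbuf contract: {tracked node, cbuf index, cbuf offset},
// or a value-initialized tuple ({nullptr, 0, 0}) when tracking fails.
std::tuple<const Node*, unsigned, unsigned> TrackCbufLike(bool found) {
    static constexpr Node cbuf{};
    if (!found) {
        return {};
    }
    return {&cbuf, 0x1u, 0x8cu};
}

int main() {
    const auto [base_address, index, offset] = TrackCbufLike(true);
    assert(base_address != nullptr);        // callers now just null-check the node
    assert(index == 0x1 && offset == 0x8c); // "Base address is c[0x1][0x8c]"
}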
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index d46a8ab82..c0f64d7a0 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -91,11 +91,46 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
91 break; 91 break;
92 } 92 }
93 case OpCode::Id::BRA: { 93 case OpCode::Id::BRA: {
94 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, 94 Node branch;
95 "BRA with constant buffers are not implemented"); 95 if (instr.bra.constant_buffer == 0) {
96 const u32 target = pc + instr.bra.GetBranchTarget();
97 branch = Operation(OperationCode::Branch, Immediate(target));
98 } else {
99 const u32 target = pc + 1;
100 const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset());
101 const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
102 PRECISE, op_a, Immediate(3));
103 const Node operand =
104 Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
105 branch = Operation(OperationCode::BranchIndirect, operand);
106 }
96 107
97 const u32 target = pc + instr.bra.GetBranchTarget(); 108 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
98 const Node branch = Operation(OperationCode::Branch, Immediate(target)); 109 if (cc != Tegra::Shader::ConditionCode::T) {
110 bb.push_back(Conditional(GetConditionCode(cc), {branch}));
111 } else {
112 bb.push_back(branch);
113 }
114 break;
115 }
116 case OpCode::Id::BRX: {
117 Node operand;
118 if (instr.brx.constant_buffer != 0) {
119 const s32 target = pc + 1;
120 const Node index = GetRegister(instr.gpr8);
121 const Node op_a =
122 GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index);
123 const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
124 PRECISE, op_a, Immediate(3));
125 operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
126 } else {
127 const s32 target = pc + instr.brx.GetBranchExtend();
128 const Node op_a = GetRegister(instr.gpr8);
129 const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
130 PRECISE, op_a, Immediate(3));
131 operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
132 }
133 const Node branch = Operation(OperationCode::BranchIndirect, operand);
99 134
100 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; 135 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
101 if (cc != Tegra::Shader::ConditionCode::T) { 136 if (cc != Tegra::Shader::ConditionCode::T) {
@@ -109,6 +144,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
109 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, 144 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
110 "Constant buffer flow is not supported"); 145 "Constant buffer flow is not supported");
111 146
147 if (disable_flow_stack) {
148 break;
149 }
150
112 // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC. 151 // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC.
113 const u32 target = pc + instr.bra.GetBranchTarget(); 152 const u32 target = pc + instr.bra.GetBranchTarget();
114 bb.push_back( 153 bb.push_back(
@@ -119,6 +158,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
119 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, 158 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
120 "Constant buffer PBK is not supported"); 159 "Constant buffer PBK is not supported");
121 160
161 if (disable_flow_stack) {
162 break;
163 }
164
122 // PBK pushes to a stack the address where BRK will jump to. 165 // PBK pushes to a stack the address where BRK will jump to.
123 const u32 target = pc + instr.bra.GetBranchTarget(); 166 const u32 target = pc + instr.bra.GetBranchTarget();
124 bb.push_back( 167 bb.push_back(
@@ -130,6 +173,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
130 UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}", 173 UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}",
131 static_cast<u32>(cc)); 174 static_cast<u32>(cc));
132 175
176 if (disable_flow_stack) {
177 break;
178 }
179
133 // The SYNC opcode jumps to the address previously set by the SSY opcode 180 // The SYNC opcode jumps to the address previously set by the SSY opcode
134 bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy)); 181 bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy));
135 break; 182 break;
@@ -138,6 +185,9 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
138 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; 185 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
139 UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}", 186 UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}",
140 static_cast<u32>(cc)); 187 static_cast<u32>(cc));
188 if (disable_flow_stack) {
189 break;
190 }
141 191
142 // The BRK opcode jumps to the address previously set by the PBK opcode 192 // The BRK opcode jumps to the address previously set by the PBK opcode
143 bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk)); 193 bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk));
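Both indirect paths above (BRA through a constant buffer, BRX through a register) compute the jump target the same way: fetch a value, arithmetic-shift it right by 3 to turn a byte offset into an instruction index (Tegra instructions are 8 bytes wide), and add it to a base of pc + 1 (or pc plus the BRX extend). Worked out on the host side, assuming signed byte offsets:

#include <cstdint>

constexpr std::uint32_t IndirectTarget(std::uint32_t pc, std::int32_t byte_offset) {
    // byte offset -> instruction offset, relative to the instruction after pc.
    // (>> on negative values is guaranteed to be an arithmetic shift since C++20.)
    return static_cast<std::uint32_t>(static_cast<std::int32_t>(pc + 1) + (byte_offset >> 3));
}
static_assert(IndirectTarget(100, 0x40) == 109); // 8 instructions past pc + 1
static_assert(IndirectTarget(100, -0x40) == 93); // negative offsets branch backwards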
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index cb480be9b..0b934a069 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -181,10 +181,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
181 const Node value = 181 const Node value =
182 Operation(OperationCode::TextureQueryDimensions, meta, 182 Operation(OperationCode::TextureQueryDimensions, meta,
183 GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); 183 GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
184 SetTemporal(bb, indexer++, value); 184 SetTemporary(bb, indexer++, value);
185 } 185 }
186 for (u32 i = 0; i < indexer; ++i) { 186 for (u32 i = 0; i < indexer; ++i) {
187 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); 187 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
188 } 188 }
189 break; 189 break;
190 } 190 }
@@ -238,10 +238,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
238 auto params = coords; 238 auto params = coords;
239 MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; 239 MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
240 const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); 240 const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
241 SetTemporal(bb, indexer++, value); 241 SetTemporary(bb, indexer++, value);
242 } 242 }
243 for (u32 i = 0; i < indexer; ++i) { 243 for (u32 i = 0; i < indexer; ++i) {
244 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); 244 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
245 } 245 }
246 break; 246 break;
247 } 247 }
@@ -269,7 +269,13 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
269 LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete"); 269 LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
270 } 270 }
271 271
272 WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array)); 272 const Node4 components = GetTldsCode(instr, texture_type, is_array);
273
274 if (instr.tlds.fp32_flag) {
275 WriteTexsInstructionFloat(bb, instr, components);
276 } else {
277 WriteTexsInstructionHalfFloat(bb, instr, components);
278 }
273 break; 279 break;
274 } 280 }
275 default: 281 default:
@@ -302,13 +308,9 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu
302const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type, 308const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type,
303 bool is_array, bool is_shadow) { 309 bool is_array, bool is_shadow) {
304 const Node sampler_register = GetRegister(reg); 310 const Node sampler_register = GetRegister(reg);
305 const Node base_sampler = 311 const auto [base_sampler, cbuf_index, cbuf_offset] =
306 TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); 312 TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size()));
307 const auto cbuf = std::get_if<CbufNode>(&*base_sampler); 313 ASSERT(base_sampler != nullptr);
308 const auto cbuf_offset_imm = std::get_if<ImmediateNode>(&*cbuf->GetOffset());
309 ASSERT(cbuf_offset_imm != nullptr);
310 const auto cbuf_offset = cbuf_offset_imm->GetValue();
311 const auto cbuf_index = cbuf->GetIndex();
312 const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset); 314 const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset);
313 315
314 // If this sampler has already been used, return the existing mapping. 316 // If this sampler has already been used, return the existing mapping.
@@ -334,11 +336,11 @@ void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const
334 // Skip disabled components 336 // Skip disabled components
335 continue; 337 continue;
336 } 338 }
337 SetTemporal(bb, dest_elem++, components[elem]); 339 SetTemporary(bb, dest_elem++, components[elem]);
338 } 340 }
339 // After writing values in temporals, move them to the real registers 341 // After writing values in temporals, move them to the real registers
340 for (u32 i = 0; i < dest_elem; ++i) { 342 for (u32 i = 0; i < dest_elem; ++i) {
341 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); 343 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
342 } 344 }
343} 345}
344 346
@@ -351,17 +353,17 @@ void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
351 for (u32 component = 0; component < 4; ++component) { 353 for (u32 component = 0; component < 4; ++component) {
352 if (!instr.texs.IsComponentEnabled(component)) 354 if (!instr.texs.IsComponentEnabled(component))
353 continue; 355 continue;
354 SetTemporal(bb, dest_elem++, components[component]); 356 SetTemporary(bb, dest_elem++, components[component]);
355 } 357 }
356 358
357 for (u32 i = 0; i < dest_elem; ++i) { 359 for (u32 i = 0; i < dest_elem; ++i) {
358 if (i < 2) { 360 if (i < 2) {
359 // Write the first two swizzle components to gpr0 and gpr0+1 361 // Write the first two swizzle components to gpr0 and gpr0+1
360 SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i)); 362 SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporary(i));
361 } else { 363 } else {
362 ASSERT(instr.texs.HasTwoDestinations()); 364 ASSERT(instr.texs.HasTwoDestinations());
363 // Write the rest of the swizzle components to gpr28 and gpr28+1 365 // Write the rest of the swizzle components to gpr28 and gpr28+1
364 SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i)); 366 SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporary(i));
365 } 367 }
366 } 368 }
367} 369}
@@ -389,11 +391,11 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
389 return; 391 return;
390 } 392 }
391 393
392 SetTemporal(bb, 0, first_value); 394 SetTemporary(bb, 0, first_value);
393 SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); 395 SetTemporary(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
394 396
395 SetRegister(bb, instr.gpr0, GetTemporal(0)); 397 SetRegister(bb, instr.gpr0, GetTemporary(0));
396 SetRegister(bb, instr.gpr28, GetTemporal(1)); 398 SetRegister(bb, instr.gpr28, GetTemporary(1));
397} 399}
398 400
399Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, 401Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
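The TLDS change above routes non-fp32 results through WriteTexsInstructionHalfFloat, which ends in the HPack2 operations visible in the same file: two half floats merged into one 32-bit register. What that packing amounts to, assuming the first component occupies the low 16 bits:

#include <cstdint>

constexpr std::uint32_t HPack2(std::uint16_t low, std::uint16_t high) {
    return static_cast<std::uint32_t>(low) | (static_cast<std::uint32_t>(high) << 16);
}
static_assert(HPack2(0x3C00, 0xBC00) == 0xBC003C00u); // f16 1.0 in the low half, -1.0 in the high half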
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
index 93dee77d1..206961909 100644
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -73,8 +73,8 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
73 if (is_psl) { 73 if (is_psl) {
74 product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16)); 74 product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16));
75 } 75 }
76 SetTemporal(bb, 0, product); 76 SetTemporary(bb, 0, product);
77 product = GetTemporal(0); 77 product = GetTemporary(0);
78 78
79 const Node original_c = op_c; 79 const Node original_c = op_c;
80 const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error 80 const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error
@@ -98,13 +98,13 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
98 } 98 }
99 }(); 99 }();
100 100
101 SetTemporal(bb, 1, op_c); 101 SetTemporary(bb, 1, op_c);
102 op_c = GetTemporal(1); 102 op_c = GetTemporary(1);
103 103
104 // TODO(Rodrigo): Use an appropriate sign for this operation 104 // TODO(Rodrigo): Use an appropriate sign for this operation
105 Node sum = Operation(OperationCode::IAdd, product, op_c); 105 Node sum = Operation(OperationCode::IAdd, product, op_c);
106 SetTemporal(bb, 2, sum); 106 SetTemporary(bb, 2, sum);
107 sum = GetTemporal(2); 107 sum = GetTemporary(2);
108 if (is_merge) { 108 if (is_merge) {
109 const Node a = BitfieldExtract(sum, 0, 16); 109 const Node a = BitfieldExtract(sum, 0, 16);
110 const Node b = 110 const Node b =
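The SetTemporary/GetTemporary round-trips above pin intermediate values to concrete registers instead of re-reading the expression tree. Per the shader_ir.cpp hunk further down, a temporary id maps to the register straight after RZ, so (assuming Maxwell's RZ index of 255, as in yuzu's shader bytecode definitions) the mapping is simply:

#include <cstdint>

constexpr std::uint32_t kZeroIndex = 255; // RZ (assumption for this sketch)

constexpr std::uint32_t TemporaryRegister(std::uint32_t id) {
    return kZeroIndex + 1 + id; // "post-RZ": temporaries live past the hardware range
}
static_assert(TemporaryRegister(0) == 256);
static_assert(TemporaryRegister(2) == 258);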
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 0ac83fcf0..715184d67 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -101,8 +101,7 @@ enum class OperationCode {
101 LogicalXor, /// (bool a, bool b) -> bool 101 LogicalXor, /// (bool a, bool b) -> bool
102 LogicalNegate, /// (bool a) -> bool 102 LogicalNegate, /// (bool a) -> bool
103 LogicalPick2, /// (bool2 pair, uint index) -> bool 103 LogicalPick2, /// (bool2 pair, uint index) -> bool
104 LogicalAll2, /// (bool2 a) -> bool 104 LogicalAnd2, /// (bool2 a) -> bool
105 LogicalAny2, /// (bool2 a) -> bool
106 105
107 LogicalFLessThan, /// (float a, float b) -> bool 106 LogicalFLessThan, /// (float a, float b) -> bool
108 LogicalFEqual, /// (float a, float b) -> bool 107 LogicalFEqual, /// (float a, float b) -> bool
@@ -148,11 +147,12 @@ enum class OperationCode {
148 147
149 ImageStore, /// (MetaImage, float[N] coords) -> void 148 ImageStore, /// (MetaImage, float[N] coords) -> void
150 149
151 Branch, /// (uint branch_target) -> void 150 Branch, /// (uint branch_target) -> void
152 PushFlowStack, /// (uint branch_target) -> void 151 BranchIndirect, /// (uint branch_target) -> void
153 PopFlowStack, /// () -> void 152 PushFlowStack, /// (uint branch_target) -> void
154 Exit, /// () -> void 153 PopFlowStack, /// () -> void
155 Discard, /// () -> void 154 Exit, /// () -> void
155 Discard, /// () -> void
156 156
157 EmitVertex, /// () -> void 157 EmitVertex, /// () -> void
158 EndPrimitive, /// () -> void 158 EndPrimitive, /// () -> void
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp
index 6fccbbba3..b3dcd291c 100644
--- a/src/video_core/shader/node_helper.cpp
+++ b/src/video_core/shader/node_helper.cpp
@@ -12,7 +12,7 @@
12namespace VideoCommon::Shader { 12namespace VideoCommon::Shader {
13 13
14Node Conditional(Node condition, std::vector<Node> code) { 14Node Conditional(Node condition, std::vector<Node> code) {
15 return MakeNode<ConditionalNode>(condition, std::move(code)); 15 return MakeNode<ConditionalNode>(std::move(condition), std::move(code));
16} 16}
17 17
18Node Comment(std::string text) { 18Node Comment(std::string text) {
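Node is a shared-pointer-like handle, so the added std::move matters: passing by value and moving transfers ownership without the atomic reference-count bump a copy would pay. The same idea in isolation, with a plain shared_ptr standing in for the real payload:

#include <memory>
#include <utility>

struct ConditionalSketch {
    std::shared_ptr<int> condition; // stands in for the real Node payload
    explicit ConditionalSketch(std::shared_ptr<int> cond)
        : condition{std::move(cond)} {} // move: no refcount traffic
};

int main() {
    auto cond = std::make_shared<int>(1);
    ConditionalSketch node{std::move(cond)}; // cond is now empty; one owner total
}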
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 11b545cca..5e91fe129 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -22,8 +22,8 @@ using Tegra::Shader::PredCondition;
22using Tegra::Shader::PredOperation; 22using Tegra::Shader::PredOperation;
23using Tegra::Shader::Register; 23using Tegra::Shader::Register;
24 24
25ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset) 25ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size)
26 : program_code{program_code}, main_offset{main_offset} { 26 : program_code{program_code}, main_offset{main_offset}, program_size{size} {
27 Decode(); 27 Decode();
28} 28}
29 29
@@ -61,8 +61,17 @@ Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) {
61 const auto [entry, is_new] = used_cbufs.try_emplace(index); 61 const auto [entry, is_new] = used_cbufs.try_emplace(index);
62 entry->second.MarkAsUsedIndirect(); 62 entry->second.MarkAsUsedIndirect();
63 63
64 const Node final_offset = Operation(OperationCode::UAdd, NO_PRECISE, node, Immediate(offset)); 64 Node final_offset = [&] {
65 return MakeNode<CbufNode>(index, final_offset); 65 // Attempt to inline constant buffer without a variable offset. This is done to allow
66 // tracking LDC calls.
67 if (const auto gpr = std::get_if<GprNode>(&*node)) {
68 if (gpr->GetIndex() == Register::ZeroIndex) {
69 return Immediate(offset);
70 }
71 }
72 return Operation(OperationCode::UAdd, NO_PRECISE, std::move(node), Immediate(offset));
73 }();
74 return MakeNode<CbufNode>(index, std::move(final_offset));
66} 75}
67 76
68Node ShaderIR::GetPredicate(u64 pred_, bool negated) { 77Node ShaderIR::GetPredicate(u64 pred_, bool negated) {
@@ -80,7 +89,7 @@ Node ShaderIR::GetPredicate(bool immediate) {
80 89
81Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) { 90Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) {
82 used_input_attributes.emplace(index); 91 used_input_attributes.emplace(index);
83 return MakeNode<AbufNode>(index, static_cast<u32>(element), buffer); 92 return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer));
84} 93}
85 94
86Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) { 95Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) {
@@ -89,6 +98,22 @@ Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_addres
89} 98}
90 99
91Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) { 100Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) {
101 if (index == Attribute::Index::LayerViewportPointSize) {
102 switch (element) {
103 case 0:
104 UNIMPLEMENTED();
105 break;
106 case 1:
107 uses_layer = true;
108 break;
109 case 2:
110 uses_viewport_index = true;
111 break;
112 case 3:
113 uses_point_size = true;
114 break;
115 }
116 }
92 if (index == Attribute::Index::ClipDistances0123 || 117 if (index == Attribute::Index::ClipDistances0123 ||
93 index == Attribute::Index::ClipDistances4567) { 118 index == Attribute::Index::ClipDistances4567) {
94 const auto clip_index = 119 const auto clip_index =
@@ -97,7 +122,7 @@ Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buff
97 } 122 }
98 used_output_attributes.insert(index); 123 used_output_attributes.insert(index);
99 124
100 return MakeNode<AbufNode>(index, static_cast<u32>(element), buffer); 125 return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer));
101} 126}
102 127
103Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) { 128Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) {
@@ -109,19 +134,19 @@ Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) {
109} 134}
110 135
111Node ShaderIR::GetLocalMemory(Node address) { 136Node ShaderIR::GetLocalMemory(Node address) {
112 return MakeNode<LmemNode>(address); 137 return MakeNode<LmemNode>(std::move(address));
113} 138}
114 139
115Node ShaderIR::GetTemporal(u32 id) { 140Node ShaderIR::GetTemporary(u32 id) {
116 return GetRegister(Register::ZeroIndex + 1 + id); 141 return GetRegister(Register::ZeroIndex + 1 + id);
117} 142}
118 143
119Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) { 144Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) {
120 if (absolute) { 145 if (absolute) {
121 value = Operation(OperationCode::FAbsolute, NO_PRECISE, value); 146 value = Operation(OperationCode::FAbsolute, NO_PRECISE, std::move(value));
122 } 147 }
123 if (negate) { 148 if (negate) {
124 value = Operation(OperationCode::FNegate, NO_PRECISE, value); 149 value = Operation(OperationCode::FNegate, NO_PRECISE, std::move(value));
125 } 150 }
126 return value; 151 return value;
127} 152}
@@ -130,24 +155,26 @@ Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) {
130 if (!saturate) { 155 if (!saturate) {
131 return value; 156 return value;
132 } 157 }
133 const Node positive_zero = Immediate(std::copysignf(0, 1)); 158
134 const Node positive_one = Immediate(1.0f); 159 Node positive_zero = Immediate(std::copysignf(0, 1));
135 return Operation(OperationCode::FClamp, NO_PRECISE, value, positive_zero, positive_one); 160 Node positive_one = Immediate(1.0f);
161 return Operation(OperationCode::FClamp, NO_PRECISE, std::move(value), std::move(positive_zero),
162 std::move(positive_one));
136} 163}
137 164
138Node ShaderIR::ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed) { 165Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signed) {
139 switch (size) { 166 switch (size) {
140 case Register::Size::Byte: 167 case Register::Size::Byte:
141 value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value, 168 value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE,
142 Immediate(24)); 169 std::move(value), Immediate(24));
143 value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value, 170 value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE,
144 Immediate(24)); 171 std::move(value), Immediate(24));
145 return value; 172 return value;
146 case Register::Size::Short: 173 case Register::Size::Short:
147 value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value, 174 value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE,
148 Immediate(16)); 175 std::move(value), Immediate(16));
149 value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value, 176 value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE,
150 Immediate(16)); 177 std::move(value), Immediate(16));
151 case Register::Size::Word: 178 case Register::Size::Word:
152 // Default - do nothing 179 // Default - do nothing
153 return value; 180 return value;
@@ -163,27 +190,29 @@ Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, b
163 return value; 190 return value;
164 } 191 }
165 if (absolute) { 192 if (absolute) {
166 value = Operation(OperationCode::IAbsolute, NO_PRECISE, value); 193 value = Operation(OperationCode::IAbsolute, NO_PRECISE, std::move(value));
167 } 194 }
168 if (negate) { 195 if (negate) {
169 value = Operation(OperationCode::INegate, NO_PRECISE, value); 196 value = Operation(OperationCode::INegate, NO_PRECISE, std::move(value));
170 } 197 }
171 return value; 198 return value;
172} 199}
173 200
174Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) { 201Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) {
175 const Node value = Immediate(instr.half_imm.PackImmediates()); 202 Node value = Immediate(instr.half_imm.PackImmediates());
176 if (!has_negation) { 203 if (!has_negation) {
177 return value; 204 return value;
178 } 205 }
179 const Node first_negate = GetPredicate(instr.half_imm.first_negate != 0);
180 const Node second_negate = GetPredicate(instr.half_imm.second_negate != 0);
181 206
182 return Operation(OperationCode::HNegate, NO_PRECISE, value, first_negate, second_negate); 207 Node first_negate = GetPredicate(instr.half_imm.first_negate != 0);
208 Node second_negate = GetPredicate(instr.half_imm.second_negate != 0);
209
210 return Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), std::move(first_negate),
211 std::move(second_negate));
183} 212}
184 213
185Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) { 214Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) {
186 return Operation(OperationCode::HUnpack, type, value); 215 return Operation(OperationCode::HUnpack, type, std::move(value));
187} 216}
188 217
189Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { 218Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
@@ -191,11 +220,11 @@ Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
191 case Tegra::Shader::HalfMerge::H0_H1: 220 case Tegra::Shader::HalfMerge::H0_H1:
192 return src; 221 return src;
193 case Tegra::Shader::HalfMerge::F32: 222 case Tegra::Shader::HalfMerge::F32:
194 return Operation(OperationCode::HMergeF32, src); 223 return Operation(OperationCode::HMergeF32, std::move(src));
195 case Tegra::Shader::HalfMerge::Mrg_H0: 224 case Tegra::Shader::HalfMerge::Mrg_H0:
196 return Operation(OperationCode::HMergeH0, dest, src); 225 return Operation(OperationCode::HMergeH0, std::move(dest), std::move(src));
197 case Tegra::Shader::HalfMerge::Mrg_H1: 226 case Tegra::Shader::HalfMerge::Mrg_H1:
198 return Operation(OperationCode::HMergeH1, dest, src); 227 return Operation(OperationCode::HMergeH1, std::move(dest), std::move(src));
199 } 228 }
200 UNREACHABLE(); 229 UNREACHABLE();
201 return src; 230 return src;
@@ -203,10 +232,10 @@ Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
203 232
204Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) { 233Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) {
205 if (absolute) { 234 if (absolute) {
206 value = Operation(OperationCode::HAbsolute, NO_PRECISE, value); 235 value = Operation(OperationCode::HAbsolute, NO_PRECISE, std::move(value));
207 } 236 }
208 if (negate) { 237 if (negate) {
209 value = Operation(OperationCode::HNegate, NO_PRECISE, value, GetPredicate(true), 238 value = Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), GetPredicate(true),
210 GetPredicate(true)); 239 GetPredicate(true));
211 } 240 }
212 return value; 241 return value;
@@ -216,9 +245,11 @@ Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) {
216 if (!saturate) { 245 if (!saturate) {
217 return value; 246 return value;
218 } 247 }
219 const Node positive_zero = Immediate(std::copysignf(0, 1)); 248
220 const Node positive_one = Immediate(1.0f); 249 Node positive_zero = Immediate(std::copysignf(0, 1));
221 return Operation(OperationCode::HClamp, NO_PRECISE, value, positive_zero, positive_one); 250 Node positive_one = Immediate(1.0f);
251 return Operation(OperationCode::HClamp, NO_PRECISE, std::move(value), std::move(positive_zero),
252 std::move(positive_one));
222} 253}
223 254
224Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { 255Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) {
@@ -246,7 +277,6 @@ Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, N
246 condition == PredCondition::LessEqualWithNan || 277 condition == PredCondition::LessEqualWithNan ||
247 condition == PredCondition::GreaterThanWithNan || 278 condition == PredCondition::GreaterThanWithNan ||
248 condition == PredCondition::GreaterEqualWithNan) { 279 condition == PredCondition::GreaterEqualWithNan) {
249
250 predicate = Operation(OperationCode::LogicalOr, predicate, 280 predicate = Operation(OperationCode::LogicalOr, predicate,
251 Operation(OperationCode::LogicalFIsNan, op_a)); 281 Operation(OperationCode::LogicalFIsNan, op_a));
252 predicate = Operation(OperationCode::LogicalOr, predicate, 282 predicate = Operation(OperationCode::LogicalOr, predicate,
@@ -275,7 +305,8 @@ Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_si
275 UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), 305 UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
276 "Unknown predicate comparison operation"); 306 "Unknown predicate comparison operation");
277 307
278 Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, op_a, op_b); 308 Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a),
309 std::move(op_b));
279 310
280 UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan || 311 UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan ||
281 condition == PredCondition::NotEqualWithNan || 312 condition == PredCondition::NotEqualWithNan ||
@@ -305,9 +336,7 @@ Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition
305 UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), 336 UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
306 "Unknown predicate comparison operation"); 337 "Unknown predicate comparison operation");
307 338
308 const Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b); 339 return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b));
309
310 return predicate;
311} 340}
312 341
313OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { 342OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) {
@@ -333,31 +362,32 @@ Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) {
333} 362}
334 363
335void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) { 364void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) {
336 bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), src)); 365 bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), std::move(src)));
337} 366}
338 367
339void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) { 368void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) {
340 bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), src)); 369 bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), std::move(src)));
341} 370}
342 371
343void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) { 372void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) {
344 bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), value)); 373 bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), std::move(value)));
345} 374}
346 375
347void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) { 376void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) {
348 bb.push_back(Operation(OperationCode::Assign, GetLocalMemory(address), value)); 377 bb.push_back(
378 Operation(OperationCode::Assign, GetLocalMemory(std::move(address)), std::move(value)));
349} 379}
350 380
351void ShaderIR::SetTemporal(NodeBlock& bb, u32 id, Node value) { 381void ShaderIR::SetTemporary(NodeBlock& bb, u32 id, Node value) {
352 SetRegister(bb, Register::ZeroIndex + 1 + id, value); 382 SetRegister(bb, Register::ZeroIndex + 1 + id, std::move(value));
353} 383}
354 384
355void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) { 385void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) {
356 if (!sets_cc) { 386 if (!sets_cc) {
357 return; 387 return;
358 } 388 }
359 const Node zerop = Operation(OperationCode::LogicalFEqual, value, Immediate(0.0f)); 389 Node zerop = Operation(OperationCode::LogicalFEqual, std::move(value), Immediate(0.0f));
360 SetInternalFlag(bb, InternalFlag::Zero, zerop); 390 SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop));
361 LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); 391 LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
362} 392}
363 393
@@ -365,14 +395,14 @@ void ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_
365 if (!sets_cc) { 395 if (!sets_cc) {
366 return; 396 return;
367 } 397 }
368 const Node zerop = Operation(OperationCode::LogicalIEqual, value, Immediate(0)); 398 Node zerop = Operation(OperationCode::LogicalIEqual, std::move(value), Immediate(0));
369 SetInternalFlag(bb, InternalFlag::Zero, zerop); 399 SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop));
370 LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); 400 LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
371} 401}
372 402
373Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { 403Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) {
374 return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, value, Immediate(offset), 404 return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, std::move(value),
375 Immediate(bits)); 405 Immediate(offset), Immediate(bits));
376} 406}
377 407
378} // namespace VideoCommon::Shader 408} // namespace VideoCommon::Shader
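The ConvertIntegerSize hunk keeps the classic shift-pair idiom while moving its operands: shifting a narrow value up to the top of the register and arithmetic-shifting it back down sign-extends it. A host-side equivalent for the byte case (well-defined under C++20 conversion and shift rules):

#include <cstdint>

constexpr std::int32_t SignExtendByte(std::uint32_t value) {
    // Shift the byte into the top of the word, then arithmetic-shift it back.
    return static_cast<std::int32_t>(value << 24) >> 24;
}
static_assert(SignExtendByte(0xFF) == -1);
static_assert(SignExtendByte(0x80) == -128);
static_assert(SignExtendByte(0x7F) == 127);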
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index e22548208..59a083d90 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -5,13 +5,10 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <cstring>
9#include <map> 8#include <map>
10#include <optional> 9#include <optional>
11#include <set> 10#include <set>
12#include <string>
13#include <tuple> 11#include <tuple>
14#include <variant>
15#include <vector> 12#include <vector>
16 13
17#include "common/common_types.h" 14#include "common/common_types.h"
@@ -22,18 +19,12 @@
22 19
23namespace VideoCommon::Shader { 20namespace VideoCommon::Shader {
24 21
22struct ShaderBlock;
23
25using ProgramCode = std::vector<u64>; 24using ProgramCode = std::vector<u64>;
26 25
27constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; 26constexpr u32 MAX_PROGRAM_LENGTH = 0x1000;
28 27
29/// Describes the behaviour of code path of a given entry point and a return point.
30enum class ExitMethod {
31 Undetermined, ///< Internal value. Only occur when analyzing JMP loop.
32 AlwaysReturn, ///< All code paths reach the return point.
33 Conditional, ///< Code path reaches the return point or an END instruction conditionally.
34 AlwaysEnd, ///< All code paths reach a END instruction.
35};
36
37class ConstBuffer { 28class ConstBuffer {
38public: 29public:
39 explicit ConstBuffer(u32 max_offset, bool is_indirect) 30 explicit ConstBuffer(u32 max_offset, bool is_indirect)
@@ -73,7 +64,7 @@ struct GlobalMemoryUsage {
73 64
74class ShaderIR final { 65class ShaderIR final {
75public: 66public:
76 explicit ShaderIR(const ProgramCode& program_code, u32 main_offset); 67 explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size);
77 ~ShaderIR(); 68 ~ShaderIR();
78 69
79 const std::map<u32, NodeBlock>& GetBasicBlocks() const { 70 const std::map<u32, NodeBlock>& GetBasicBlocks() const {
@@ -121,6 +112,18 @@ public:
121 return static_cast<std::size_t>(coverage_end * sizeof(u64)); 112 return static_cast<std::size_t>(coverage_end * sizeof(u64));
122 } 113 }
123 114
115 bool UsesLayer() const {
116 return uses_layer;
117 }
118
119 bool UsesViewportIndex() const {
120 return uses_viewport_index;
121 }
122
123 bool UsesPointSize() const {
124 return uses_point_size;
125 }
126
124 bool HasPhysicalAttributes() const { 127 bool HasPhysicalAttributes() const {
125 return uses_physical_attributes; 128 return uses_physical_attributes;
126 } 129 }
@@ -129,12 +132,20 @@ public:
129 return header; 132 return header;
130 } 133 }
131 134
135 bool IsFlowStackDisabled() const {
136 return disable_flow_stack;
137 }
138
139 u32 ConvertAddressToNvidiaSpace(const u32 address) const {
140 return (address - main_offset) * sizeof(Tegra::Shader::Instruction);
141 }
142
132private: 143private:
133 void Decode(); 144 void Decode();
134 145
135 ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels);
136
137 NodeBlock DecodeRange(u32 begin, u32 end); 146 NodeBlock DecodeRange(u32 begin, u32 end);
147 void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end);
148 void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block);
138 149
139 /** 150 /**
140 * Decodes a single instruction from Tegra to IR. 151 * Decodes a single instruction from Tegra to IR.
@@ -196,8 +207,8 @@ private:
196 Node GetInternalFlag(InternalFlag flag, bool negated = false); 207 Node GetInternalFlag(InternalFlag flag, bool negated = false);
197 /// Generates a node representing a local memory address 208 /// Generates a node representing a local memory address
198 Node GetLocalMemory(Node address); 209 Node GetLocalMemory(Node address);
199 /// Generates a temporal, internally it uses a post-RZ register 210 /// Generates a temporary; internally it uses a post-RZ register
200 Node GetTemporal(u32 id); 211 Node GetTemporary(u32 id);
201 212
202 /// Sets a register. src value must be a number-evaluated node. 213 /// Sets a register. src value must be a number-evaluated node.
203 void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src); 214 void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src);
@@ -207,8 +218,8 @@ private:
207 void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value); 218 void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value);
208 /// Sets a local memory address. address and value must be a number-evaluated node 219 /// Sets a local memory address. address and value must be a number-evaluated node
209 void SetLocalMemory(NodeBlock& bb, Node address, Node value); 220 void SetLocalMemory(NodeBlock& bb, Node address, Node value);
210 /// Sets a temporal. Internally it uses a post-RZ register 221 /// Sets a temporary. Internally it uses a post-RZ register
211 void SetTemporal(NodeBlock& bb, u32 id, Node value); 222 void SetTemporary(NodeBlock& bb, u32 id, Node value);
212 223
213 /// Sets internal flags from a float 224 /// Sets internal flags from a float
214 void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true); 225 void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true);
@@ -314,7 +325,7 @@ private:
314 void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, 325 void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
315 Node op_c, Node imm_lut, bool sets_cc); 326 Node op_c, Node imm_lut, bool sets_cc);
316 327
317 Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; 328 std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
318 329
319 std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; 330 std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
320 331
@@ -326,10 +337,11 @@ private:
326 337
327 const ProgramCode& program_code; 338 const ProgramCode& program_code;
328 const u32 main_offset; 339 const u32 main_offset;
340 const std::size_t program_size;
341 bool disable_flow_stack{};
329 342
330 u32 coverage_begin{}; 343 u32 coverage_begin{};
331 u32 coverage_end{}; 344 u32 coverage_end{};
332 std::map<std::pair<u32, u32>, ExitMethod> exit_method_map;
333 345
334 std::map<u32, NodeBlock> basic_blocks; 346 std::map<u32, NodeBlock> basic_blocks;
335 NodeBlock global_code; 347 NodeBlock global_code;
@@ -343,6 +355,9 @@ private:
343 std::set<Image> used_images; 355 std::set<Image> used_images;
344 std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; 356 std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
345 std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; 357 std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;
358 bool uses_layer{};
359 bool uses_viewport_index{};
360 bool uses_point_size{};
346 bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes 361 bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes
347 362
348 Tegra::Shader::Header header; 363 Tegra::Shader::Header header;
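ConvertAddressToNvidiaSpace above is what feeds the new {:05x} addresses in the decoder's comment output: IR offsets count 64-bit instructions from main_offset, while Nvidia-space addresses count bytes from the entry point. Worked out:

#include <cstdint>

constexpr std::uint32_t ConvertAddressToNvidiaSpace(std::uint32_t address,
                                                    std::uint32_t main_offset) {
    return (address - main_offset) * 8; // sizeof(Tegra::Shader::Instruction) == 8 bytes
}
static_assert(ConvertAddressToNvidiaSpace(10, 10) == 0x00); // entry point
static_assert(ConvertAddressToNvidiaSpace(12, 10) == 0x10); // two instructions in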
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index fc957d980..a53e02253 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -15,56 +15,63 @@ namespace {
15std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, 15std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
16 OperationCode operation_code) { 16 OperationCode operation_code) {
17 for (; cursor >= 0; --cursor) { 17 for (; cursor >= 0; --cursor) {
18 const Node node = code.at(cursor); 18 Node node = code.at(cursor);
19
19 if (const auto operation = std::get_if<OperationNode>(&*node)) { 20 if (const auto operation = std::get_if<OperationNode>(&*node)) {
20 if (operation->GetCode() == operation_code) { 21 if (operation->GetCode() == operation_code) {
21 return {node, cursor}; 22 return {std::move(node), cursor};
22 } 23 }
23 } 24 }
25
24 if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { 26 if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
25 const auto& conditional_code = conditional->GetCode(); 27 const auto& conditional_code = conditional->GetCode();
26 const auto [found, internal_cursor] = FindOperation( 28 auto [found, internal_cursor] = FindOperation(
27 conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code); 29 conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code);
28 if (found) { 30 if (found) {
29 return {found, cursor}; 31 return {std::move(found), cursor};
30 } 32 }
31 } 33 }
32 } 34 }
33 return {}; 35 return {};
34} 36}
35} // namespace 37} // Anonymous namespace
36 38
37Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const { 39std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code,
40 s64 cursor) const {
38 if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { 41 if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
39 // Cbuf found, but it has to be immediate 42 // Constant buffer found, test if it's an immediate
40 return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr; 43 const auto offset = cbuf->GetOffset();
44 if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
45 return {tracked, cbuf->GetIndex(), immediate->GetValue()};
46 }
47 return {};
41 } 48 }
42 if (const auto gpr = std::get_if<GprNode>(&*tracked)) { 49 if (const auto gpr = std::get_if<GprNode>(&*tracked)) {
43 if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { 50 if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
44 return nullptr; 51 return {};
45 } 52 }
46 // Reduce the cursor by one to avoid infinite loops when the instruction sets the same 53 // Reduce the cursor by one to avoid infinite loops when the instruction sets the same
47 // register that it uses as an operand 54 // register that it uses as an operand
48 const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); 55 const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1);
49 if (!source) { 56 if (!source) {
50 return nullptr; 57 return {};
51 } 58 }
52 return TrackCbuf(source, code, new_cursor); 59 return TrackCbuf(source, code, new_cursor);
53 } 60 }
54 if (const auto operation = std::get_if<OperationNode>(&*tracked)) { 61 if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
55 for (std::size_t i = 0; i < operation->GetOperandsCount(); ++i) { 62 for (std::size_t i = 0; i < operation->GetOperandsCount(); ++i) {
56 if (const auto found = TrackCbuf((*operation)[i], code, cursor)) { 63 if (auto found = TrackCbuf((*operation)[i], code, cursor); std::get<0>(found)) {
57 // Cbuf found in operand 64 // Cbuf found in operand.
58 return found; 65 return found;
59 } 66 }
60 } 67 }
61 return nullptr; 68 return {};
62 } 69 }
63 if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { 70 if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) {
64 const auto& conditional_code = conditional->GetCode(); 71 const auto& conditional_code = conditional->GetCode();
65 return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size())); 72 return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size()));
66 } 73 }
67 return nullptr; 74 return {};
68} 75}
69 76
70std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const { 77std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const {
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
index 7a0fdb19b..6af9044ca 100644
--- a/src/video_core/texture_cache/surface_base.cpp
+++ b/src/video_core/texture_cache/surface_base.cpp
@@ -75,9 +75,12 @@ MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs)
 
     // Linear Surface check
     if (!params.is_tiled) {
-        if (std::tie(params.width, params.height, params.pitch) ==
-            std::tie(rhs.width, rhs.height, rhs.pitch)) {
-            return MatchStructureResult::FullMatch;
+        if (std::tie(params.height, params.pitch) == std::tie(rhs.height, rhs.pitch)) {
+            if (params.width == rhs.width) {
+                return MatchStructureResult::FullMatch;
+            } else {
+                return MatchStructureResult::SemiMatch;
+            }
         }
         return MatchStructureResult::None;
     }
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h
index 8ba386a8a..bcce8d863 100644
--- a/src/video_core/texture_cache/surface_base.h
+++ b/src/video_core/texture_cache/surface_base.h
@@ -200,8 +200,9 @@ public:
         modification_tick = tick;
     }
 
-    void MarkAsRenderTarget(const bool is_target) {
+    void MarkAsRenderTarget(const bool is_target, const u32 index) {
         this->is_target = is_target;
+        this->index = index;
     }
 
     void MarkAsPicked(const bool is_picked) {
@@ -221,6 +222,10 @@ public:
         return is_target;
     }
 
+    u32 GetRenderTarget() const {
+        return index;
+    }
+
     bool IsRegistered() const {
         return is_registered;
     }
@@ -307,10 +312,13 @@ private:
         return view;
     }
 
+    static constexpr u32 NO_RT = 0xFFFFFFFF;
+
     bool is_modified{};
     bool is_target{};
     bool is_registered{};
     bool is_picked{};
+    u32 index{NO_RT};
     u64 modification_tick{};
 };
 
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
index 9c56e2b4f..fd5472451 100644
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -290,12 +290,19 @@ std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) co
 
 std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size,
                                                     bool uncompressed) const {
-    const bool tiled{as_host_size ? false : is_tiled};
     const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())};
     const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())};
     const u32 depth{is_layered ? 1U : GetMipDepth(level)};
-    return Tegra::Texture::CalculateSize(tiled, GetBytesPerPixel(), width, height, depth,
-                                         GetMipBlockHeight(level), GetMipBlockDepth(level));
+    if (is_tiled) {
+        return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), width, height,
+                                             depth, GetMipBlockHeight(level),
+                                             GetMipBlockDepth(level));
+    } else if (as_host_size || IsBuffer()) {
+        return GetBytesPerPixel() * width * height * depth;
+    } else {
+        // Linear Texture Case
+        return pitch * height * depth;
+    }
 }
 
 bool SurfaceParams::operator==(const SurfaceParams& rhs) const {
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index c9e72531a..a3a3770a7 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -116,10 +116,10 @@ public:
         std::lock_guard lock{mutex};
         auto& maxwell3d = system.GPU().Maxwell3D();
 
-        if (!maxwell3d.dirty_flags.zeta_buffer) {
+        if (!maxwell3d.dirty.depth_buffer) {
             return depth_buffer.view;
         }
-        maxwell3d.dirty_flags.zeta_buffer = false;
+        maxwell3d.dirty.depth_buffer = false;
 
         const auto& regs{maxwell3d.regs};
         const auto gpu_addr{regs.zeta.Address()};
@@ -133,11 +133,11 @@ public:
             regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)};
         auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true);
         if (depth_buffer.target)
-            depth_buffer.target->MarkAsRenderTarget(false);
+            depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
         depth_buffer.target = surface_view.first;
         depth_buffer.view = surface_view.second;
         if (depth_buffer.target)
-            depth_buffer.target->MarkAsRenderTarget(true);
+            depth_buffer.target->MarkAsRenderTarget(true, DEPTH_RT);
         return surface_view.second;
     }
 
@@ -145,10 +145,10 @@ public:
         std::lock_guard lock{mutex};
         ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
         auto& maxwell3d = system.GPU().Maxwell3D();
-        if (!maxwell3d.dirty_flags.color_buffer[index]) {
+        if (!maxwell3d.dirty.render_target[index]) {
             return render_targets[index].view;
         }
-        maxwell3d.dirty_flags.color_buffer.reset(index);
+        maxwell3d.dirty.render_target[index] = false;
 
         const auto& regs{maxwell3d.regs};
         if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 ||
@@ -167,11 +167,11 @@ public:
         auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
                                        preserve_contents, true);
         if (render_targets[index].target)
-            render_targets[index].target->MarkAsRenderTarget(false);
+            render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
         render_targets[index].target = surface_view.first;
         render_targets[index].view = surface_view.second;
         if (render_targets[index].target)
-            render_targets[index].target->MarkAsRenderTarget(true);
+            render_targets[index].target->MarkAsRenderTarget(true, static_cast<u32>(index));
         return surface_view.second;
     }
 
@@ -191,7 +191,7 @@ public:
         if (depth_buffer.target == nullptr) {
             return;
         }
-        depth_buffer.target->MarkAsRenderTarget(false);
+        depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
         depth_buffer.target = nullptr;
         depth_buffer.view = nullptr;
     }
@@ -200,7 +200,7 @@ public:
         if (render_targets[index].target == nullptr) {
             return;
         }
-        render_targets[index].target->MarkAsRenderTarget(false);
+        render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
         render_targets[index].target = nullptr;
         render_targets[index].view = nullptr;
     }
@@ -270,6 +270,17 @@ protected:
     // and reading it from a sepparate buffer.
     virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0;
 
+    void ManageRenderTargetUnregister(TSurface& surface) {
+        auto& maxwell3d = system.GPU().Maxwell3D();
+        const u32 index = surface->GetRenderTarget();
+        if (index == DEPTH_RT) {
+            maxwell3d.dirty.depth_buffer = true;
+        } else {
+            maxwell3d.dirty.render_target[index] = true;
+        }
+        maxwell3d.dirty.render_settings = true;
+    }
+
     void Register(TSurface surface) {
         const GPUVAddr gpu_addr = surface->GetGpuAddr();
         const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr));
@@ -294,6 +305,9 @@ protected:
         if (guard_render_targets && surface->IsProtected()) {
             return;
         }
+        if (!guard_render_targets && surface->IsRenderTarget()) {
+            ManageRenderTargetUnregister(surface);
+        }
         const GPUVAddr gpu_addr = surface->GetGpuAddr();
         const CacheAddr cache_ptr = surface->GetCacheAddr();
         const std::size_t size = surface->GetSizeInBytes();
@@ -649,15 +663,6 @@ private:
                 }
                 return {current_surface, *view};
             }
-            // The next case is unsafe, so if we r in accurate GPU, just skip it
-            if (Settings::values.use_accurate_gpu_emulation) {
-                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
-                                      MatchTopologyResult::FullMatch);
-            }
-            // This is the case the texture is a part of the parent.
-            if (current_surface->MatchesSubTexture(params, gpu_addr)) {
-                return RebuildSurface(current_surface, params, is_render);
-            }
         } else {
             // If there are many overlaps, odds are they are subtextures of the candidate
             // surface. We try to construct a new surface based on the candidate parameters,
@@ -793,6 +798,9 @@ private:
     static constexpr u64 registry_page_size{1 << registry_page_bits};
     std::unordered_map<CacheAddr, std::vector<TSurface>> registry;
 
+    static constexpr u32 DEPTH_RT = 8;
+    static constexpr u32 NO_RT = 0xFFFFFFFF;
+
     // The L1 Cache is used for fast texture lookup before checking the overlaps
     // This avoids calculating size and other stuffs.
     std::unordered_map<CacheAddr, TSurface> l1_cache;
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index 73978ff5b..b7f3fdf75 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -436,8 +436,6 @@ void Config::ReadControlValues() {
 void Config::ReadCoreValues() {
     qt_config->beginGroup(QStringLiteral("Core"));
 
-    Settings::values.cpu_jit_enabled =
-        ReadSetting(QStringLiteral("cpu_jit_enabled"), true).toBool();
     Settings::values.use_multi_core = ReadSetting(QStringLiteral("use_multi_core"), false).toBool();
 
     qt_config->endGroup();
@@ -831,7 +829,6 @@ void Config::SaveControlValues() {
 void Config::SaveCoreValues() {
     qt_config->beginGroup(QStringLiteral("Core"));
 
-    WriteSetting(QStringLiteral("cpu_jit_enabled"), Settings::values.cpu_jit_enabled, true);
     WriteSetting(QStringLiteral("use_multi_core"), Settings::values.use_multi_core, false);
 
     qt_config->endGroup();
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 30b22341b..067d58d80 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -340,7 +340,6 @@ void Config::ReadValues() {
     }
 
     // Core
-    Settings::values.cpu_jit_enabled = sdl2_config->GetBoolean("Core", "cpu_jit_enabled", true);
     Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false);
 
     // Renderer
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index 4f1add434..0cfc111a6 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -76,10 +76,6 @@ motion_device=
 touch_device=
 
 [Core]
-# Whether to use the Just-In-Time (JIT) compiler for CPU emulation
-# 0: Interpreter (slow), 1 (default): JIT (fast)
-cpu_jit_enabled =
-
 # Whether to use multi-core for CPU emulation
 # 0 (default): Disabled, 1: Enabled
 use_multi_core=
diff --git a/src/yuzu_tester/config.cpp b/src/yuzu_tester/config.cpp
index b96b7d279..9a11dc6c3 100644
--- a/src/yuzu_tester/config.cpp
+++ b/src/yuzu_tester/config.cpp
@@ -114,7 +114,6 @@ void Config::ReadValues() {
     }
 
     // Core
-    Settings::values.cpu_jit_enabled = sdl2_config->GetBoolean("Core", "cpu_jit_enabled", true);
     Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false);
 
     // Renderer
diff --git a/src/yuzu_tester/default_ini.h b/src/yuzu_tester/default_ini.h
index 0f880d8c7..9a3e86d68 100644
--- a/src/yuzu_tester/default_ini.h
+++ b/src/yuzu_tester/default_ini.h
@@ -8,10 +8,6 @@ namespace DefaultINI {
 
 const char* sdl2_config_file = R"(
 [Core]
-# Whether to use the Just-In-Time (JIT) compiler for CPU emulation
-# 0: Interpreter (slow), 1 (default): JIT (fast)
-cpu_jit_enabled =
-
 # Whether to use multi-core for CPU emulation
 # 0 (default): Disabled, 1: Enabled
 use_multi_core=