Diffstat (limited to 'src')
-rw-r--r--  49  src/common/x64/native_clock.cpp
-rw-r--r--   3  src/core/arm/arm_interface.h
-rw-r--r--  37  src/core/arm/dynarmic/arm_dynarmic_32.cpp
-rw-r--r--   4  src/core/arm/dynarmic/arm_dynarmic_32.h
-rw-r--r--  38  src/core/arm/dynarmic/arm_dynarmic_64.cpp
-rw-r--r--   4  src/core/arm/dynarmic/arm_dynarmic_64.h
-rw-r--r--   1  src/core/hle/kernel/physical_core.cpp
-rw-r--r--  25  src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
-rw-r--r--  19  src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
-rw-r--r--   1  src/shader_recompiler/backend/glsl/glsl_emit_context.h
-rw-r--r--  21  src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
-rw-r--r--   9  src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
-rw-r--r--   1  src/shader_recompiler/shader_info.h
-rw-r--r--   1  src/video_core/host_shaders/CMakeLists.txt
-rw-r--r--  18  src/video_core/host_shaders/opengl_convert_s8d24.comp
-rw-r--r--   2  src/video_core/renderer_opengl/gl_rasterizer.cpp
-rw-r--r--  13  src/video_core/renderer_opengl/gl_texture_cache.cpp
-rw-r--r--   3  src/video_core/renderer_opengl/gl_texture_cache.h
-rw-r--r--   7  src/video_core/renderer_opengl/renderer_opengl.cpp
-rw-r--r--   2  src/video_core/renderer_opengl/renderer_opengl.h
-rw-r--r--  24  src/video_core/renderer_opengl/util_shaders.cpp
-rw-r--r--   3  src/video_core/renderer_opengl/util_shaders.h
-rw-r--r--   5  src/video_core/renderer_vulkan/vk_blit_screen.cpp
23 files changed, 199 insertions(+), 91 deletions(-)
diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp
index 7a3f21dcf..7fd9d22f8 100644
--- a/src/common/x64/native_clock.cpp
+++ b/src/common/x64/native_clock.cpp
@@ -10,25 +10,49 @@
 #include "common/uint128.h"
 #include "common/x64/native_clock.h"
 
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+
 namespace Common {
 
+#ifdef _MSC_VER
+__forceinline static u64 FencedRDTSC() {
+    _mm_lfence();
+    _ReadWriteBarrier();
+    const u64 result = __rdtsc();
+    _mm_lfence();
+    _ReadWriteBarrier();
+    return result;
+}
+#else
+static u64 FencedRDTSC() {
+    u64 result;
+    asm volatile("lfence\n\t"
+                 "rdtsc\n\t"
+                 "shl $32, %%rdx\n\t"
+                 "or %%rdx, %0\n\t"
+                 "lfence"
+                 : "=a"(result)
+                 :
+                 : "rdx", "memory", "cc");
+    return result;
+}
+#endif
+
 u64 EstimateRDTSCFrequency() {
     // Discard the first result measuring the rdtsc.
-    _mm_mfence();
-    __rdtsc();
+    FencedRDTSC();
     std::this_thread::sleep_for(std::chrono::milliseconds{1});
-    _mm_mfence();
-    __rdtsc();
+    FencedRDTSC();
 
     // Get the current time.
     const auto start_time = std::chrono::steady_clock::now();
-    _mm_mfence();
-    const u64 tsc_start = __rdtsc();
+    const u64 tsc_start = FencedRDTSC();
     // Wait for 200 milliseconds.
     std::this_thread::sleep_for(std::chrono::milliseconds{200});
     const auto end_time = std::chrono::steady_clock::now();
-    _mm_mfence();
-    const u64 tsc_end = __rdtsc();
+    const u64 tsc_end = FencedRDTSC();
     // Calculate differences.
     const u64 timer_diff = static_cast<u64>(
         std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count());
@@ -42,8 +66,7 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen
                          u64 rtsc_frequency_)
     : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{
                                                                                rtsc_frequency_} {
-    _mm_mfence();
-    time_point.inner.last_measure = __rdtsc();
+    time_point.inner.last_measure = FencedRDTSC();
     time_point.inner.accumulated_ticks = 0U;
     ns_rtsc_factor = GetFixedPoint64Factor(NS_RATIO, rtsc_frequency);
    us_rtsc_factor = GetFixedPoint64Factor(US_RATIO, rtsc_frequency);
@@ -58,8 +81,7 @@ u64 NativeClock::GetRTSC() {
 
     current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
     do {
-        _mm_mfence();
-        const u64 current_measure = __rdtsc();
+        const u64 current_measure = FencedRDTSC();
         u64 diff = current_measure - current_time_point.inner.last_measure;
         diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0)
         new_time_point.inner.last_measure = current_measure > current_time_point.inner.last_measure
@@ -80,8 +102,7 @@ void NativeClock::Pause(bool is_paused) {
         current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
         do {
             new_time_point.pack = current_time_point.pack;
-            _mm_mfence();
-            new_time_point.inner.last_measure = __rdtsc();
+            new_time_point.inner.last_measure = FencedRDTSC();
         } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack,
                                                current_time_point.pack, current_time_point.pack));
     }
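
Aside: a minimal standalone sketch (not part of the patch) of how a FencedRDTSC()-style helper is meant to be used. The lfence on either side of rdtsc keeps the timestamp read from being reordered around the sleep, so the tick delta over the elapsed wall time approximates the invariant TSC frequency. GCC/Clang on x86-64 assumed; this sketch uses the __rdtsc intrinsic from <x86intrin.h> instead of the patch's inline asm.

// sketch only: measure TSC frequency with serialized rdtsc reads
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <thread>
#include <x86intrin.h>

static std::uint64_t FencedRDTSC() {
    _mm_lfence();                               // do not start rdtsc early
    const std::uint64_t result = __rdtsc();
    _mm_lfence();                               // do not let later loads pass it
    return result;
}

int main() {
    const auto start_time = std::chrono::steady_clock::now();
    const std::uint64_t tsc_start = FencedRDTSC();
    std::this_thread::sleep_for(std::chrono::milliseconds{200});
    const std::uint64_t tsc_end = FencedRDTSC();
    const auto end_time = std::chrono::steady_clock::now();
    const double seconds = std::chrono::duration<double>(end_time - start_time).count();
    std::printf("approx. TSC frequency: %.0f Hz\n",
                static_cast<double>(tsc_end - tsc_start) / seconds);
}
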
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index c60322442..dce2f4195 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -171,6 +171,9 @@ public:
     /// Prepare core for thread reschedule (if needed to correctly handle state)
     virtual void PrepareReschedule() = 0;
 
+    /// Signal an interrupt and ask the core to halt as soon as possible.
+    virtual void SignalInterrupt() = 0;
+
     struct BacktraceEntry {
         std::string module;
         u64 address;
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index 054572445..ab3210d84 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -25,6 +25,9 @@ namespace Core {
 
 using namespace Common::Literals;
 
+constexpr Dynarmic::HaltReason break_loop = Dynarmic::HaltReason::UserDefined2;
+constexpr Dynarmic::HaltReason svc_call = Dynarmic::HaltReason::UserDefined3;
+
 class DynarmicCallbacks32 : public Dynarmic::A32::UserCallbacks {
 public:
     explicit DynarmicCallbacks32(ARM_Dynarmic_32& parent_)
@@ -84,15 +87,13 @@ public:
     }
 
     void CallSVC(u32 swi) override {
-        parent.svc_called = true;
         parent.svc_swi = swi;
-        parent.jit->HaltExecution();
+        parent.jit->HaltExecution(svc_call);
     }
 
     void AddTicks(u64 ticks) override {
-        if (parent.uses_wall_clock) {
-            return;
-        }
+        ASSERT_MSG(!parent.uses_wall_clock, "This should never happen - dynarmic ticking disabled");
+
         // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a
         // rough approximation of the amount of executed ticks in the system, it may be thrown off
         // if not all cores are doing a similar amount of work. Instead of doing this, we should
@@ -108,12 +109,8 @@ public:
     }
 
     u64 GetTicksRemaining() override {
-        if (parent.uses_wall_clock) {
-            if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) {
-                return minimum_run_cycles;
-            }
-            return 0U;
-        }
+        ASSERT_MSG(!parent.uses_wall_clock, "This should never happen - dynarmic ticking disabled");
+
         return std::max<s64>(parent.system.CoreTiming().GetDowncount(), 0);
     }
 
@@ -148,6 +145,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
 
     // Timing
     config.wall_clock_cntpct = uses_wall_clock;
+    config.enable_cycle_counting = !uses_wall_clock;
 
     // Code cache size
     config.code_cache_size = 512_MiB;
@@ -230,13 +228,11 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
 
 void ARM_Dynarmic_32::Run() {
     while (true) {
-        jit->Run();
-        if (!svc_called) {
-            break;
+        const auto hr = jit->Run();
+        if (Has(hr, svc_call)) {
+            Kernel::Svc::Call(system, svc_swi);
         }
-        svc_called = false;
-        Kernel::Svc::Call(system, svc_swi);
-        if (shutdown) {
+        if (Has(hr, break_loop)) {
             break;
         }
     }
@@ -322,8 +318,11 @@ void ARM_Dynarmic_32::LoadContext(const ThreadContext32& ctx) {
 }
 
 void ARM_Dynarmic_32::PrepareReschedule() {
-    jit->HaltExecution();
-    shutdown = true;
+    jit->HaltExecution(break_loop);
+}
+
+void ARM_Dynarmic_32::SignalInterrupt() {
+    jit->HaltExecution(break_loop);
 }
 
 void ARM_Dynarmic_32::ClearInstructionCache() {
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.h b/src/core/arm/dynarmic/arm_dynarmic_32.h
index 5d47b600d..3f68a4ff1 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.h
@@ -57,6 +57,7 @@ public:
     void LoadContext(const ThreadContext64& ctx) override {}
 
     void PrepareReschedule() override;
+    void SignalInterrupt() override;
     void ClearExclusiveState() override;
 
     void ClearInstructionCache() override;
@@ -83,9 +84,6 @@ private:
 
     // SVC callback
     u32 svc_swi{};
-    bool svc_called{};
-
-    bool shutdown{};
 };
 
 } // namespace Core
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index 7ff8f9495..68822a1fc 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -26,6 +26,9 @@ namespace Core {
 using Vector = Dynarmic::A64::Vector;
 using namespace Common::Literals;
 
+constexpr Dynarmic::HaltReason break_loop = Dynarmic::HaltReason::UserDefined2;
+constexpr Dynarmic::HaltReason svc_call = Dynarmic::HaltReason::UserDefined3;
+
 class DynarmicCallbacks64 : public Dynarmic::A64::UserCallbacks {
 public:
     explicit DynarmicCallbacks64(ARM_Dynarmic_64& parent_)
@@ -106,7 +109,7 @@ public:
             break;
         }
 
-        parent.jit->HaltExecution();
+        parent.jit->HaltExecution(Dynarmic::HaltReason::CacheInvalidation);
     }
 
     void ExceptionRaised(u64 pc, Dynarmic::A64::Exception exception) override {
@@ -126,15 +129,12 @@ public:
     }
 
     void CallSVC(u32 swi) override {
-        parent.svc_called = true;
         parent.svc_swi = swi;
-        parent.jit->HaltExecution();
+        parent.jit->HaltExecution(svc_call);
     }
 
     void AddTicks(u64 ticks) override {
-        if (parent.uses_wall_clock) {
-            return;
-        }
+        ASSERT_MSG(!parent.uses_wall_clock, "This should never happen - dynarmic ticking disabled");
 
         // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a
         // rough approximation of the amount of executed ticks in the system, it may be thrown off
@@ -149,12 +149,8 @@ public:
     }
 
     u64 GetTicksRemaining() override {
-        if (parent.uses_wall_clock) {
-            if (!parent.interrupt_handlers[parent.core_index].IsInterrupted()) {
-                return minimum_run_cycles;
-            }
-            return 0U;
-        }
+        ASSERT_MSG(!parent.uses_wall_clock, "This should never happen - dynarmic ticking disabled");
+
         return std::max<s64>(parent.system.CoreTiming().GetDowncount(), 0);
     }
 
@@ -210,6 +206,7 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
 
     // Timing
     config.wall_clock_cntpct = uses_wall_clock;
+    config.enable_cycle_counting = !uses_wall_clock;
 
     // Code cache size
     config.code_cache_size = 512_MiB;
@@ -292,13 +289,11 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
 
 void ARM_Dynarmic_64::Run() {
     while (true) {
-        jit->Run();
-        if (!svc_called) {
-            break;
+        const auto hr = jit->Run();
+        if (Has(hr, svc_call)) {
+            Kernel::Svc::Call(system, svc_swi);
        }
-        svc_called = false;
-        Kernel::Svc::Call(system, svc_swi);
-        if (shutdown) {
+        if (Has(hr, break_loop)) {
             break;
         }
     }
@@ -389,8 +384,11 @@ void ARM_Dynarmic_64::LoadContext(const ThreadContext64& ctx) {
 }
 
 void ARM_Dynarmic_64::PrepareReschedule() {
-    jit->HaltExecution();
-    shutdown = true;
+    jit->HaltExecution(break_loop);
+}
+
+void ARM_Dynarmic_64::SignalInterrupt() {
+    jit->HaltExecution(break_loop);
 }
 
 void ARM_Dynarmic_64::ClearInstructionCache() {
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.h b/src/core/arm/dynarmic/arm_dynarmic_64.h
index 0c4e46c64..58bc7fbec 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.h
@@ -51,6 +51,7 @@ public:
     void LoadContext(const ThreadContext64& ctx) override;
 
     void PrepareReschedule() override;
+    void SignalInterrupt() override;
     void ClearExclusiveState() override;
 
     void ClearInstructionCache() override;
@@ -77,9 +78,6 @@ private:
 
     // SVC callback
     u32 svc_swi{};
-    bool svc_called{};
-
-    bool shutdown{};
 };
 
 } // namespace Core
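
Aside: an illustrative sketch (not from the patch, and not dynarmic's actual API) of the halt-reason pattern the new Run() loops rely on. Halt reasons are bit flags, several of which can be raised before the JIT returns, and the loop services each flag it finds; the enum values, operator, and Has() below are stand-ins.

// sketch only: bit-flag halt reasons serviced after a JIT run
#include <cstdint>

enum class HaltReason : std::uint32_t {
    SvcCall   = 1u << 0,
    BreakLoop = 1u << 1,
};

constexpr HaltReason operator|(HaltReason a, HaltReason b) {
    return static_cast<HaltReason>(static_cast<std::uint32_t>(a) |
                                   static_cast<std::uint32_t>(b));
}

constexpr bool Has(HaltReason hr, HaltReason flag) {
    return (static_cast<std::uint32_t>(hr) & static_cast<std::uint32_t>(flag)) != 0;
}

int main() {
    // Both reasons raised at once: the SVC is serviced first, then the loop exits.
    const HaltReason hr = HaltReason::SvcCall | HaltReason::BreakLoop;
    if (Has(hr, HaltReason::SvcCall)) {
        // Kernel::Svc::Call(system, svc_swi); in the real loop
    }
    return Has(hr, HaltReason::BreakLoop) ? 0 : 1;
}
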
diff --git a/src/core/hle/kernel/physical_core.cpp b/src/core/hle/kernel/physical_core.cpp
index 7477668e4..18a5f40f8 100644
--- a/src/core/hle/kernel/physical_core.cpp
+++ b/src/core/hle/kernel/physical_core.cpp
@@ -58,6 +58,7 @@ bool PhysicalCore::IsInterrupted() const {
 void PhysicalCore::Interrupt() {
     guard->lock();
     interrupts[core_index].SetInterrupt(true);
+    arm_interface->SignalInterrupt();
     guard->unlock();
 }
 
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
index 0c1fbc7b1..282668b36 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
@@ -35,6 +35,15 @@ std::string_view OutputVertexIndex(EmitContext& ctx) {
     return ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : "";
 }
 
+std::string ChooseCbuf(EmitContext& ctx, const IR::Value& binding, std::string_view index) {
+    if (binding.IsImmediate()) {
+        return fmt::format("{}_cbuf{}[{}]", ctx.stage_name, binding.U32(), index);
+    } else {
+        const auto binding_var{ctx.var_alloc.Consume(binding)};
+        return fmt::format("GetCbufIndirect({},{})", binding_var, index);
+    }
+}
+
 void GetCbuf(EmitContext& ctx, std::string_view ret, const IR::Value& binding,
              const IR::Value& offset, u32 num_bits, std::string_view cast = {},
              std::string_view bit_offset = {}) {
@@ -55,8 +64,8 @@ void GetCbuf(EmitContext& ctx, std::string_view ret, const IR::Value& binding,
     const auto swizzle{is_immediate ? fmt::format(".{}", OffsetSwizzle(offset.U32()))
                                     : fmt::format("[({}>>2)%4]", offset_var)};
 
-    const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())};
-    const auto cbuf_cast{fmt::format("{}({}[{}]{{}})", cast, cbuf, index)};
+    const auto cbuf{ChooseCbuf(ctx, binding, index)};
+    const auto cbuf_cast{fmt::format("{}({}{{}})", cast, cbuf)};
     const auto extraction{num_bits == 32 ? cbuf_cast
                                          : fmt::format("bitfieldExtract({},int({}),{})", cbuf_cast,
                                                        bit_offset, num_bits)};
@@ -140,9 +149,9 @@ void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
 
 void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                       const IR::Value& offset) {
-    const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())};
     const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"};
     if (offset.IsImmediate()) {
+        const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())};
         static constexpr u32 cbuf_size{0x10000};
         const u32 u32_offset{offset.U32()};
         const s32 signed_offset{static_cast<s32>(offset.U32())};
@@ -162,17 +171,17 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding
         return;
     }
     const auto offset_var{ctx.var_alloc.Consume(offset)};
+    const auto cbuf{ChooseCbuf(ctx, binding, fmt::format("{}>>4", offset_var))};
     if (!ctx.profile.has_gl_component_indexing_bug) {
-        ctx.AddU32x2("{}=uvec2({}({}[{}>>4][({}>>2)%4]),{}({}[({}+4)>>4][(({}+4)>>2)%4]));", inst,
-                     cast, cbuf, offset_var, offset_var, cast, cbuf, offset_var, offset_var);
+        ctx.AddU32x2("{}=uvec2({}({}[({}>>2)%4]),{}({}[(({}+4)>>2)%4]));", inst, cast, cbuf,
+                     offset_var, cast, cbuf, offset_var);
         return;
     }
     const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)};
     const auto cbuf_offset{fmt::format("{}>>2", offset_var)};
     for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
-        ctx.Add("if(({}&3)=={}){}=uvec2({}({}[{}>>4].{}),{}({}[({}+4)>>4].{}));", cbuf_offset,
-                swizzle, ret, cast, cbuf, offset_var, "xyzw"[swizzle], cast, cbuf, offset_var,
-                "xyzw"[(swizzle + 1) % 4]);
+        ctx.Add("if(({}&3)=={}){}=uvec2({}({}.{}),{}({}.{}));", cbuf_offset, swizzle, ret, cast,
+                cbuf, "xyzw"[swizzle], cast, cbuf, "xyzw"[(swizzle + 1) % 4]);
     }
 }
 
178 187
diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
index e816a93ec..17266f40d 100644
--- a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
+++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
@@ -359,6 +359,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile
         header += "layout(location=0) uniform vec4 scaling;";
     }
     DefineConstantBuffers(bindings);
+    DefineConstantBufferIndirect();
     DefineStorageBuffers(bindings);
     SetupImages(bindings);
     SetupTextures(bindings);
@@ -436,6 +437,24 @@ void EmitContext::DefineConstantBuffers(Bindings& bindings) {
     }
 }
 
+void EmitContext::DefineConstantBufferIndirect() {
+    if (!info.uses_cbuf_indirect) {
+        return;
+    }
+
+    header += profile.has_gl_cbuf_ftou_bug ? "uvec4 " : "vec4 ";
+    header += "GetCbufIndirect(uint binding, uint offset){"
+              "switch(binding){"
+              "default:";
+
+    for (const auto& desc : info.constant_buffer_descriptors) {
+        header +=
+            fmt::format("case {}:return {}_cbuf{}[offset];", desc.index, stage_name, desc.index);
+    }
+
+    header += "}}";
+}
+
 void EmitContext::DefineStorageBuffers(Bindings& bindings) {
     if (info.storage_buffers_descriptors.empty()) {
         return;
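
For reference, a hedged sketch of the header string DefineConstantBufferIndirect() assembles. The stage name and buffer indices here are hypothetical, and the real code appends to the shader header rather than printing; this only shows the shape of the generated GLSL helper.

// sketch only: assemble the GetCbufIndirect GLSL switch for two cbuf descriptors
#include <fmt/format.h>
#include <string>
#include <vector>

int main() {
    const std::string stage_name = "vs";            // hypothetical stage
    const std::vector<unsigned> cbuf_indices{0, 2}; // hypothetical descriptors
    std::string header = "vec4 GetCbufIndirect(uint binding, uint offset){"
                         "switch(binding){"
                         "default:";
    for (const unsigned index : cbuf_indices) {
        header += fmt::format("case {}:return {}_cbuf{}[offset];", index, stage_name, index);
    }
    header += "}}";
    // Yields GLSL along the lines of:
    //   vec4 GetCbufIndirect(uint binding, uint offset){switch(binding){
    //       default:case 0:return vs_cbuf0[offset];case 2:return vs_cbuf2[offset];}}
    fmt::print("{}\n", header);
}
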
diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.h b/src/shader_recompiler/backend/glsl/glsl_emit_context.h
index d9b639d29..2b13db6e6 100644
--- a/src/shader_recompiler/backend/glsl/glsl_emit_context.h
+++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.h
@@ -162,6 +162,7 @@ public:
 private:
     void SetupExtensions();
     void DefineConstantBuffers(Bindings& bindings);
+    void DefineConstantBufferIndirect();
     void DefineStorageBuffers(Bindings& bindings);
     void DefineGenericOutput(size_t index, u32 invocations);
     void DefineHelperFunctions();
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 28f6a6184..9c83cd2e4 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -1043,15 +1043,15 @@ void EmitContext::DefineConstantBufferIndirectFunctions(const Info& info) {
         const Id merge_label{OpLabel()};
         const Id uniform_type{uniform_types.*member_ptr};
 
-        std::array<Id, Info::MAX_CBUFS> buf_labels;
-        std::array<Sirit::Literal, Info::MAX_CBUFS> buf_literals;
-        for (u32 i = 0; i < Info::MAX_CBUFS; i++) {
+        std::array<Id, Info::MAX_INDIRECT_CBUFS> buf_labels;
+        std::array<Sirit::Literal, Info::MAX_INDIRECT_CBUFS> buf_literals;
+        for (u32 i = 0; i < Info::MAX_INDIRECT_CBUFS; i++) {
             buf_labels[i] = OpLabel();
             buf_literals[i] = Sirit::Literal{i};
         }
         OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone);
         OpSwitch(binding, buf_labels[0], buf_literals, buf_labels);
-        for (u32 i = 0; i < Info::MAX_CBUFS; i++) {
+        for (u32 i = 0; i < Info::MAX_INDIRECT_CBUFS; i++) {
             AddLabel(buf_labels[i]);
             const Id cbuf{cbufs[i].*member_ptr};
             const Id access_chain{OpAccessChain(uniform_type, cbuf, u32_zero_value, offset)};
@@ -1064,22 +1064,23 @@ void EmitContext::DefineConstantBufferIndirectFunctions(const Info& info) {
         return func;
     }};
     IR::Type types{info.used_indirect_cbuf_types};
-    if (True(types & IR::Type::U8)) {
+    bool supports_aliasing = profile.support_descriptor_aliasing;
+    if (supports_aliasing && True(types & IR::Type::U8)) {
         load_const_func_u8 = make_accessor(U8, &UniformDefinitions::U8);
     }
-    if (True(types & IR::Type::U16)) {
+    if (supports_aliasing && True(types & IR::Type::U16)) {
         load_const_func_u16 = make_accessor(U16, &UniformDefinitions::U16);
     }
-    if (True(types & IR::Type::F32)) {
+    if (supports_aliasing && True(types & IR::Type::F32)) {
         load_const_func_f32 = make_accessor(F32[1], &UniformDefinitions::F32);
     }
-    if (True(types & IR::Type::U32)) {
+    if (supports_aliasing && True(types & IR::Type::U32)) {
         load_const_func_u32 = make_accessor(U32[1], &UniformDefinitions::U32);
     }
-    if (True(types & IR::Type::U32x2)) {
+    if (supports_aliasing && True(types & IR::Type::U32x2)) {
         load_const_func_u32x2 = make_accessor(U32[2], &UniformDefinitions::U32x2);
     }
-    if (True(types & IR::Type::U32x4)) {
+    if (!supports_aliasing || True(types & IR::Type::U32x4)) {
         load_const_func_u32x4 = make_accessor(U32[4], &UniformDefinitions::U32x4);
     }
 }
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index 0b2c60842..16278faab 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -32,13 +32,8 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) {
 void AddRegisterIndexedLdc(Info& info) {
     info.uses_cbuf_indirect = true;
 
-    // The shader can use any possible constant buffer
-    info.constant_buffer_mask = (1 << Info::MAX_CBUFS) - 1;
-
-    auto& cbufs{info.constant_buffer_descriptors};
-    cbufs.clear();
-    for (u32 i = 0; i < Info::MAX_CBUFS; i++) {
-        cbufs.push_back(ConstantBufferDescriptor{.index = i, .count = 1});
+    for (u32 i = 0; i < Info::MAX_INDIRECT_CBUFS; i++) {
+        AddConstantBufferDescriptor(info, i, 1);
 
         // The shader can use any possible access size
         info.constant_buffer_used_sizes[i] = 0x10'000;
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index 9d36bd9eb..a3a09c71c 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -105,6 +105,7 @@ struct ImageDescriptor {
 using ImageDescriptors = boost::container::small_vector<ImageDescriptor, 4>;
 
 struct Info {
+    static constexpr size_t MAX_INDIRECT_CBUFS{14};
     static constexpr size_t MAX_CBUFS{18};
     static constexpr size_t MAX_SSBOS{32};
 
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index af05d47d1..190fc6aea 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -18,6 +18,7 @@ set(SHADER_FILES
     full_screen_triangle.vert
     fxaa.frag
     fxaa.vert
+    opengl_convert_s8d24.comp
     opengl_copy_bc4.comp
     opengl_present.frag
     opengl_present.vert
diff --git a/src/video_core/host_shaders/opengl_convert_s8d24.comp b/src/video_core/host_shaders/opengl_convert_s8d24.comp
new file mode 100644
index 000000000..83e1ab176
--- /dev/null
+++ b/src/video_core/host_shaders/opengl_convert_s8d24.comp
@@ -0,0 +1,18 @@
+// Copyright 2022 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#version 430 core
+
+layout(local_size_x = 16, local_size_y = 8) in;
+
+layout(binding = 0, rgba8ui) restrict uniform uimage2D destination;
+layout(location = 0) uniform uvec3 size;
+
+void main() {
+    if (any(greaterThanEqual(gl_GlobalInvocationID, size))) {
+        return;
+    }
+    uvec4 components = imageLoad(destination, ivec2(gl_GlobalInvocationID.xy));
+    imageStore(destination, ivec2(gl_GlobalInvocationID.xy), components.wxyz);
+}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index e6f9ece8b..7ab7f0c0a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -520,6 +520,8 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
     // ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different");
     // ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different");
 
+    screen_info.texture.width = image_view->size.width;
+    screen_info.texture.height = image_view->size.height;
     screen_info.display_texture = image_view->Handle(Shader::TextureType::Color2D);
     screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format);
     return true;
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 8f9a65beb..d12076358 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -409,8 +409,8 @@ ImageBufferMap::~ImageBufferMap() {
 
 TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager,
                                          StateTracker& state_tracker_)
-    : device{device_}, state_tracker{state_tracker_},
-      util_shaders(program_manager), resolution{Settings::values.resolution_info} {
+    : device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager),
+      format_conversion_pass{util_shaders}, resolution{Settings::values.resolution_info} {
     static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D};
     for (size_t i = 0; i < TARGETS.size(); ++i) {
         const GLenum target = TARGETS[i];
@@ -1325,6 +1325,9 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
 
 Framebuffer::~Framebuffer() = default;
 
+FormatConversionPass::FormatConversionPass(UtilShaders& util_shaders_)
+    : util_shaders{util_shaders_} {}
+
 void FormatConversionPass::ConvertImage(Image& dst_image, Image& src_image,
                                         std::span<const VideoCommon::ImageCopy> copies) {
     const GLenum dst_target = ImageTarget(dst_image.info);
@@ -1357,6 +1360,12 @@ void FormatConversionPass::ConvertImage(Image& dst_image, Image& src_image,
                             dst_origin.z, region.width, region.height, region.depth,
                             dst_image.GlFormat(), dst_image.GlType(), nullptr);
     }
+
+    // Swap component order of S8D24 to ABGR8 reinterprets
+    if (src_image.info.format == PixelFormat::D24_UNORM_S8_UINT &&
+        dst_image.info.format == PixelFormat::A8B8G8R8_UNORM) {
+        util_shaders.ConvertS8D24(dst_image, copies);
+    }
 }
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 53088b66e..672fa8dde 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -55,13 +55,14 @@ struct FormatProperties {
 
 class FormatConversionPass {
 public:
-    FormatConversionPass() = default;
+    explicit FormatConversionPass(UtilShaders& util_shaders);
     ~FormatConversionPass() = default;
 
     void ConvertImage(Image& dst_image, Image& src_image,
                       std::span<const VideoCommon::ImageCopy> copies);
 
 private:
+    UtilShaders& util_shaders;
     OGLBuffer intermediate_pbo;
     size_t pbo_size{};
 };
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index f8f29013a..3a3c213bb 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -208,6 +208,8 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
     // Framebuffer orientation handling
     framebuffer_transform_flags = framebuffer.transform_flags;
     framebuffer_crop_rect = framebuffer.crop_rect;
+    framebuffer_width = framebuffer.width;
+    framebuffer_height = framebuffer.height;
 
     const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset};
     screen_info.was_accelerated =
@@ -480,9 +482,12 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
     ASSERT_MSG(framebuffer_crop_rect.top == 0, "Unimplemented");
     ASSERT_MSG(framebuffer_crop_rect.left == 0, "Unimplemented");
 
+    f32 scale_u = static_cast<f32>(framebuffer_width) / static_cast<f32>(screen_info.texture.width);
+    f32 scale_v =
+        static_cast<f32>(framebuffer_height) / static_cast<f32>(screen_info.texture.height);
+
     // Scale the output by the crop width/height. This is commonly used with 1280x720 rendering
     // (e.g. handheld mode) on a 1920x1080 framebuffer.
-    f32 scale_u = 1.f, scale_v = 1.f;
     if (framebuffer_crop_rect.GetWidth() > 0) {
         scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) /
                   static_cast<f32>(screen_info.texture.width);
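
A small worked example (an assumption, not from the patch) of the new scale_u/scale_v defaults in DrawScreen(): when the guest framebuffer is smaller than the host texture and no crop rectangle is supplied, the UVs now shrink to the populated region instead of defaulting to 1.0 and sampling the whole texture.

// sketch only: default UV scale for a 1280x720 framebuffer in a 1920x1080 texture
#include <cstdio>

int main() {
    // Hypothetical sizes mirroring the comment above: handheld-mode 1280x720
    // output stored in a 1920x1080 texture, with no crop rect set.
    const float framebuffer_width = 1280.0f, framebuffer_height = 720.0f;
    const float texture_width = 1920.0f, texture_height = 1080.0f;
    const float scale_u = framebuffer_width / texture_width;   // ~0.667
    const float scale_v = framebuffer_height / texture_height; // ~0.667
    std::printf("scale_u=%.3f scale_v=%.3f\n", scale_u, scale_v);
}
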
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index aa206878b..ae9558a33 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -137,6 +137,8 @@ private:
     /// Used for transforming the framebuffer orientation
     Service::android::BufferTransformFlags framebuffer_transform_flags{};
     Common::Rectangle<int> framebuffer_crop_rect;
+    u32 framebuffer_width;
+    u32 framebuffer_height;
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index 897c380b3..04c482a09 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -13,6 +13,7 @@
 #include "video_core/host_shaders/astc_decoder_comp.h"
 #include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h"
 #include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h"
+#include "video_core/host_shaders/opengl_convert_s8d24_comp.h"
 #include "video_core/host_shaders/opengl_copy_bc4_comp.h"
 #include "video_core/host_shaders/pitch_unswizzle_comp.h"
 #include "video_core/renderer_opengl/gl_shader_manager.h"
@@ -50,7 +51,8 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)
       block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)),
       block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)),
       pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)),
-      copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) {
+      copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)),
+      convert_s8d24_program(MakeProgram(OPENGL_CONVERT_S8D24_COMP)) {
     const auto swizzle_table = Tegra::Texture::MakeSwizzleTable();
     swizzle_table_buffer.Create();
     glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0);
@@ -248,6 +250,26 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im
     program_manager.RestoreGuestCompute();
 }
 
+void UtilShaders::ConvertS8D24(Image& dst_image, std::span<const ImageCopy> copies) {
+    static constexpr GLuint BINDING_DESTINATION = 0;
+    static constexpr GLuint LOC_SIZE = 0;
+
+    program_manager.BindComputeProgram(convert_s8d24_program.handle);
+    for (const ImageCopy& copy : copies) {
+        ASSERT(copy.src_subresource.base_layer == 0);
+        ASSERT(copy.src_subresource.num_layers == 1);
+        ASSERT(copy.dst_subresource.base_layer == 0);
+        ASSERT(copy.dst_subresource.num_layers == 1);
+
+        glUniform3ui(LOC_SIZE, copy.extent.width, copy.extent.height, copy.extent.depth);
+        glBindImageTexture(BINDING_DESTINATION, dst_image.StorageHandle(),
+                           copy.dst_subresource.base_level, GL_TRUE, 0, GL_READ_WRITE, GL_RGBA8UI);
+        glDispatchCompute(Common::DivCeil(copy.extent.width, 16u),
+                          Common::DivCeil(copy.extent.height, 8u), copy.extent.depth);
+    }
+    program_manager.RestoreGuestCompute();
+}
+
 GLenum StoreFormat(u32 bytes_per_block) {
     switch (bytes_per_block) {
     case 1:
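
A hedged sketch of the dispatch arithmetic ConvertS8D24() uses above: one 16x8 workgroup per tile, matching the local_size declared in opengl_convert_s8d24.comp, rounded up so partial edge tiles are still covered; the size uniform in the shader discards the out-of-range invocations. The helper below is a stand-in for Common::DivCeil.

// sketch only: workgroup count for a hypothetical 1280x720 single-layer copy
#include <cstdint>
#include <cstdio>

// Same rounding-up division the patch obtains from Common::DivCeil.
constexpr std::uint32_t DivCeil(std::uint32_t value, std::uint32_t divisor) {
    return (value + divisor - 1u) / divisor;
}

int main() {
    const std::uint32_t width = 1280, height = 720, depth = 1;
    std::printf("glDispatchCompute(%u, %u, %u)\n",
                DivCeil(width, 16u), DivCeil(height, 8u), depth);
    // -> glDispatchCompute(80, 90, 1): 80*16 = 1280 and 90*8 = 720 invocations
    //    per slice, one per texel.
}
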
diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h
index 5de95ea7a..5c132e67f 100644
--- a/src/video_core/renderer_opengl/util_shaders.h
+++ b/src/video_core/renderer_opengl/util_shaders.h
@@ -39,6 +39,8 @@ public:
     void CopyBC4(Image& dst_image, Image& src_image,
                  std::span<const VideoCommon::ImageCopy> copies);
 
+    void ConvertS8D24(Image& dst_image, std::span<const VideoCommon::ImageCopy> copies);
+
 private:
     ProgramManager& program_manager;
 
@@ -49,6 +51,7 @@ private:
     OGLProgram block_linear_unswizzle_3d_program;
     OGLProgram pitch_unswizzle_program;
     OGLProgram copy_bc4_program;
+    OGLProgram convert_s8d24_program;
 };
 
 GLenum StoreFormat(u32 bytes_per_block);
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index d893c1952..b866e9103 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -1406,8 +1406,9 @@ void VKBlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfi
     UNIMPLEMENTED_IF(framebuffer_crop_rect.top != 0);
     UNIMPLEMENTED_IF(framebuffer_crop_rect.left != 0);
 
-    f32 scale_u = 1.0f;
-    f32 scale_v = 1.0f;
+    f32 scale_u = static_cast<f32>(framebuffer.width) / static_cast<f32>(screen_info.width);
+    f32 scale_v = static_cast<f32>(framebuffer.height) / static_cast<f32>(screen_info.height);
+
     // Scale the output by the crop width/height. This is commonly used with 1280x720 rendering
     // (e.g. handheld mode) on a 1920x1080 framebuffer.
     if (!fsr) {