summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CMakeModules/GenerateSCMRev.cmake1
-rw-r--r--src/audio_core/cubeb_sink.cpp6
-rw-r--r--src/audio_core/cubeb_sink.h2
-rw-r--r--src/common/CMakeLists.txt1
-rw-r--r--src/core/core_cpu.cpp9
-rw-r--r--src/core/core_cpu.h8
-rw-r--r--src/core/cpu_core_manager.cpp3
-rw-r--r--src/core/hle/kernel/scheduler.cpp14
-rw-r--r--src/core/hle/kernel/scheduler.h6
-rw-r--r--src/core/hle/service/audio/hwopus.cpp176
-rw-r--r--src/core/settings.cpp3
-rw-r--r--src/video_core/CMakeLists.txt1
-rw-r--r--src/video_core/engines/shader_bytecode.h25
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp37
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp41
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h28
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp56
-rw-r--r--src/video_core/shader/decode.cpp1
-rw-r--r--src/video_core/shader/decode/memory.cpp493
-rw-r--r--src/video_core/shader/decode/texture.cpp534
-rw-r--r--src/video_core/shader/shader_ir.h5
-rw-r--r--src/yuzu_cmd/config.cpp11
22 files changed, 788 insertions, 673 deletions
diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake
index 78728e08b..08315a1f1 100644
--- a/CMakeModules/GenerateSCMRev.cmake
+++ b/CMakeModules/GenerateSCMRev.cmake
@@ -73,6 +73,7 @@ set(HASH_FILES
73 "${VIDEO_CORE}/shader/decode/integer_set.cpp" 73 "${VIDEO_CORE}/shader/decode/integer_set.cpp"
74 "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp" 74 "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
75 "${VIDEO_CORE}/shader/decode/memory.cpp" 75 "${VIDEO_CORE}/shader/decode/memory.cpp"
76 "${VIDEO_CORE}/shader/decode/texture.cpp"
76 "${VIDEO_CORE}/shader/decode/other.cpp" 77 "${VIDEO_CORE}/shader/decode/other.cpp"
77 "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp" 78 "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
78 "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp" 79 "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
diff --git a/src/audio_core/cubeb_sink.cpp b/src/audio_core/cubeb_sink.cpp
index 1da0b9f2a..7047ed9cf 100644
--- a/src/audio_core/cubeb_sink.cpp
+++ b/src/audio_core/cubeb_sink.cpp
@@ -12,7 +12,7 @@
12#include "common/ring_buffer.h" 12#include "common/ring_buffer.h"
13#include "core/settings.h" 13#include "core/settings.h"
14 14
15#ifdef _MSC_VER 15#ifdef _WIN32
16#include <objbase.h> 16#include <objbase.h>
17#endif 17#endif
18 18
@@ -113,7 +113,7 @@ private:
113 113
114CubebSink::CubebSink(std::string_view target_device_name) { 114CubebSink::CubebSink(std::string_view target_device_name) {
115 // Cubeb requires COM to be initialized on the thread calling cubeb_init on Windows 115 // Cubeb requires COM to be initialized on the thread calling cubeb_init on Windows
116#ifdef _MSC_VER 116#ifdef _WIN32
117 com_init_result = CoInitializeEx(nullptr, COINIT_MULTITHREADED); 117 com_init_result = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
118#endif 118#endif
119 119
@@ -152,7 +152,7 @@ CubebSink::~CubebSink() {
152 152
153 cubeb_destroy(ctx); 153 cubeb_destroy(ctx);
154 154
155#ifdef _MSC_VER 155#ifdef _WIN32
156 if (SUCCEEDED(com_init_result)) { 156 if (SUCCEEDED(com_init_result)) {
157 CoUninitialize(); 157 CoUninitialize();
158 } 158 }
diff --git a/src/audio_core/cubeb_sink.h b/src/audio_core/cubeb_sink.h
index 511df7bb1..7ce850f47 100644
--- a/src/audio_core/cubeb_sink.h
+++ b/src/audio_core/cubeb_sink.h
@@ -26,7 +26,7 @@ private:
26 cubeb_devid output_device{}; 26 cubeb_devid output_device{};
27 std::vector<SinkStreamPtr> sink_streams; 27 std::vector<SinkStreamPtr> sink_streams;
28 28
29#ifdef _MSC_VER 29#ifdef _WIN32
30 u32 com_init_result = 0; 30 u32 com_init_result = 0;
31#endif 31#endif
32}; 32};
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index bdd885273..3d30f0e3e 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -47,6 +47,7 @@ add_custom_command(OUTPUT scm_rev.cpp
47 "${VIDEO_CORE}/shader/decode/integer_set.cpp" 47 "${VIDEO_CORE}/shader/decode/integer_set.cpp"
48 "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp" 48 "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
49 "${VIDEO_CORE}/shader/decode/memory.cpp" 49 "${VIDEO_CORE}/shader/decode/memory.cpp"
50 "${VIDEO_CORE}/shader/decode/texture.cpp"
50 "${VIDEO_CORE}/shader/decode/other.cpp" 51 "${VIDEO_CORE}/shader/decode/other.cpp"
51 "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp" 52 "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
52 "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp" 53 "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp
index 54aa21a3a..1eefed6d0 100644
--- a/src/core/core_cpu.cpp
+++ b/src/core/core_cpu.cpp
@@ -11,6 +11,7 @@
11#endif 11#endif
12#include "core/arm/exclusive_monitor.h" 12#include "core/arm/exclusive_monitor.h"
13#include "core/arm/unicorn/arm_unicorn.h" 13#include "core/arm/unicorn/arm_unicorn.h"
14#include "core/core.h"
14#include "core/core_cpu.h" 15#include "core/core_cpu.h"
15#include "core/core_timing.h" 16#include "core/core_timing.h"
16#include "core/hle/kernel/scheduler.h" 17#include "core/hle/kernel/scheduler.h"
@@ -49,9 +50,9 @@ bool CpuBarrier::Rendezvous() {
49 return false; 50 return false;
50} 51}
51 52
52Cpu::Cpu(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor, 53Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier,
53 CpuBarrier& cpu_barrier, std::size_t core_index) 54 std::size_t core_index)
54 : cpu_barrier{cpu_barrier}, core_timing{core_timing}, core_index{core_index} { 55 : cpu_barrier{cpu_barrier}, core_timing{system.CoreTiming()}, core_index{core_index} {
55 if (Settings::values.use_cpu_jit) { 56 if (Settings::values.use_cpu_jit) {
56#ifdef ARCHITECTURE_x86_64 57#ifdef ARCHITECTURE_x86_64
57 arm_interface = std::make_unique<ARM_Dynarmic>(core_timing, exclusive_monitor, core_index); 58 arm_interface = std::make_unique<ARM_Dynarmic>(core_timing, exclusive_monitor, core_index);
@@ -63,7 +64,7 @@ Cpu::Cpu(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
63 arm_interface = std::make_unique<ARM_Unicorn>(core_timing); 64 arm_interface = std::make_unique<ARM_Unicorn>(core_timing);
64 } 65 }
65 66
66 scheduler = std::make_unique<Kernel::Scheduler>(*arm_interface); 67 scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface);
67} 68}
68 69
69Cpu::~Cpu() = default; 70Cpu::~Cpu() = default;
diff --git a/src/core/core_cpu.h b/src/core/core_cpu.h
index e2204c6b0..7589beb8c 100644
--- a/src/core/core_cpu.h
+++ b/src/core/core_cpu.h
@@ -15,6 +15,10 @@ namespace Kernel {
15class Scheduler; 15class Scheduler;
16} 16}
17 17
18namespace Core {
19class System;
20}
21
18namespace Core::Timing { 22namespace Core::Timing {
19class CoreTiming; 23class CoreTiming;
20} 24}
@@ -45,8 +49,8 @@ private:
45 49
46class Cpu { 50class Cpu {
47public: 51public:
48 Cpu(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor, 52 Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier,
49 CpuBarrier& cpu_barrier, std::size_t core_index); 53 std::size_t core_index);
50 ~Cpu(); 54 ~Cpu();
51 55
52 void RunLoop(bool tight_loop = true); 56 void RunLoop(bool tight_loop = true);
diff --git a/src/core/cpu_core_manager.cpp b/src/core/cpu_core_manager.cpp
index 2ddb3610d..93bc5619c 100644
--- a/src/core/cpu_core_manager.cpp
+++ b/src/core/cpu_core_manager.cpp
@@ -27,8 +27,7 @@ void CpuCoreManager::Initialize(System& system) {
27 exclusive_monitor = Cpu::MakeExclusiveMonitor(cores.size()); 27 exclusive_monitor = Cpu::MakeExclusiveMonitor(cores.size());
28 28
29 for (std::size_t index = 0; index < cores.size(); ++index) { 29 for (std::size_t index = 0; index < cores.size(); ++index) {
30 cores[index] = 30 cores[index] = std::make_unique<Cpu>(system, *exclusive_monitor, *barrier, index);
31 std::make_unique<Cpu>(system.CoreTiming(), *exclusive_monitor, *barrier, index);
32 } 31 }
33 32
34 // Create threads for CPU cores 1-3, and build thread_to_cpu map 33 // Create threads for CPU cores 1-3, and build thread_to_cpu map
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index 44f30d070..5fccfd9f4 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -19,7 +19,8 @@ namespace Kernel {
19 19
20std::mutex Scheduler::scheduler_mutex; 20std::mutex Scheduler::scheduler_mutex;
21 21
22Scheduler::Scheduler(Core::ARM_Interface& cpu_core) : cpu_core(cpu_core) {} 22Scheduler::Scheduler(Core::System& system, Core::ARM_Interface& cpu_core)
23 : cpu_core{cpu_core}, system{system} {}
23 24
24Scheduler::~Scheduler() { 25Scheduler::~Scheduler() {
25 for (auto& thread : thread_list) { 26 for (auto& thread : thread_list) {
@@ -61,7 +62,7 @@ Thread* Scheduler::PopNextReadyThread() {
61 62
62void Scheduler::SwitchContext(Thread* new_thread) { 63void Scheduler::SwitchContext(Thread* new_thread) {
63 Thread* const previous_thread = GetCurrentThread(); 64 Thread* const previous_thread = GetCurrentThread();
64 Process* const previous_process = Core::CurrentProcess(); 65 Process* const previous_process = system.Kernel().CurrentProcess();
65 66
66 UpdateLastContextSwitchTime(previous_thread, previous_process); 67 UpdateLastContextSwitchTime(previous_thread, previous_process);
67 68
@@ -94,8 +95,8 @@ void Scheduler::SwitchContext(Thread* new_thread) {
94 95
95 auto* const thread_owner_process = current_thread->GetOwnerProcess(); 96 auto* const thread_owner_process = current_thread->GetOwnerProcess();
96 if (previous_process != thread_owner_process) { 97 if (previous_process != thread_owner_process) {
97 Core::System::GetInstance().Kernel().MakeCurrentProcess(thread_owner_process); 98 system.Kernel().MakeCurrentProcess(thread_owner_process);
98 SetCurrentPageTable(&Core::CurrentProcess()->VMManager().page_table); 99 SetCurrentPageTable(&thread_owner_process->VMManager().page_table);
99 } 100 }
100 101
101 cpu_core.LoadContext(new_thread->GetContext()); 102 cpu_core.LoadContext(new_thread->GetContext());
@@ -111,7 +112,7 @@ void Scheduler::SwitchContext(Thread* new_thread) {
111 112
112void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) { 113void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) {
113 const u64 prev_switch_ticks = last_context_switch_time; 114 const u64 prev_switch_ticks = last_context_switch_time;
114 const u64 most_recent_switch_ticks = Core::System::GetInstance().CoreTiming().GetTicks(); 115 const u64 most_recent_switch_ticks = system.CoreTiming().GetTicks();
115 const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks; 116 const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks;
116 117
117 if (thread != nullptr) { 118 if (thread != nullptr) {
@@ -223,8 +224,7 @@ void Scheduler::YieldWithLoadBalancing(Thread* thread) {
223 // Take the first non-nullptr one 224 // Take the first non-nullptr one
224 for (unsigned cur_core = 0; cur_core < Core::NUM_CPU_CORES; ++cur_core) { 225 for (unsigned cur_core = 0; cur_core < Core::NUM_CPU_CORES; ++cur_core) {
225 const auto res = 226 const auto res =
226 Core::System::GetInstance().CpuCore(cur_core).Scheduler().GetNextSuggestedThread( 227 system.CpuCore(cur_core).Scheduler().GetNextSuggestedThread(core, priority);
227 core, priority);
228 228
229 // If scheduler provides a suggested thread 229 // If scheduler provides a suggested thread
230 if (res != nullptr) { 230 if (res != nullptr) {
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h
index 97ced4dfc..1c5bf57d9 100644
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -13,7 +13,8 @@
13 13
14namespace Core { 14namespace Core {
15class ARM_Interface; 15class ARM_Interface;
16} 16class System;
17} // namespace Core
17 18
18namespace Kernel { 19namespace Kernel {
19 20
@@ -21,7 +22,7 @@ class Process;
21 22
22class Scheduler final { 23class Scheduler final {
23public: 24public:
24 explicit Scheduler(Core::ARM_Interface& cpu_core); 25 explicit Scheduler(Core::System& system, Core::ARM_Interface& cpu_core);
25 ~Scheduler(); 26 ~Scheduler();
26 27
27 /// Returns whether there are any threads that are ready to run. 28 /// Returns whether there are any threads that are ready to run.
@@ -162,6 +163,7 @@ private:
162 Core::ARM_Interface& cpu_core; 163 Core::ARM_Interface& cpu_core;
163 u64 last_context_switch_time = 0; 164 u64 last_context_switch_time = 0;
164 165
166 Core::System& system;
165 static std::mutex scheduler_mutex; 167 static std::mutex scheduler_mutex;
166}; 168};
167 169
diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp
index 11eba4a12..377e12cfa 100644
--- a/src/core/hle/service/audio/hwopus.cpp
+++ b/src/core/hle/service/audio/hwopus.cpp
@@ -9,43 +9,32 @@
9 9
10#include <opus.h> 10#include <opus.h>
11 11
12#include "common/common_funcs.h" 12#include "common/assert.h"
13#include "common/logging/log.h" 13#include "common/logging/log.h"
14#include "core/hle/ipc_helpers.h" 14#include "core/hle/ipc_helpers.h"
15#include "core/hle/kernel/hle_ipc.h" 15#include "core/hle/kernel/hle_ipc.h"
16#include "core/hle/service/audio/hwopus.h" 16#include "core/hle/service/audio/hwopus.h"
17 17
18namespace Service::Audio { 18namespace Service::Audio {
19 19namespace {
20struct OpusDeleter { 20struct OpusDeleter {
21 void operator()(void* ptr) const { 21 void operator()(void* ptr) const {
22 operator delete(ptr); 22 operator delete(ptr);
23 } 23 }
24}; 24};
25 25
26class IHardwareOpusDecoderManager final : public ServiceFramework<IHardwareOpusDecoderManager> { 26using OpusDecoderPtr = std::unique_ptr<OpusDecoder, OpusDeleter>;
27public:
28 IHardwareOpusDecoderManager(std::unique_ptr<OpusDecoder, OpusDeleter> decoder, u32 sample_rate,
29 u32 channel_count)
30 : ServiceFramework("IHardwareOpusDecoderManager"), decoder(std::move(decoder)),
31 sample_rate(sample_rate), channel_count(channel_count) {
32 // clang-format off
33 static const FunctionInfo functions[] = {
34 {0, &IHardwareOpusDecoderManager::DecodeInterleavedOld, "DecodeInterleavedOld"},
35 {1, nullptr, "SetContext"},
36 {2, nullptr, "DecodeInterleavedForMultiStreamOld"},
37 {3, nullptr, "SetContextForMultiStream"},
38 {4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerfOld, "DecodeInterleavedWithPerfOld"},
39 {5, nullptr, "DecodeInterleavedForMultiStreamWithPerfOld"},
40 {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"},
41 {7, nullptr, "DecodeInterleavedForMultiStream"},
42 };
43 // clang-format on
44 27
45 RegisterHandlers(functions); 28struct OpusPacketHeader {
46 } 29 // Packet size in bytes.
30 u32_be size;
31 // Indicates the final range of the codec's entropy coder.
32 u32_be final_range;
33};
34static_assert(sizeof(OpusPacketHeader) == 0x8, "OpusHeader is an invalid size");
47 35
48private: 36class OpusDecoderStateBase {
37public:
49 /// Describes extra behavior that may be asked of the decoding context. 38 /// Describes extra behavior that may be asked of the decoding context.
50 enum class ExtraBehavior { 39 enum class ExtraBehavior {
51 /// No extra behavior. 40 /// No extra behavior.
@@ -55,30 +44,36 @@ private:
55 ResetContext, 44 ResetContext,
56 }; 45 };
57 46
58 void DecodeInterleavedOld(Kernel::HLERequestContext& ctx) { 47 enum class PerfTime {
59 LOG_DEBUG(Audio, "called"); 48 Disabled,
60 49 Enabled,
61 DecodeInterleavedHelper(ctx, nullptr, ExtraBehavior::None); 50 };
62 }
63
64 void DecodeInterleavedWithPerfOld(Kernel::HLERequestContext& ctx) {
65 LOG_DEBUG(Audio, "called");
66 51
67 u64 performance = 0; 52 virtual ~OpusDecoderStateBase() = default;
68 DecodeInterleavedHelper(ctx, &performance, ExtraBehavior::None);
69 }
70 53
71 void DecodeInterleaved(Kernel::HLERequestContext& ctx) { 54 // Decodes interleaved Opus packets. Optionally allows reporting time taken to
72 LOG_DEBUG(Audio, "called"); 55 // perform the decoding, as well as any relevant extra behavior.
73 56 virtual void DecodeInterleaved(Kernel::HLERequestContext& ctx, PerfTime perf_time,
74 IPC::RequestParser rp{ctx}; 57 ExtraBehavior extra_behavior) = 0;
75 const auto extra_behavior = 58};
76 rp.Pop<bool>() ? ExtraBehavior::ResetContext : ExtraBehavior::None;
77 59
78 u64 performance = 0; 60// Represents the decoder state for a non-multistream decoder.
79 DecodeInterleavedHelper(ctx, &performance, extra_behavior); 61class OpusDecoderState final : public OpusDecoderStateBase {
62public:
63 explicit OpusDecoderState(OpusDecoderPtr decoder, u32 sample_rate, u32 channel_count)
64 : decoder{std::move(decoder)}, sample_rate{sample_rate}, channel_count{channel_count} {}
65
66 void DecodeInterleaved(Kernel::HLERequestContext& ctx, PerfTime perf_time,
67 ExtraBehavior extra_behavior) override {
68 if (perf_time == PerfTime::Disabled) {
69 DecodeInterleavedHelper(ctx, nullptr, extra_behavior);
70 } else {
71 u64 performance = 0;
72 DecodeInterleavedHelper(ctx, &performance, extra_behavior);
73 }
80 } 74 }
81 75
76private:
82 void DecodeInterleavedHelper(Kernel::HLERequestContext& ctx, u64* performance, 77 void DecodeInterleavedHelper(Kernel::HLERequestContext& ctx, u64* performance,
83 ExtraBehavior extra_behavior) { 78 ExtraBehavior extra_behavior) {
84 u32 consumed = 0; 79 u32 consumed = 0;
@@ -89,8 +84,7 @@ private:
89 ResetDecoderContext(); 84 ResetDecoderContext();
90 } 85 }
91 86
92 if (!Decoder_DecodeInterleaved(consumed, sample_count, ctx.ReadBuffer(), samples, 87 if (!DecodeOpusData(consumed, sample_count, ctx.ReadBuffer(), samples, performance)) {
93 performance)) {
94 LOG_ERROR(Audio, "Failed to decode opus data"); 88 LOG_ERROR(Audio, "Failed to decode opus data");
95 IPC::ResponseBuilder rb{ctx, 2}; 89 IPC::ResponseBuilder rb{ctx, 2};
96 // TODO(ogniK): Use correct error code 90 // TODO(ogniK): Use correct error code
@@ -109,27 +103,27 @@ private:
109 ctx.WriteBuffer(samples.data(), samples.size() * sizeof(s16)); 103 ctx.WriteBuffer(samples.data(), samples.size() * sizeof(s16));
110 } 104 }
111 105
112 bool Decoder_DecodeInterleaved(u32& consumed, u32& sample_count, const std::vector<u8>& input, 106 bool DecodeOpusData(u32& consumed, u32& sample_count, const std::vector<u8>& input,
113 std::vector<opus_int16>& output, u64* out_performance_time) { 107 std::vector<opus_int16>& output, u64* out_performance_time) const {
114 const auto start_time = std::chrono::high_resolution_clock::now(); 108 const auto start_time = std::chrono::high_resolution_clock::now();
115 const std::size_t raw_output_sz = output.size() * sizeof(opus_int16); 109 const std::size_t raw_output_sz = output.size() * sizeof(opus_int16);
116 if (sizeof(OpusHeader) > input.size()) { 110 if (sizeof(OpusPacketHeader) > input.size()) {
117 LOG_ERROR(Audio, "Input is smaller than the header size, header_sz={}, input_sz={}", 111 LOG_ERROR(Audio, "Input is smaller than the header size, header_sz={}, input_sz={}",
118 sizeof(OpusHeader), input.size()); 112 sizeof(OpusPacketHeader), input.size());
119 return false; 113 return false;
120 } 114 }
121 115
122 OpusHeader hdr{}; 116 OpusPacketHeader hdr{};
123 std::memcpy(&hdr, input.data(), sizeof(OpusHeader)); 117 std::memcpy(&hdr, input.data(), sizeof(OpusPacketHeader));
124 if (sizeof(OpusHeader) + static_cast<u32>(hdr.sz) > input.size()) { 118 if (sizeof(OpusPacketHeader) + static_cast<u32>(hdr.size) > input.size()) {
125 LOG_ERROR(Audio, "Input does not fit in the opus header size. data_sz={}, input_sz={}", 119 LOG_ERROR(Audio, "Input does not fit in the opus header size. data_sz={}, input_sz={}",
126 sizeof(OpusHeader) + static_cast<u32>(hdr.sz), input.size()); 120 sizeof(OpusPacketHeader) + static_cast<u32>(hdr.size), input.size());
127 return false; 121 return false;
128 } 122 }
129 123
130 const auto frame = input.data() + sizeof(OpusHeader); 124 const auto frame = input.data() + sizeof(OpusPacketHeader);
131 const auto decoded_sample_count = opus_packet_get_nb_samples( 125 const auto decoded_sample_count = opus_packet_get_nb_samples(
132 frame, static_cast<opus_int32>(input.size() - sizeof(OpusHeader)), 126 frame, static_cast<opus_int32>(input.size() - sizeof(OpusPacketHeader)),
133 static_cast<opus_int32>(sample_rate)); 127 static_cast<opus_int32>(sample_rate));
134 if (decoded_sample_count * channel_count * sizeof(u16) > raw_output_sz) { 128 if (decoded_sample_count * channel_count * sizeof(u16) > raw_output_sz) {
135 LOG_ERROR( 129 LOG_ERROR(
@@ -141,18 +135,18 @@ private:
141 135
142 const int frame_size = (static_cast<int>(raw_output_sz / sizeof(s16) / channel_count)); 136 const int frame_size = (static_cast<int>(raw_output_sz / sizeof(s16) / channel_count));
143 const auto out_sample_count = 137 const auto out_sample_count =
144 opus_decode(decoder.get(), frame, hdr.sz, output.data(), frame_size, 0); 138 opus_decode(decoder.get(), frame, hdr.size, output.data(), frame_size, 0);
145 if (out_sample_count < 0) { 139 if (out_sample_count < 0) {
146 LOG_ERROR(Audio, 140 LOG_ERROR(Audio,
147 "Incorrect sample count received from opus_decode, " 141 "Incorrect sample count received from opus_decode, "
148 "output_sample_count={}, frame_size={}, data_sz_from_hdr={}", 142 "output_sample_count={}, frame_size={}, data_sz_from_hdr={}",
149 out_sample_count, frame_size, static_cast<u32>(hdr.sz)); 143 out_sample_count, frame_size, static_cast<u32>(hdr.size));
150 return false; 144 return false;
151 } 145 }
152 146
153 const auto end_time = std::chrono::high_resolution_clock::now() - start_time; 147 const auto end_time = std::chrono::high_resolution_clock::now() - start_time;
154 sample_count = out_sample_count; 148 sample_count = out_sample_count;
155 consumed = static_cast<u32>(sizeof(OpusHeader) + hdr.sz); 149 consumed = static_cast<u32>(sizeof(OpusPacketHeader) + hdr.size);
156 if (out_performance_time != nullptr) { 150 if (out_performance_time != nullptr) {
157 *out_performance_time = 151 *out_performance_time =
158 std::chrono::duration_cast<std::chrono::milliseconds>(end_time).count(); 152 std::chrono::duration_cast<std::chrono::milliseconds>(end_time).count();
@@ -167,21 +161,66 @@ private:
167 opus_decoder_ctl(decoder.get(), OPUS_RESET_STATE); 161 opus_decoder_ctl(decoder.get(), OPUS_RESET_STATE);
168 } 162 }
169 163
170 struct OpusHeader { 164 OpusDecoderPtr decoder;
171 u32_be sz; // Needs to be BE for some odd reason
172 INSERT_PADDING_WORDS(1);
173 };
174 static_assert(sizeof(OpusHeader) == 0x8, "OpusHeader is an invalid size");
175
176 std::unique_ptr<OpusDecoder, OpusDeleter> decoder;
177 u32 sample_rate; 165 u32 sample_rate;
178 u32 channel_count; 166 u32 channel_count;
179}; 167};
180 168
181static std::size_t WorkerBufferSize(u32 channel_count) { 169class IHardwareOpusDecoderManager final : public ServiceFramework<IHardwareOpusDecoderManager> {
170public:
171 explicit IHardwareOpusDecoderManager(std::unique_ptr<OpusDecoderStateBase> decoder_state)
172 : ServiceFramework("IHardwareOpusDecoderManager"), decoder_state{std::move(decoder_state)} {
173 // clang-format off
174 static const FunctionInfo functions[] = {
175 {0, &IHardwareOpusDecoderManager::DecodeInterleavedOld, "DecodeInterleavedOld"},
176 {1, nullptr, "SetContext"},
177 {2, nullptr, "DecodeInterleavedForMultiStreamOld"},
178 {3, nullptr, "SetContextForMultiStream"},
179 {4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerfOld, "DecodeInterleavedWithPerfOld"},
180 {5, nullptr, "DecodeInterleavedForMultiStreamWithPerfOld"},
181 {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"},
182 {7, nullptr, "DecodeInterleavedForMultiStream"},
183 };
184 // clang-format on
185
186 RegisterHandlers(functions);
187 }
188
189private:
190 void DecodeInterleavedOld(Kernel::HLERequestContext& ctx) {
191 LOG_DEBUG(Audio, "called");
192
193 decoder_state->DecodeInterleaved(ctx, OpusDecoderStateBase::PerfTime::Disabled,
194 OpusDecoderStateBase::ExtraBehavior::None);
195 }
196
197 void DecodeInterleavedWithPerfOld(Kernel::HLERequestContext& ctx) {
198 LOG_DEBUG(Audio, "called");
199
200 decoder_state->DecodeInterleaved(ctx, OpusDecoderStateBase::PerfTime::Enabled,
201 OpusDecoderStateBase::ExtraBehavior::None);
202 }
203
204 void DecodeInterleaved(Kernel::HLERequestContext& ctx) {
205 LOG_DEBUG(Audio, "called");
206
207 IPC::RequestParser rp{ctx};
208 const auto extra_behavior = rp.Pop<bool>()
209 ? OpusDecoderStateBase::ExtraBehavior::ResetContext
210 : OpusDecoderStateBase::ExtraBehavior::None;
211
212 decoder_state->DecodeInterleaved(ctx, OpusDecoderStateBase::PerfTime::Enabled,
213 extra_behavior);
214 }
215
216 std::unique_ptr<OpusDecoderStateBase> decoder_state;
217};
218
219std::size_t WorkerBufferSize(u32 channel_count) {
182 ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count"); 220 ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count");
183 return opus_decoder_get_size(static_cast<int>(channel_count)); 221 return opus_decoder_get_size(static_cast<int>(channel_count));
184} 222}
223} // Anonymous namespace
185 224
186void HwOpus::GetWorkBufferSize(Kernel::HLERequestContext& ctx) { 225void HwOpus::GetWorkBufferSize(Kernel::HLERequestContext& ctx) {
187 IPC::RequestParser rp{ctx}; 226 IPC::RequestParser rp{ctx};
@@ -220,8 +259,7 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
220 const std::size_t worker_sz = WorkerBufferSize(channel_count); 259 const std::size_t worker_sz = WorkerBufferSize(channel_count);
221 ASSERT_MSG(buffer_sz >= worker_sz, "Worker buffer too large"); 260 ASSERT_MSG(buffer_sz >= worker_sz, "Worker buffer too large");
222 261
223 std::unique_ptr<OpusDecoder, OpusDeleter> decoder{ 262 OpusDecoderPtr decoder{static_cast<OpusDecoder*>(operator new(worker_sz))};
224 static_cast<OpusDecoder*>(operator new(worker_sz))};
225 if (const int err = opus_decoder_init(decoder.get(), sample_rate, channel_count)) { 263 if (const int err = opus_decoder_init(decoder.get(), sample_rate, channel_count)) {
226 LOG_ERROR(Audio, "Failed to init opus decoder with error={}", err); 264 LOG_ERROR(Audio, "Failed to init opus decoder with error={}", err);
227 IPC::ResponseBuilder rb{ctx, 2}; 265 IPC::ResponseBuilder rb{ctx, 2};
@@ -232,8 +270,8 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) {
232 270
233 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 271 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
234 rb.Push(RESULT_SUCCESS); 272 rb.Push(RESULT_SUCCESS);
235 rb.PushIpcInterface<IHardwareOpusDecoderManager>(std::move(decoder), sample_rate, 273 rb.PushIpcInterface<IHardwareOpusDecoderManager>(
236 channel_count); 274 std::make_unique<OpusDecoderState>(std::move(decoder), sample_rate, channel_count));
237} 275}
238 276
239HwOpus::HwOpus() : ServiceFramework("hwopus") { 277HwOpus::HwOpus() : ServiceFramework("hwopus") {
diff --git a/src/core/settings.cpp b/src/core/settings.cpp
index 2e232e1e7..6dd3139cc 100644
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -91,7 +91,10 @@ void LogSettings() {
91 LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor); 91 LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor);
92 LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit); 92 LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit);
93 LogSetting("Renderer_FrameLimit", Settings::values.frame_limit); 93 LogSetting("Renderer_FrameLimit", Settings::values.frame_limit);
94 LogSetting("Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache);
94 LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation); 95 LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation);
96 LogSetting("Renderer_UseAsynchronousGpuEmulation",
97 Settings::values.use_asynchronous_gpu_emulation);
95 LogSetting("Audio_OutputEngine", Settings::values.sink_id); 98 LogSetting("Audio_OutputEngine", Settings::values.sink_id);
96 LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching); 99 LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching);
97 LogSetting("Audio_OutputDevice", Settings::values.audio_device_id); 100 LogSetting("Audio_OutputDevice", Settings::values.audio_device_id);
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index dac992d44..0c3038c52 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -80,6 +80,7 @@ add_library(video_core STATIC
80 shader/decode/hfma2.cpp 80 shader/decode/hfma2.cpp
81 shader/decode/conversion.cpp 81 shader/decode/conversion.cpp
82 shader/decode/memory.cpp 82 shader/decode/memory.cpp
83 shader/decode/texture.cpp
83 shader/decode/float_set_predicate.cpp 84 shader/decode/float_set_predicate.cpp
84 shader/decode/integer_set_predicate.cpp 85 shader/decode/integer_set_predicate.cpp
85 shader/decode/half_set_predicate.cpp 86 shader/decode/half_set_predicate.cpp
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index c7eb15b6a..7f613370b 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -324,11 +324,11 @@ enum class TextureQueryType : u64 {
324 324
325enum class TextureProcessMode : u64 { 325enum class TextureProcessMode : u64 {
326 None = 0, 326 None = 0,
327 LZ = 1, // Unknown, appears to be the same as none. 327 LZ = 1, // Load LOD of zero.
328 LB = 2, // Load Bias. 328 LB = 2, // Load Bias.
329 LL = 3, // Load LOD (LevelOfDetail) 329 LL = 3, // Load LOD.
330 LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB 330 LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB.
331 LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL 331 LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL.
332}; 332};
333 333
334enum class TextureMiscMode : u64 { 334enum class TextureMiscMode : u64 {
@@ -1445,6 +1445,7 @@ public:
1445 Flow, 1445 Flow,
1446 Synch, 1446 Synch,
1447 Memory, 1447 Memory,
1448 Texture,
1448 FloatSet, 1449 FloatSet,
1449 FloatSetPredicate, 1450 FloatSetPredicate,
1450 IntegerSet, 1451 IntegerSet,
@@ -1575,14 +1576,14 @@ private:
1575 INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), 1576 INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
1576 INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), 1577 INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
1577 INST("1110111011011---", Id::STG, Type::Memory, "STG"), 1578 INST("1110111011011---", Id::STG, Type::Memory, "STG"),
1578 INST("110000----111---", Id::TEX, Type::Memory, "TEX"), 1579 INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
1579 INST("1101111101001---", Id::TXQ, Type::Memory, "TXQ"), 1580 INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"),
1580 INST("1101-00---------", Id::TEXS, Type::Memory, "TEXS"), 1581 INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"),
1581 INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"), 1582 INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"),
1582 INST("110010----111---", Id::TLD4, Type::Memory, "TLD4"), 1583 INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
1583 INST("1101111100------", Id::TLD4S, Type::Memory, "TLD4S"), 1584 INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
1584 INST("110111110110----", Id::TMML_B, Type::Memory, "TMML_B"), 1585 INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
1585 INST("1101111101011---", Id::TMML, Type::Memory, "TMML"), 1586 INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
1586 INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"), 1587 INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"),
1587 INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), 1588 INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
1588 INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), 1589 INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index e33848bc1..824863561 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -118,7 +118,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::Syst
118 118
119 glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment); 119 glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment);
120 120
121 LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!"); 121 LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here");
122 CheckExtensions(); 122 CheckExtensions();
123} 123}
124 124
@@ -177,7 +177,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
177 continue; 177 continue;
178 178
179 const auto& buffer = regs.vertex_array[attrib.buffer]; 179 const auto& buffer = regs.vertex_array[attrib.buffer];
180 LOG_TRACE(HW_GPU, 180 LOG_TRACE(Render_OpenGL,
181 "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", 181 "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
182 index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), 182 index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(),
183 attrib.offset.Value(), attrib.IsNormalized()); 183 attrib.offset.Value(), attrib.IsNormalized());
@@ -343,9 +343,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
343 shader_program_manager->UseProgrammableFragmentShader(program_handle); 343 shader_program_manager->UseProgrammableFragmentShader(program_handle);
344 break; 344 break;
345 default: 345 default:
346 LOG_CRITICAL(HW_GPU, "Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, 346 UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
347 shader_config.enable.Value(), shader_config.offset); 347 shader_config.enable.Value(), shader_config.offset);
348 UNREACHABLE();
349 } 348 }
350 349
351 const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage); 350 const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
@@ -793,7 +792,10 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
793 VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)}; 792 VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)};
794 ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); 793 ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
795 ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); 794 ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
796 ASSERT_MSG(params.pixel_format == pixel_format, "Framebuffer pixel_format is different"); 795
796 if (params.pixel_format != pixel_format) {
797 LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different");
798 }
797 799
798 screen_info.display_texture = surface->Texture().handle; 800 screen_info.display_texture = surface->Texture().handle;
799 801
@@ -922,8 +924,8 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader
922 size = buffer.size; 924 size = buffer.size;
923 925
924 if (size > MaxConstbufferSize) { 926 if (size > MaxConstbufferSize) {
925 LOG_CRITICAL(HW_GPU, "indirect constbuffer size {} exceeds maximum {}", size, 927 LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size,
926 MaxConstbufferSize); 928 MaxConstbufferSize);
927 size = MaxConstbufferSize; 929 size = MaxConstbufferSize;
928 } 930 }
929 } else { 931 } else {
@@ -983,10 +985,9 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s
983 985
984 texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); 986 texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);
985 987
986 Surface surface = res_cache.GetTextureSurface(texture, entry); 988 if (Surface surface = res_cache.GetTextureSurface(texture, entry); surface) {
987 if (surface != nullptr) {
988 state.texture_units[current_bindpoint].texture = 989 state.texture_units[current_bindpoint].texture =
989 entry.IsArray() ? surface->TextureLayer().handle : surface->Texture().handle; 990 surface->Texture(entry.IsArray()).handle;
990 surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, 991 surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
991 texture.tic.w_source); 992 texture.tic.w_source);
992 } else { 993 } else {
@@ -1218,11 +1219,7 @@ void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) {
1218 1219
1219void RasterizerOpenGL::SyncTransformFeedback() { 1220void RasterizerOpenGL::SyncTransformFeedback() {
1220 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1221 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
1221 1222 UNIMPLEMENTED_IF_MSG(regs.tfb_enabled != 0, "Transform feedbacks are not implemented");
1222 if (regs.tfb_enabled != 0) {
1223 LOG_CRITICAL(Render_OpenGL, "Transform feedbacks are not implemented");
1224 UNREACHABLE();
1225 }
1226} 1223}
1227 1224
1228void RasterizerOpenGL::SyncPointState() { 1225void RasterizerOpenGL::SyncPointState() {
@@ -1242,12 +1239,8 @@ void RasterizerOpenGL::SyncPolygonOffset() {
1242 1239
1243void RasterizerOpenGL::CheckAlphaTests() { 1240void RasterizerOpenGL::CheckAlphaTests() {
1244 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 1241 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
1245 1242 UNIMPLEMENTED_IF_MSG(regs.alpha_test_enabled != 0 && regs.rt_control.count > 1,
1246 if (regs.alpha_test_enabled != 0 && regs.rt_control.count > 1) { 1243 "Alpha Testing is enabled with more than one rendertarget");
1247 LOG_CRITICAL(Render_OpenGL, "Alpha Testing is enabled with Multiple Render Targets, "
1248 "this behavior is undefined.");
1249 UNREACHABLE();
1250 }
1251} 1244}
1252 1245
1253} // namespace OpenGL 1246} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 876698b37..e9eb6e921 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -400,6 +400,27 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
400 return format; 400 return format;
401} 401}
402 402
403/// Returns the discrepant array target
404constexpr GLenum GetArrayDiscrepantTarget(SurfaceTarget target) {
405 switch (target) {
406 case SurfaceTarget::Texture1D:
407 return GL_TEXTURE_1D_ARRAY;
408 case SurfaceTarget::Texture2D:
409 return GL_TEXTURE_2D_ARRAY;
410 case SurfaceTarget::Texture3D:
411 return GL_NONE;
412 case SurfaceTarget::Texture1DArray:
413 return GL_TEXTURE_1D;
414 case SurfaceTarget::Texture2DArray:
415 return GL_TEXTURE_2D;
416 case SurfaceTarget::TextureCubemap:
417 return GL_TEXTURE_CUBE_MAP_ARRAY;
418 case SurfaceTarget::TextureCubeArray:
419 return GL_TEXTURE_CUBE_MAP;
420 }
421 return GL_NONE;
422}
423
403Common::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const { 424Common::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
404 u32 actual_height{std::max(1U, unaligned_height >> mip_level)}; 425 u32 actual_height{std::max(1U, unaligned_height >> mip_level)};
405 if (IsPixelFormatASTC(pixel_format)) { 426 if (IsPixelFormatASTC(pixel_format)) {
@@ -795,20 +816,22 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
795 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); 816 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
796} 817}
797 818
798void CachedSurface::EnsureTextureView() { 819void CachedSurface::EnsureTextureDiscrepantView() {
799 if (texture_view.handle != 0) 820 if (discrepant_view.handle != 0)
800 return; 821 return;
801 822
802 const GLenum target{TargetLayer()}; 823 const GLenum target{GetArrayDiscrepantTarget(params.target)};
824 ASSERT(target != GL_NONE);
825
803 const GLuint num_layers{target == GL_TEXTURE_CUBE_MAP_ARRAY ? 6u : 1u}; 826 const GLuint num_layers{target == GL_TEXTURE_CUBE_MAP_ARRAY ? 6u : 1u};
804 constexpr GLuint min_layer = 0; 827 constexpr GLuint min_layer = 0;
805 constexpr GLuint min_level = 0; 828 constexpr GLuint min_level = 0;
806 829
807 glGenTextures(1, &texture_view.handle); 830 glGenTextures(1, &discrepant_view.handle);
808 glTextureView(texture_view.handle, target, texture.handle, gl_internal_format, min_level, 831 glTextureView(discrepant_view.handle, target, texture.handle, gl_internal_format, min_level,
809 params.max_mip_level, min_layer, num_layers); 832 params.max_mip_level, min_layer, num_layers);
810 ApplyTextureDefaults(texture_view.handle, params.max_mip_level); 833 ApplyTextureDefaults(discrepant_view.handle, params.max_mip_level);
811 glTextureParameteriv(texture_view.handle, GL_TEXTURE_SWIZZLE_RGBA, 834 glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA,
812 reinterpret_cast<const GLint*>(swizzle.data())); 835 reinterpret_cast<const GLint*>(swizzle.data()));
813} 836}
814 837
@@ -834,8 +857,8 @@ void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
834 swizzle = {new_x, new_y, new_z, new_w}; 857 swizzle = {new_x, new_y, new_z, new_w};
835 const auto swizzle_data = reinterpret_cast<const GLint*>(swizzle.data()); 858 const auto swizzle_data = reinterpret_cast<const GLint*>(swizzle.data());
836 glTextureParameteriv(texture.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data); 859 glTextureParameteriv(texture.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data);
837 if (texture_view.handle != 0) { 860 if (discrepant_view.handle != 0) {
838 glTextureParameteriv(texture_view.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data); 861 glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data);
839 } 862 }
840} 863}
841 864
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 797bbdc9c..9cf6f50be 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -367,31 +367,19 @@ public:
367 return texture; 367 return texture;
368 } 368 }
369 369
370 const OGLTexture& TextureLayer() { 370 const OGLTexture& Texture(bool as_array) {
371 if (params.is_array) { 371 if (params.is_array == as_array) {
372 return Texture(); 372 return texture;
373 } else {
374 EnsureTextureDiscrepantView();
375 return discrepant_view;
373 } 376 }
374 EnsureTextureView();
375 return texture_view;
376 } 377 }
377 378
378 GLenum Target() const { 379 GLenum Target() const {
379 return gl_target; 380 return gl_target;
380 } 381 }
381 382
382 GLenum TargetLayer() const {
383 using VideoCore::Surface::SurfaceTarget;
384 switch (params.target) {
385 case SurfaceTarget::Texture1D:
386 return GL_TEXTURE_1D_ARRAY;
387 case SurfaceTarget::Texture2D:
388 return GL_TEXTURE_2D_ARRAY;
389 case SurfaceTarget::TextureCubemap:
390 return GL_TEXTURE_CUBE_MAP_ARRAY;
391 }
392 return Target();
393 }
394
395 const SurfaceParams& GetSurfaceParams() const { 383 const SurfaceParams& GetSurfaceParams() const {
396 return params; 384 return params;
397 } 385 }
@@ -431,10 +419,10 @@ public:
431private: 419private:
432 void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle); 420 void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle);
433 421
434 void EnsureTextureView(); 422 void EnsureTextureDiscrepantView();
435 423
436 OGLTexture texture; 424 OGLTexture texture;
437 OGLTexture texture_view; 425 OGLTexture discrepant_view;
438 std::vector<std::vector<u8>> gl_buffer; 426 std::vector<std::vector<u8>> gl_buffer;
439 SurfaceParams params{}; 427 SurfaceParams params{};
440 GLenum gl_target{}; 428 GLenum gl_target{};
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 72ff6ac6a..11d1169f0 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -5,7 +5,9 @@
5#include <array> 5#include <array>
6#include <string> 6#include <string>
7#include <string_view> 7#include <string_view>
8#include <utility>
8#include <variant> 9#include <variant>
10#include <vector>
9 11
10#include <fmt/format.h> 12#include <fmt/format.h>
11 13
@@ -717,7 +719,7 @@ private:
717 } 719 }
718 720
719 std::string GenerateTexture(Operation operation, const std::string& func, 721 std::string GenerateTexture(Operation operation, const std::string& func,
720 bool is_extra_int = false) { 722 const std::vector<std::pair<Type, Node>>& extras) {
721 constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"}; 723 constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"};
722 724
723 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 725 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
@@ -738,36 +740,47 @@ private:
738 expr += Visit(operation[i]); 740 expr += Visit(operation[i]);
739 741
740 const std::size_t next = i + 1; 742 const std::size_t next = i + 1;
741 if (next < count || has_array || has_shadow) 743 if (next < count)
742 expr += ", "; 744 expr += ", ";
743 } 745 }
744 if (has_array) { 746 if (has_array) {
745 expr += "float(ftoi(" + Visit(meta->array) + "))"; 747 expr += ", float(ftoi(" + Visit(meta->array) + "))";
746 } 748 }
747 if (has_shadow) { 749 if (has_shadow) {
748 if (has_array) 750 expr += ", " + Visit(meta->depth_compare);
749 expr += ", ";
750 expr += Visit(meta->depth_compare);
751 } 751 }
752 expr += ')'; 752 expr += ')';
753 753
754 for (const Node extra : meta->extras) { 754 for (const auto& extra_pair : extras) {
755 const auto [type, operand] = extra_pair;
756 if (operand == nullptr) {
757 continue;
758 }
755 expr += ", "; 759 expr += ", ";
756 if (is_extra_int) { 760
757 if (const auto immediate = std::get_if<ImmediateNode>(extra)) { 761 switch (type) {
762 case Type::Int:
763 if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
758 // Inline the string as an immediate integer in GLSL (some extra arguments are 764 // Inline the string as an immediate integer in GLSL (some extra arguments are
759 // required to be constant) 765 // required to be constant)
760 expr += std::to_string(static_cast<s32>(immediate->GetValue())); 766 expr += std::to_string(static_cast<s32>(immediate->GetValue()));
761 } else { 767 } else {
762 expr += "ftoi(" + Visit(extra) + ')'; 768 expr += "ftoi(" + Visit(operand) + ')';
763 } 769 }
764 } else { 770 break;
765 expr += Visit(extra); 771 case Type::Float:
772 expr += Visit(operand);
773 break;
774 default: {
775 const auto type_int = static_cast<u32>(type);
776 UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int);
777 expr += '0';
778 break;
779 }
766 } 780 }
767 } 781 }
768 782
769 expr += ')'; 783 return expr + ')';
770 return expr;
771 } 784 }
772 785
773 std::string Assign(Operation operation) { 786 std::string Assign(Operation operation) {
@@ -1146,7 +1159,7 @@ private:
1146 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 1159 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1147 ASSERT(meta); 1160 ASSERT(meta);
1148 1161
1149 std::string expr = GenerateTexture(operation, "texture"); 1162 std::string expr = GenerateTexture(operation, "texture", {{Type::Float, meta->bias}});
1150 if (meta->sampler.IsShadow()) { 1163 if (meta->sampler.IsShadow()) {
1151 expr = "vec4(" + expr + ')'; 1164 expr = "vec4(" + expr + ')';
1152 } 1165 }
@@ -1157,7 +1170,7 @@ private:
1157 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 1170 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1158 ASSERT(meta); 1171 ASSERT(meta);
1159 1172
1160 std::string expr = GenerateTexture(operation, "textureLod"); 1173 std::string expr = GenerateTexture(operation, "textureLod", {{Type::Float, meta->lod}});
1161 if (meta->sampler.IsShadow()) { 1174 if (meta->sampler.IsShadow()) {
1162 expr = "vec4(" + expr + ')'; 1175 expr = "vec4(" + expr + ')';
1163 } 1176 }
@@ -1168,7 +1181,8 @@ private:
1168 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 1181 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1169 ASSERT(meta); 1182 ASSERT(meta);
1170 1183
1171 return GenerateTexture(operation, "textureGather", !meta->sampler.IsShadow()) + 1184 const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
1185 return GenerateTexture(operation, "textureGather", {{type, meta->component}}) +
1172 GetSwizzle(meta->element); 1186 GetSwizzle(meta->element);
1173 } 1187 }
1174 1188
@@ -1197,8 +1211,8 @@ private:
1197 ASSERT(meta); 1211 ASSERT(meta);
1198 1212
1199 if (meta->element < 2) { 1213 if (meta->element < 2) {
1200 return "itof(int((" + GenerateTexture(operation, "textureQueryLod") + " * vec2(256))" + 1214 return "itof(int((" + GenerateTexture(operation, "textureQueryLod", {}) +
1201 GetSwizzle(meta->element) + "))"; 1215 " * vec2(256))" + GetSwizzle(meta->element) + "))";
1202 } 1216 }
1203 return "0"; 1217 return "0";
1204 } 1218 }
@@ -1224,9 +1238,9 @@ private:
1224 else if (next < count) 1238 else if (next < count)
1225 expr += ", "; 1239 expr += ", ";
1226 } 1240 }
1227 for (std::size_t i = 0; i < meta->extras.size(); ++i) { 1241 if (meta->lod) {
1228 expr += ", "; 1242 expr += ", ";
1229 expr += CastOperand(Visit(meta->extras.at(i)), Type::Int); 1243 expr += CastOperand(Visit(meta->lod), Type::Int);
1230 } 1244 }
1231 expr += ')'; 1245 expr += ')';
1232 1246
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 740ac3118..e4c438792 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -165,6 +165,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
165 {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, 165 {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
166 {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, 166 {OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
167 {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, 167 {OpCode::Type::Memory, &ShaderIR::DecodeMemory},
168 {OpCode::Type::Texture, &ShaderIR::DecodeTexture},
168 {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, 169 {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
169 {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, 170 {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
170 {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate}, 171 {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 38f01ca50..ea3c71eed 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -17,24 +17,6 @@ using Tegra::Shader::Attribute;
17using Tegra::Shader::Instruction; 17using Tegra::Shader::Instruction;
18using Tegra::Shader::OpCode; 18using Tegra::Shader::OpCode;
19using Tegra::Shader::Register; 19using Tegra::Shader::Register;
20using Tegra::Shader::TextureMiscMode;
21using Tegra::Shader::TextureProcessMode;
22using Tegra::Shader::TextureType;
23
24static std::size_t GetCoordCount(TextureType texture_type) {
25 switch (texture_type) {
26 case TextureType::Texture1D:
27 return 1;
28 case TextureType::Texture2D:
29 return 2;
30 case TextureType::Texture3D:
31 case TextureType::TextureCube:
32 return 3;
33 default:
34 UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type));
35 return 0;
36 }
37}
38 20
39u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { 21u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
40 const Instruction instr = {program_code[pc]}; 22 const Instruction instr = {program_code[pc]};
@@ -247,194 +229,6 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
247 } 229 }
248 break; 230 break;
249 } 231 }
250 case OpCode::Id::TEX: {
251 UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
252 "AOFFI is not implemented");
253
254 if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
255 LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
256 }
257
258 const TextureType texture_type{instr.tex.texture_type};
259 const bool is_array = instr.tex.array != 0;
260 const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
261 const auto process_mode = instr.tex.GetTextureProcessMode();
262 WriteTexInstructionFloat(
263 bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array));
264 break;
265 }
266 case OpCode::Id::TEXS: {
267 const TextureType texture_type{instr.texs.GetTextureType()};
268 const bool is_array{instr.texs.IsArrayTexture()};
269 const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
270 const auto process_mode = instr.texs.GetTextureProcessMode();
271
272 if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) {
273 LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
274 }
275
276 const Node4 components =
277 GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
278
279 if (instr.texs.fp32_flag) {
280 WriteTexsInstructionFloat(bb, instr, components);
281 } else {
282 WriteTexsInstructionHalfFloat(bb, instr, components);
283 }
284 break;
285 }
286 case OpCode::Id::TLD4: {
287 ASSERT(instr.tld4.array == 0);
288 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
289 "AOFFI is not implemented");
290 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
291 "NDV is not implemented");
292 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
293 "PTP is not implemented");
294
295 if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) {
296 LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
297 }
298
299 const auto texture_type = instr.tld4.texture_type.Value();
300 const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
301 const bool is_array = instr.tld4.array != 0;
302 WriteTexInstructionFloat(bb, instr,
303 GetTld4Code(instr, texture_type, depth_compare, is_array));
304 break;
305 }
306 case OpCode::Id::TLD4S: {
307 UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
308 "AOFFI is not implemented");
309 if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
310 LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
311 }
312
313 const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
314 const Node op_a = GetRegister(instr.gpr8);
315 const Node op_b = GetRegister(instr.gpr20);
316
317 // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
318 std::vector<Node> coords;
319 if (depth_compare) {
320 // Note: TLD4S coordinate encoding works just like TEXS's
321 const Node op_y = GetRegister(instr.gpr8.Value() + 1);
322 coords.push_back(op_a);
323 coords.push_back(op_y);
324 coords.push_back(op_b);
325 } else {
326 coords.push_back(op_a);
327 coords.push_back(op_b);
328 }
329 std::vector<Node> extras;
330 extras.push_back(Immediate(static_cast<u32>(instr.tld4s.component)));
331
332 const auto& sampler =
333 GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);
334
335 Node4 values;
336 for (u32 element = 0; element < values.size(); ++element) {
337 auto coords_copy = coords;
338 MetaTexture meta{sampler, {}, {}, extras, element};
339 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
340 }
341
342 WriteTexsInstructionFloat(bb, instr, values);
343 break;
344 }
345 case OpCode::Id::TXQ: {
346 if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
347 LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
348 }
349
350 // TODO: The new commits on the texture refactor, change the way samplers work.
351 // Sadly, not all texture instructions specify the type of texture their sampler
352 // uses. This must be fixed at a later instance.
353 const auto& sampler =
354 GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
355
356 u32 indexer = 0;
357 switch (instr.txq.query_type) {
358 case Tegra::Shader::TextureQueryType::Dimension: {
359 for (u32 element = 0; element < 4; ++element) {
360 if (!instr.txq.IsComponentEnabled(element)) {
361 continue;
362 }
363 MetaTexture meta{sampler, {}, {}, {}, element};
364 const Node value =
365 Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
366 SetTemporal(bb, indexer++, value);
367 }
368 for (u32 i = 0; i < indexer; ++i) {
369 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
370 }
371 break;
372 }
373 default:
374 UNIMPLEMENTED_MSG("Unhandled texture query type: {}",
375 static_cast<u32>(instr.txq.query_type.Value()));
376 }
377 break;
378 }
379 case OpCode::Id::TMML: {
380 UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
381 "NDV is not implemented");
382
383 if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) {
384 LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
385 }
386
387 auto texture_type = instr.tmml.texture_type.Value();
388 const bool is_array = instr.tmml.array != 0;
389 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
390
391 std::vector<Node> coords;
392
393 // TODO: Add coordinates for different samplers once other texture types are implemented.
394 switch (texture_type) {
395 case TextureType::Texture1D:
396 coords.push_back(GetRegister(instr.gpr8));
397 break;
398 case TextureType::Texture2D:
399 coords.push_back(GetRegister(instr.gpr8.Value() + 0));
400 coords.push_back(GetRegister(instr.gpr8.Value() + 1));
401 break;
402 default:
403 UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));
404
405 // Fallback to interpreting as a 2D texture for now
406 coords.push_back(GetRegister(instr.gpr8.Value() + 0));
407 coords.push_back(GetRegister(instr.gpr8.Value() + 1));
408 texture_type = TextureType::Texture2D;
409 }
410
411 for (u32 element = 0; element < 2; ++element) {
412 auto params = coords;
413 MetaTexture meta{sampler, {}, {}, {}, element};
414 const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
415 SetTemporal(bb, element, value);
416 }
417 for (u32 element = 0; element < 2; ++element) {
418 SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element));
419 }
420
421 break;
422 }
423 case OpCode::Id::TLDS: {
424 const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
425 const bool is_array{instr.tlds.IsArrayTexture()};
426
427 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
428 "AOFFI is not implemented");
429 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
430
431 if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
432 LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
433 }
434
435 WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array));
436 break;
437 }
438 default: 232 default:
439 UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); 233 UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
440 } 234 }
@@ -442,291 +236,4 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
442 return pc; 236 return pc;
443} 237}
444 238
445const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type,
446 bool is_array, bool is_shadow) {
447 const auto offset = static_cast<std::size_t>(sampler.index.Value());
448
449 // If this sampler has already been used, return the existing mapping.
450 const auto itr =
451 std::find_if(used_samplers.begin(), used_samplers.end(),
452 [&](const Sampler& entry) { return entry.GetOffset() == offset; });
453 if (itr != used_samplers.end()) {
454 ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
455 itr->IsShadow() == is_shadow);
456 return *itr;
457 }
458
459 // Otherwise create a new mapping for this sampler
460 const std::size_t next_index = used_samplers.size();
461 const Sampler entry{offset, next_index, type, is_array, is_shadow};
462 return *used_samplers.emplace(entry).first;
463}
464
465void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
466 u32 dest_elem = 0;
467 for (u32 elem = 0; elem < 4; ++elem) {
468 if (!instr.tex.IsComponentEnabled(elem)) {
469 // Skip disabled components
470 continue;
471 }
472 SetTemporal(bb, dest_elem++, components[elem]);
473 }
474 // After writing values in temporals, move them to the real registers
475 for (u32 i = 0; i < dest_elem; ++i) {
476 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
477 }
478}
479
480void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
481 const Node4& components) {
482 // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
483 // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
484
485 u32 dest_elem = 0;
486 for (u32 component = 0; component < 4; ++component) {
487 if (!instr.texs.IsComponentEnabled(component))
488 continue;
489 SetTemporal(bb, dest_elem++, components[component]);
490 }
491
492 for (u32 i = 0; i < dest_elem; ++i) {
493 if (i < 2) {
494 // Write the first two swizzle components to gpr0 and gpr0+1
495 SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i));
496 } else {
497 ASSERT(instr.texs.HasTwoDestinations());
498 // Write the rest of the swizzle components to gpr28 and gpr28+1
499 SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i));
500 }
501 }
502}
503
504void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
505 const Node4& components) {
506 // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half
507 // float instruction).
508
509 Node4 values;
510 u32 dest_elem = 0;
511 for (u32 component = 0; component < 4; ++component) {
512 if (!instr.texs.IsComponentEnabled(component))
513 continue;
514 values[dest_elem++] = components[component];
515 }
516 if (dest_elem == 0)
517 return;
518
519 std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); });
520
521 const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]);
522 if (dest_elem <= 2) {
523 SetRegister(bb, instr.gpr0, first_value);
524 return;
525 }
526
527 SetTemporal(bb, 0, first_value);
528 SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
529
530 SetRegister(bb, instr.gpr0, GetTemporal(0));
531 SetRegister(bb, instr.gpr28, GetTemporal(1));
532}
533
534Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
535 TextureProcessMode process_mode, std::vector<Node> coords,
536 Node array, Node depth_compare, u32 bias_offset) {
537 const bool is_array = array;
538 const bool is_shadow = depth_compare;
539
540 UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) ||
541 (texture_type == TextureType::TextureCube && is_array && is_shadow),
542 "This method is not supported.");
543
544 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow);
545
546 const bool lod_needed = process_mode == TextureProcessMode::LZ ||
547 process_mode == TextureProcessMode::LL ||
548 process_mode == TextureProcessMode::LLA;
549
550 // LOD selection (either via bias or explicit textureLod) not supported in GL for
551 // sampler2DArrayShadow and samplerCubeArrayShadow.
552 const bool gl_lod_supported =
553 !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) ||
554 (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow));
555
556 const OperationCode read_method =
557 lod_needed && gl_lod_supported ? OperationCode::TextureLod : OperationCode::Texture;
558
559 UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);
560
561 std::vector<Node> extras;
562 if (process_mode != TextureProcessMode::None && gl_lod_supported) {
563 if (process_mode == TextureProcessMode::LZ) {
564 extras.push_back(Immediate(0.0f));
565 } else {
566 // If present, lod or bias are always stored in the register indexed by the gpr20
567 // field with an offset depending on the usage of the other registers
568 extras.push_back(GetRegister(instr.gpr20.Value() + bias_offset));
569 }
570 }
571
572 Node4 values;
573 for (u32 element = 0; element < values.size(); ++element) {
574 auto copy_coords = coords;
575 MetaTexture meta{sampler, array, depth_compare, extras, element};
576 values[element] = Operation(read_method, meta, std::move(copy_coords));
577 }
578
579 return values;
580}
581
582Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
583 TextureProcessMode process_mode, bool depth_compare, bool is_array) {
584 const bool lod_bias_enabled =
585 (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
586
587 const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
588 texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
589 // If enabled arrays index is always stored in the gpr8 field
590 const u64 array_register = instr.gpr8.Value();
591 // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
592 const u64 coord_register = array_register + (is_array ? 1 : 0);
593
594 std::vector<Node> coords;
595 for (std::size_t i = 0; i < coord_count; ++i) {
596 coords.push_back(GetRegister(coord_register + i));
597 }
598 // 1D.DC in OpenGL the 2nd component is ignored.
599 if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
600 coords.push_back(Immediate(0.0f));
601 }
602
603 const Node array = is_array ? GetRegister(array_register) : nullptr;
604
605 Node dc{};
606 if (depth_compare) {
607 // Depth is always stored in the register signaled by gpr20 or in the next register if lod
608 // or bias are used
609 const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
610 dc = GetRegister(depth_register);
611 }
612
613 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0);
614}
615
616Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
617 TextureProcessMode process_mode, bool depth_compare, bool is_array) {
618 const bool lod_bias_enabled =
619 (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
620
621 const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
622 texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
623 // If enabled arrays index is always stored in the gpr8 field
624 const u64 array_register = instr.gpr8.Value();
625 // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
626 const u64 coord_register = array_register + (is_array ? 1 : 0);
627 const u64 last_coord_register =
628 (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
629 ? static_cast<u64>(instr.gpr20.Value())
630 : coord_register + 1;
631 const u32 bias_offset = coord_count > 2 ? 1 : 0;
632
633 std::vector<Node> coords;
634 for (std::size_t i = 0; i < coord_count; ++i) {
635 const bool last = (i == (coord_count - 1)) && (coord_count > 1);
636 coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
637 }
638
639 const Node array = is_array ? GetRegister(array_register) : nullptr;
640
641 Node dc{};
642 if (depth_compare) {
643 // Depth is always stored in the register signaled by gpr20 or in the next register if lod
644 // or bias are used
645 const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
646 dc = GetRegister(depth_register);
647 }
648
649 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset);
650}
651
652Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
653 bool is_array) {
654 const std::size_t coord_count = GetCoordCount(texture_type);
655 const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
656 const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
657
658 // If enabled arrays index is always stored in the gpr8 field
659 const u64 array_register = instr.gpr8.Value();
660 // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
661 const u64 coord_register = array_register + (is_array ? 1 : 0);
662
663 std::vector<Node> coords;
664 for (size_t i = 0; i < coord_count; ++i)
665 coords.push_back(GetRegister(coord_register + i));
666
667 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
668
669 Node4 values;
670 for (u32 element = 0; element < values.size(); ++element) {
671 auto coords_copy = coords;
672 MetaTexture meta{sampler, GetRegister(array_register), {}, {}, element};
673 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
674 }
675
676 return values;
677}
678
679Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
680 const std::size_t type_coord_count = GetCoordCount(texture_type);
681 const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
682
683 // If enabled arrays index is always stored in the gpr8 field
684 const u64 array_register = instr.gpr8.Value();
685 // if is array gpr20 is used
686 const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();
687
688 const u64 last_coord_register =
689 ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
690 ? static_cast<u64>(instr.gpr20.Value())
691 : coord_register + 1;
692
693 std::vector<Node> coords;
694 for (std::size_t i = 0; i < type_coord_count; ++i) {
695 const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
696 coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
697 }
698
699 const Node array = is_array ? GetRegister(array_register) : nullptr;
700 // When lod is used always is in gpr20
701 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
702
703 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
704
705 Node4 values;
706 for (u32 element = 0; element < values.size(); ++element) {
707 auto coords_copy = coords;
708 MetaTexture meta{sampler, array, {}, {lod}, element};
709 values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
710 }
711 return values;
712}
713
714std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
715 TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
716 std::size_t max_coords, std::size_t max_inputs) {
717 const std::size_t coord_count = GetCoordCount(texture_type);
718
719 std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
720 const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
721 if (total_coord_count > max_coords || total_reg_count > max_inputs) {
722 UNIMPLEMENTED_MSG("Unsupported Texture operation");
723 total_coord_count = std::min(total_coord_count, max_coords);
724 }
725 // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later.
726 total_coord_count +=
727 (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0;
728
729 return {coord_count, total_coord_count};
730}
731
732} // namespace VideoCommon::Shader 239} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
new file mode 100644
index 000000000..a99ae19bf
--- /dev/null
+++ b/src/video_core/shader/decode/texture.cpp
@@ -0,0 +1,534 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <vector>
7#include <fmt/format.h>
8
9#include "common/assert.h"
10#include "common/common_types.h"
11#include "video_core/engines/shader_bytecode.h"
12#include "video_core/shader/shader_ir.h"
13
14namespace VideoCommon::Shader {
15
16using Tegra::Shader::Instruction;
17using Tegra::Shader::OpCode;
18using Tegra::Shader::Register;
19using Tegra::Shader::TextureMiscMode;
20using Tegra::Shader::TextureProcessMode;
21using Tegra::Shader::TextureType;
22
23static std::size_t GetCoordCount(TextureType texture_type) {
24 switch (texture_type) {
25 case TextureType::Texture1D:
26 return 1;
27 case TextureType::Texture2D:
28 return 2;
29 case TextureType::Texture3D:
30 case TextureType::TextureCube:
31 return 3;
32 default:
33 UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type));
34 return 0;
35 }
36}
37
38u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
39 const Instruction instr = {program_code[pc]};
40 const auto opcode = OpCode::Decode(instr);
41
42 switch (opcode->get().GetId()) {
43 case OpCode::Id::TEX: {
44 UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
45 "AOFFI is not implemented");
46
47 if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
48 LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
49 }
50
51 const TextureType texture_type{instr.tex.texture_type};
52 const bool is_array = instr.tex.array != 0;
53 const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
54 const auto process_mode = instr.tex.GetTextureProcessMode();
55 WriteTexInstructionFloat(
56 bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array));
57 break;
58 }
59 case OpCode::Id::TEXS: {
60 const TextureType texture_type{instr.texs.GetTextureType()};
61 const bool is_array{instr.texs.IsArrayTexture()};
62 const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
63 const auto process_mode = instr.texs.GetTextureProcessMode();
64
65 if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) {
66 LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
67 }
68
69 const Node4 components =
70 GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
71
72 if (instr.texs.fp32_flag) {
73 WriteTexsInstructionFloat(bb, instr, components);
74 } else {
75 WriteTexsInstructionHalfFloat(bb, instr, components);
76 }
77 break;
78 }
79 case OpCode::Id::TLD4: {
80 ASSERT(instr.tld4.array == 0);
81 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
82 "AOFFI is not implemented");
83 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
84 "NDV is not implemented");
85 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
86 "PTP is not implemented");
87
88 if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) {
89 LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
90 }
91
92 const auto texture_type = instr.tld4.texture_type.Value();
93 const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
94 const bool is_array = instr.tld4.array != 0;
95 WriteTexInstructionFloat(bb, instr,
96 GetTld4Code(instr, texture_type, depth_compare, is_array));
97 break;
98 }
99 case OpCode::Id::TLD4S: {
100 UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
101 "AOFFI is not implemented");
102 if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
103 LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
104 }
105
106 const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
107 const Node op_a = GetRegister(instr.gpr8);
108 const Node op_b = GetRegister(instr.gpr20);
109
110 // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
111 std::vector<Node> coords;
112 if (depth_compare) {
113 // Note: TLD4S coordinate encoding works just like TEXS's
114 const Node op_y = GetRegister(instr.gpr8.Value() + 1);
115 coords.push_back(op_a);
116 coords.push_back(op_y);
117 coords.push_back(op_b);
118 } else {
119 coords.push_back(op_a);
120 coords.push_back(op_b);
121 }
122 const Node component = Immediate(static_cast<u32>(instr.tld4s.component));
123
124 const auto& sampler =
125 GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);
126
127 Node4 values;
128 for (u32 element = 0; element < values.size(); ++element) {
129 auto coords_copy = coords;
130 MetaTexture meta{sampler, {}, {}, {}, {}, component, element};
131 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
132 }
133
134 WriteTexsInstructionFloat(bb, instr, values);
135 break;
136 }
137 case OpCode::Id::TXQ: {
138 if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
139 LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
140 }
141
142 // TODO: The new commits on the texture refactor, change the way samplers work.
143 // Sadly, not all texture instructions specify the type of texture their sampler
144 // uses. This must be fixed at a later instance.
145 const auto& sampler =
146 GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
147
148 u32 indexer = 0;
149 switch (instr.txq.query_type) {
150 case Tegra::Shader::TextureQueryType::Dimension: {
151 for (u32 element = 0; element < 4; ++element) {
152 if (!instr.txq.IsComponentEnabled(element)) {
153 continue;
154 }
155 MetaTexture meta{sampler, {}, {}, {}, {}, {}, element};
156 const Node value =
157 Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
158 SetTemporal(bb, indexer++, value);
159 }
160 for (u32 i = 0; i < indexer; ++i) {
161 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
162 }
163 break;
164 }
165 default:
166 UNIMPLEMENTED_MSG("Unhandled texture query type: {}",
167 static_cast<u32>(instr.txq.query_type.Value()));
168 }
169 break;
170 }
171 case OpCode::Id::TMML: {
172 UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
173 "NDV is not implemented");
174
175 if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) {
176 LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
177 }
178
179 auto texture_type = instr.tmml.texture_type.Value();
180 const bool is_array = instr.tmml.array != 0;
181 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
182
183 std::vector<Node> coords;
184
185 // TODO: Add coordinates for different samplers once other texture types are implemented.
186 switch (texture_type) {
187 case TextureType::Texture1D:
188 coords.push_back(GetRegister(instr.gpr8));
189 break;
190 case TextureType::Texture2D:
191 coords.push_back(GetRegister(instr.gpr8.Value() + 0));
192 coords.push_back(GetRegister(instr.gpr8.Value() + 1));
193 break;
194 default:
195 UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));
196
197 // Fallback to interpreting as a 2D texture for now
198 coords.push_back(GetRegister(instr.gpr8.Value() + 0));
199 coords.push_back(GetRegister(instr.gpr8.Value() + 1));
200 texture_type = TextureType::Texture2D;
201 }
202
203 for (u32 element = 0; element < 2; ++element) {
204 auto params = coords;
205 MetaTexture meta{sampler, {}, {}, {}, {}, {}, element};
206 const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
207 SetTemporal(bb, element, value);
208 }
209 for (u32 element = 0; element < 2; ++element) {
210 SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element));
211 }
212
213 break;
214 }
215 case OpCode::Id::TLDS: {
216 const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
217 const bool is_array{instr.tlds.IsArrayTexture()};
218
219 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
220 "AOFFI is not implemented");
221 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
222
223 if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
224 LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
225 }
226
227 WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array));
228 break;
229 }
230 default:
231 UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
232 }
233
234 return pc;
235}
236
237const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type,
238 bool is_array, bool is_shadow) {
239 const auto offset = static_cast<std::size_t>(sampler.index.Value());
240
241 // If this sampler has already been used, return the existing mapping.
242 const auto itr =
243 std::find_if(used_samplers.begin(), used_samplers.end(),
244 [&](const Sampler& entry) { return entry.GetOffset() == offset; });
245 if (itr != used_samplers.end()) {
246 ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
247 itr->IsShadow() == is_shadow);
248 return *itr;
249 }
250
251 // Otherwise create a new mapping for this sampler
252 const std::size_t next_index = used_samplers.size();
253 const Sampler entry{offset, next_index, type, is_array, is_shadow};
254 return *used_samplers.emplace(entry).first;
255}
256
257void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
258 u32 dest_elem = 0;
259 for (u32 elem = 0; elem < 4; ++elem) {
260 if (!instr.tex.IsComponentEnabled(elem)) {
261 // Skip disabled components
262 continue;
263 }
264 SetTemporal(bb, dest_elem++, components[elem]);
265 }
266 // After writing values in temporals, move them to the real registers
267 for (u32 i = 0; i < dest_elem; ++i) {
268 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
269 }
270}
271
272void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
273 const Node4& components) {
274 // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
275 // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
276
277 u32 dest_elem = 0;
278 for (u32 component = 0; component < 4; ++component) {
279 if (!instr.texs.IsComponentEnabled(component))
280 continue;
281 SetTemporal(bb, dest_elem++, components[component]);
282 }
283
284 for (u32 i = 0; i < dest_elem; ++i) {
285 if (i < 2) {
286 // Write the first two swizzle components to gpr0 and gpr0+1
287 SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i));
288 } else {
289 ASSERT(instr.texs.HasTwoDestinations());
290 // Write the rest of the swizzle components to gpr28 and gpr28+1
291 SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i));
292 }
293 }
294}
295
296void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
297 const Node4& components) {
298 // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half
299 // float instruction).
300
301 Node4 values;
302 u32 dest_elem = 0;
303 for (u32 component = 0; component < 4; ++component) {
304 if (!instr.texs.IsComponentEnabled(component))
305 continue;
306 values[dest_elem++] = components[component];
307 }
308 if (dest_elem == 0)
309 return;
310
311 std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); });
312
313 const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]);
314 if (dest_elem <= 2) {
315 SetRegister(bb, instr.gpr0, first_value);
316 return;
317 }
318
319 SetTemporal(bb, 0, first_value);
320 SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
321
322 SetRegister(bb, instr.gpr0, GetTemporal(0));
323 SetRegister(bb, instr.gpr28, GetTemporal(1));
324}
325
326Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
327 TextureProcessMode process_mode, std::vector<Node> coords,
328 Node array, Node depth_compare, u32 bias_offset) {
329 const bool is_array = array;
330 const bool is_shadow = depth_compare;
331
332 UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) ||
333 (texture_type == TextureType::TextureCube && is_array && is_shadow),
334 "This method is not supported.");
335
336 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow);
337
338 const bool lod_needed = process_mode == TextureProcessMode::LZ ||
339 process_mode == TextureProcessMode::LL ||
340 process_mode == TextureProcessMode::LLA;
341
342 // LOD selection (either via bias or explicit textureLod) not supported in GL for
343 // sampler2DArrayShadow and samplerCubeArrayShadow.
344 const bool gl_lod_supported =
345 !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) ||
346 (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow));
347
348 const OperationCode read_method =
349 (lod_needed && gl_lod_supported) ? OperationCode::TextureLod : OperationCode::Texture;
350
351 UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);
352
353 Node bias = {};
354 Node lod = {};
355 if (process_mode != TextureProcessMode::None && gl_lod_supported) {
356 switch (process_mode) {
357 case TextureProcessMode::LZ:
358 lod = Immediate(0.0f);
359 break;
360 case TextureProcessMode::LB:
361 // If present, lod or bias are always stored in the register indexed by the gpr20
362 // field with an offset depending on the usage of the other registers
363 bias = GetRegister(instr.gpr20.Value() + bias_offset);
364 break;
365 case TextureProcessMode::LL:
366 lod = GetRegister(instr.gpr20.Value() + bias_offset);
367 break;
368 default:
369 UNIMPLEMENTED_MSG("Unimplemented process mode={}", static_cast<u32>(process_mode));
370 break;
371 }
372 }
373
374 Node4 values;
375 for (u32 element = 0; element < values.size(); ++element) {
376 auto copy_coords = coords;
377 MetaTexture meta{sampler, array, depth_compare, bias, lod, {}, element};
378 values[element] = Operation(read_method, meta, std::move(copy_coords));
379 }
380
381 return values;
382}
383
384Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
385 TextureProcessMode process_mode, bool depth_compare, bool is_array) {
386 const bool lod_bias_enabled =
387 (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
388
389 const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
390 texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
391 // If enabled arrays index is always stored in the gpr8 field
392 const u64 array_register = instr.gpr8.Value();
393 // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
394 const u64 coord_register = array_register + (is_array ? 1 : 0);
395
396 std::vector<Node> coords;
397 for (std::size_t i = 0; i < coord_count; ++i) {
398 coords.push_back(GetRegister(coord_register + i));
399 }
400 // 1D.DC in OpenGL the 2nd component is ignored.
401 if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
402 coords.push_back(Immediate(0.0f));
403 }
404
405 const Node array = is_array ? GetRegister(array_register) : nullptr;
406
407 Node dc{};
408 if (depth_compare) {
409 // Depth is always stored in the register signaled by gpr20 or in the next register if lod
410 // or bias are used
411 const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
412 dc = GetRegister(depth_register);
413 }
414
415 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0);
416}
417
418Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
419 TextureProcessMode process_mode, bool depth_compare, bool is_array) {
420 const bool lod_bias_enabled =
421 (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
422
423 const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
424 texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
425 // If enabled arrays index is always stored in the gpr8 field
426 const u64 array_register = instr.gpr8.Value();
427 // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
428 const u64 coord_register = array_register + (is_array ? 1 : 0);
429 const u64 last_coord_register =
430 (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
431 ? static_cast<u64>(instr.gpr20.Value())
432 : coord_register + 1;
433 const u32 bias_offset = coord_count > 2 ? 1 : 0;
434
435 std::vector<Node> coords;
436 for (std::size_t i = 0; i < coord_count; ++i) {
437 const bool last = (i == (coord_count - 1)) && (coord_count > 1);
438 coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
439 }
440
441 const Node array = is_array ? GetRegister(array_register) : nullptr;
442
443 Node dc{};
444 if (depth_compare) {
445 // Depth is always stored in the register signaled by gpr20 or in the next register if lod
446 // or bias are used
447 const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
448 dc = GetRegister(depth_register);
449 }
450
451 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset);
452}
453
454Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
455 bool is_array) {
456 const std::size_t coord_count = GetCoordCount(texture_type);
457 const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
458 const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
459
460 // If enabled arrays index is always stored in the gpr8 field
461 const u64 array_register = instr.gpr8.Value();
462 // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
463 const u64 coord_register = array_register + (is_array ? 1 : 0);
464
465 std::vector<Node> coords;
466 for (size_t i = 0; i < coord_count; ++i)
467 coords.push_back(GetRegister(coord_register + i));
468
469 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
470
471 Node4 values;
472 for (u32 element = 0; element < values.size(); ++element) {
473 auto coords_copy = coords;
474 MetaTexture meta{sampler, GetRegister(array_register), {}, {}, {}, {}, element};
475 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
476 }
477
478 return values;
479}
480
481Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
482 const std::size_t type_coord_count = GetCoordCount(texture_type);
483 const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
484
485 // If enabled arrays index is always stored in the gpr8 field
486 const u64 array_register = instr.gpr8.Value();
487 // if is array gpr20 is used
488 const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();
489
490 const u64 last_coord_register =
491 ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
492 ? static_cast<u64>(instr.gpr20.Value())
493 : coord_register + 1;
494
495 std::vector<Node> coords;
496 for (std::size_t i = 0; i < type_coord_count; ++i) {
497 const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
498 coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
499 }
500
501 const Node array = is_array ? GetRegister(array_register) : nullptr;
502 // When lod is used always is in gpr20
503 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
504
505 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
506
507 Node4 values;
508 for (u32 element = 0; element < values.size(); ++element) {
509 auto coords_copy = coords;
510 MetaTexture meta{sampler, array, {}, {}, lod, {}, element};
511 values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
512 }
513 return values;
514}
515
516std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
517 TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
518 std::size_t max_coords, std::size_t max_inputs) {
519 const std::size_t coord_count = GetCoordCount(texture_type);
520
521 std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
522 const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
523 if (total_coord_count > max_coords || total_reg_count > max_inputs) {
524 UNIMPLEMENTED_MSG("Unsupported Texture operation");
525 total_coord_count = std::min(total_coord_count, max_coords);
526 }
527 // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later.
528 total_coord_count +=
529 (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0;
530
531 return {coord_count, total_coord_count};
532}
533
534} // namespace VideoCommon::Shader \ No newline at end of file
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 52c7f2c4e..5bc3a3900 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -290,7 +290,9 @@ struct MetaTexture {
290 const Sampler& sampler; 290 const Sampler& sampler;
291 Node array{}; 291 Node array{};
292 Node depth_compare{}; 292 Node depth_compare{};
293 std::vector<Node> extras; 293 Node bias{};
294 Node lod{};
295 Node component{};
294 u32 element{}; 296 u32 element{};
295}; 297};
296 298
@@ -614,6 +616,7 @@ private:
614 u32 DecodeHfma2(NodeBlock& bb, u32 pc); 616 u32 DecodeHfma2(NodeBlock& bb, u32 pc);
615 u32 DecodeConversion(NodeBlock& bb, u32 pc); 617 u32 DecodeConversion(NodeBlock& bb, u32 pc);
616 u32 DecodeMemory(NodeBlock& bb, u32 pc); 618 u32 DecodeMemory(NodeBlock& bb, u32 pc);
619 u32 DecodeTexture(NodeBlock& bb, u32 pc);
617 u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc); 620 u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc);
618 u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc); 621 u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc);
619 u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc); 622 u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc);
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index ca880dc65..32e78049c 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -346,7 +346,7 @@ void Config::ReadValues() {
346 346
347 // Renderer 347 // Renderer
348 Settings::values.resolution_factor = 348 Settings::values.resolution_factor =
349 (float)sdl2_config->GetReal("Renderer", "resolution_factor", 1.0); 349 static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0));
350 Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true); 350 Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true);
351 Settings::values.frame_limit = 351 Settings::values.frame_limit =
352 static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)); 352 static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100));
@@ -357,16 +357,17 @@ void Config::ReadValues() {
357 Settings::values.use_asynchronous_gpu_emulation = 357 Settings::values.use_asynchronous_gpu_emulation =
358 sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false); 358 sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false);
359 359
360 Settings::values.bg_red = (float)sdl2_config->GetReal("Renderer", "bg_red", 0.0); 360 Settings::values.bg_red = static_cast<float>(sdl2_config->GetReal("Renderer", "bg_red", 0.0));
361 Settings::values.bg_green = (float)sdl2_config->GetReal("Renderer", "bg_green", 0.0); 361 Settings::values.bg_green =
362 Settings::values.bg_blue = (float)sdl2_config->GetReal("Renderer", "bg_blue", 0.0); 362 static_cast<float>(sdl2_config->GetReal("Renderer", "bg_green", 0.0));
363 Settings::values.bg_blue = static_cast<float>(sdl2_config->GetReal("Renderer", "bg_blue", 0.0));
363 364
364 // Audio 365 // Audio
365 Settings::values.sink_id = sdl2_config->Get("Audio", "output_engine", "auto"); 366 Settings::values.sink_id = sdl2_config->Get("Audio", "output_engine", "auto");
366 Settings::values.enable_audio_stretching = 367 Settings::values.enable_audio_stretching =
367 sdl2_config->GetBoolean("Audio", "enable_audio_stretching", true); 368 sdl2_config->GetBoolean("Audio", "enable_audio_stretching", true);
368 Settings::values.audio_device_id = sdl2_config->Get("Audio", "output_device", "auto"); 369 Settings::values.audio_device_id = sdl2_config->Get("Audio", "output_device", "auto");
369 Settings::values.volume = sdl2_config->GetReal("Audio", "volume", 1); 370 Settings::values.volume = static_cast<float>(sdl2_config->GetReal("Audio", "volume", 1));
370 371
371 Settings::values.language_index = sdl2_config->GetInteger("System", "language_index", 1); 372 Settings::values.language_index = sdl2_config->GetInteger("System", "language_index", 1);
372 373