summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/audio_core/audio_renderer.cpp12
-rw-r--r--src/audio_core/audio_renderer.h5
-rw-r--r--src/audio_core/cubeb_sink.cpp6
-rw-r--r--src/common/thread_queue_list.h10
-rw-r--r--src/core/core_cpu.cpp3
-rw-r--r--src/core/core_cpu.h2
-rw-r--r--src/core/core_timing.cpp2
-rw-r--r--src/core/file_sys/partition_filesystem.h2
-rw-r--r--src/core/file_sys/program_metadata.h2
-rw-r--r--src/core/file_sys/vfs.h20
-rw-r--r--src/core/file_sys/vfs_offset.h3
-rw-r--r--src/core/file_sys/vfs_vector.h3
-rw-r--r--src/core/hle/kernel/scheduler.cpp2
-rw-r--r--src/core/hle/kernel/scheduler.h2
-rw-r--r--src/core/hle/kernel/svc.cpp3
-rw-r--r--src/core/hle/kernel/thread.cpp32
-rw-r--r--src/core/hle/service/audio/audren_u.cpp44
-rw-r--r--src/core/hle/service/audio/audren_u.h1
-rw-r--r--src/core/hle/service/hid/hid.cpp17
-rw-r--r--src/core/hle/service/service.h2
-rw-r--r--src/core/loader/loader.cpp2
-rw-r--r--src/core/loader/loader.h2
-rw-r--r--src/video_core/engines/shader_bytecode.h29
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp192
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h23
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp95
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp15
-rw-r--r--src/video_core/renderer_opengl/gl_state.h6
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.cpp201
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.h42
30 files changed, 432 insertions, 348 deletions
diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp
index 282f345c5..6ebed3fb0 100644
--- a/src/audio_core/audio_renderer.cpp
+++ b/src/audio_core/audio_renderer.cpp
@@ -26,6 +26,18 @@ AudioRenderer::AudioRenderer(AudioRendererParameter params,
26 QueueMixedBuffer(2); 26 QueueMixedBuffer(2);
27} 27}
28 28
29u32 AudioRenderer::GetSampleRate() const {
30 return worker_params.sample_rate;
31}
32
33u32 AudioRenderer::GetSampleCount() const {
34 return worker_params.sample_count;
35}
36
37u32 AudioRenderer::GetMixBufferCount() const {
38 return worker_params.mix_buffer_count;
39}
40
29std::vector<u8> AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_params) { 41std::vector<u8> AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_params) {
30 // Copy UpdateDataHeader struct 42 // Copy UpdateDataHeader struct
31 UpdateDataHeader config{}; 43 UpdateDataHeader config{};
diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h
index 6950a4681..13c5d0adc 100644
--- a/src/audio_core/audio_renderer.h
+++ b/src/audio_core/audio_renderer.h
@@ -26,7 +26,7 @@ enum class PlayState : u8 {
26struct AudioRendererParameter { 26struct AudioRendererParameter {
27 u32_le sample_rate; 27 u32_le sample_rate;
28 u32_le sample_count; 28 u32_le sample_count;
29 u32_le unknown_8; 29 u32_le mix_buffer_count;
30 u32_le unknown_c; 30 u32_le unknown_c;
31 u32_le voice_count; 31 u32_le voice_count;
32 u32_le sink_count; 32 u32_le sink_count;
@@ -160,6 +160,9 @@ public:
160 std::vector<u8> UpdateAudioRenderer(const std::vector<u8>& input_params); 160 std::vector<u8> UpdateAudioRenderer(const std::vector<u8>& input_params);
161 void QueueMixedBuffer(Buffer::Tag tag); 161 void QueueMixedBuffer(Buffer::Tag tag);
162 void ReleaseAndQueueBuffers(); 162 void ReleaseAndQueueBuffers();
163 u32 GetSampleRate() const;
164 u32 GetSampleCount() const;
165 u32 GetMixBufferCount() const;
163 166
164private: 167private:
165 class VoiceState { 168 class VoiceState {
diff --git a/src/audio_core/cubeb_sink.cpp b/src/audio_core/cubeb_sink.cpp
index 1501ef1f4..5a1177d0c 100644
--- a/src/audio_core/cubeb_sink.cpp
+++ b/src/audio_core/cubeb_sink.cpp
@@ -4,6 +4,7 @@
4 4
5#include <algorithm> 5#include <algorithm>
6#include <cstring> 6#include <cstring>
7#include <mutex>
7 8
8#include "audio_core/cubeb_sink.h" 9#include "audio_core/cubeb_sink.h"
9#include "audio_core/stream.h" 10#include "audio_core/stream.h"
@@ -66,6 +67,8 @@ public:
66 return; 67 return;
67 } 68 }
68 69
70 std::lock_guard lock{queue_mutex};
71
69 queue.reserve(queue.size() + samples.size() * GetNumChannels()); 72 queue.reserve(queue.size() + samples.size() * GetNumChannels());
70 73
71 if (is_6_channel) { 74 if (is_6_channel) {
@@ -94,6 +97,7 @@ private:
94 u32 num_channels{}; 97 u32 num_channels{};
95 bool is_6_channel{}; 98 bool is_6_channel{};
96 99
100 std::mutex queue_mutex;
97 std::vector<s16> queue; 101 std::vector<s16> queue;
98 102
99 static long DataCallback(cubeb_stream* stream, void* user_data, const void* input_buffer, 103 static long DataCallback(cubeb_stream* stream, void* user_data, const void* input_buffer,
@@ -153,6 +157,8 @@ long SinkStreamImpl::DataCallback(cubeb_stream* stream, void* user_data, const v
153 return {}; 157 return {};
154 } 158 }
155 159
160 std::lock_guard lock{impl->queue_mutex};
161
156 const size_t frames_to_write{ 162 const size_t frames_to_write{
157 std::min(impl->queue.size() / impl->GetNumChannels(), static_cast<size_t>(num_frames))}; 163 std::min(impl->queue.size() / impl->GetNumChannels(), static_cast<size_t>(num_frames))};
158 164
diff --git a/src/common/thread_queue_list.h b/src/common/thread_queue_list.h
index 38a450d69..133122c5f 100644
--- a/src/common/thread_queue_list.h
+++ b/src/common/thread_queue_list.h
@@ -16,7 +16,7 @@ struct ThreadQueueList {
16 // (dynamically resizable) circular buffers to remove their overhead when 16 // (dynamically resizable) circular buffers to remove their overhead when
17 // inserting and popping. 17 // inserting and popping.
18 18
19 typedef unsigned int Priority; 19 using Priority = unsigned int;
20 20
21 // Number of priority levels. (Valid levels are [0..NUM_QUEUES).) 21 // Number of priority levels. (Valid levels are [0..NUM_QUEUES).)
22 static const Priority NUM_QUEUES = N; 22 static const Priority NUM_QUEUES = N;
@@ -26,9 +26,9 @@ struct ThreadQueueList {
26 } 26 }
27 27
28 // Only for debugging, returns priority level. 28 // Only for debugging, returns priority level.
29 Priority contains(const T& uid) { 29 Priority contains(const T& uid) const {
30 for (Priority i = 0; i < NUM_QUEUES; ++i) { 30 for (Priority i = 0; i < NUM_QUEUES; ++i) {
31 Queue& cur = queues[i]; 31 const Queue& cur = queues[i];
32 if (std::find(cur.data.cbegin(), cur.data.cend(), uid) != cur.data.cend()) { 32 if (std::find(cur.data.cbegin(), cur.data.cend(), uid) != cur.data.cend()) {
33 return i; 33 return i;
34 } 34 }
@@ -37,8 +37,8 @@ struct ThreadQueueList {
37 return -1; 37 return -1;
38 } 38 }
39 39
40 T get_first() { 40 T get_first() const {
41 Queue* cur = first; 41 const Queue* cur = first;
42 while (cur != nullptr) { 42 while (cur != nullptr) {
43 if (!cur->data.empty()) { 43 if (!cur->data.empty()) {
44 return cur->data.front(); 44 return cur->data.front();
diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp
index 3f1c70624..b042ee02b 100644
--- a/src/core/core_cpu.cpp
+++ b/src/core/core_cpu.cpp
@@ -14,6 +14,7 @@
14#include "core/core_timing.h" 14#include "core/core_timing.h"
15#include "core/hle/kernel/scheduler.h" 15#include "core/hle/kernel/scheduler.h"
16#include "core/hle/kernel/thread.h" 16#include "core/hle/kernel/thread.h"
17#include "core/hle/lock.h"
17#include "core/settings.h" 18#include "core/settings.h"
18 19
19namespace Core { 20namespace Core {
@@ -126,6 +127,8 @@ void Cpu::Reschedule() {
126 } 127 }
127 128
128 reschedule_pending = false; 129 reschedule_pending = false;
130 // Lock the global kernel mutex when we manipulate the HLE state
131 std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
129 scheduler->Reschedule(); 132 scheduler->Reschedule();
130} 133}
131 134
diff --git a/src/core/core_cpu.h b/src/core/core_cpu.h
index 976952903..56cdae194 100644
--- a/src/core/core_cpu.h
+++ b/src/core/core_cpu.h
@@ -79,7 +79,7 @@ private:
79 std::shared_ptr<CpuBarrier> cpu_barrier; 79 std::shared_ptr<CpuBarrier> cpu_barrier;
80 std::shared_ptr<Kernel::Scheduler> scheduler; 80 std::shared_ptr<Kernel::Scheduler> scheduler;
81 81
82 bool reschedule_pending{}; 82 std::atomic<bool> reschedule_pending = false;
83 size_t core_index; 83 size_t core_index;
84}; 84};
85 85
diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp
index d3bb6f818..f977d1b32 100644
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -135,11 +135,9 @@ void ClearPendingEvents() {
135void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata) { 135void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata) {
136 ASSERT(event_type != nullptr); 136 ASSERT(event_type != nullptr);
137 s64 timeout = GetTicks() + cycles_into_future; 137 s64 timeout = GetTicks() + cycles_into_future;
138
139 // If this event needs to be scheduled before the next advance(), force one early 138 // If this event needs to be scheduled before the next advance(), force one early
140 if (!is_global_timer_sane) 139 if (!is_global_timer_sane)
141 ForceExceptionCheck(cycles_into_future); 140 ForceExceptionCheck(cycles_into_future);
142
143 event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type}); 141 event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type});
144 std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>()); 142 std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>());
145} 143}
diff --git a/src/core/file_sys/partition_filesystem.h b/src/core/file_sys/partition_filesystem.h
index 7c7a75816..be7bc32a8 100644
--- a/src/core/file_sys/partition_filesystem.h
+++ b/src/core/file_sys/partition_filesystem.h
@@ -13,7 +13,7 @@
13#include "core/file_sys/vfs.h" 13#include "core/file_sys/vfs.h"
14 14
15namespace Loader { 15namespace Loader {
16enum class ResultStatus; 16enum class ResultStatus : u16;
17} 17}
18 18
19namespace FileSys { 19namespace FileSys {
diff --git a/src/core/file_sys/program_metadata.h b/src/core/file_sys/program_metadata.h
index 06a7315db..74a91052b 100644
--- a/src/core/file_sys/program_metadata.h
+++ b/src/core/file_sys/program_metadata.h
@@ -13,7 +13,7 @@
13#include "partition_filesystem.h" 13#include "partition_filesystem.h"
14 14
15namespace Loader { 15namespace Loader {
16enum class ResultStatus; 16enum class ResultStatus : u16;
17} 17}
18 18
19namespace FileSys { 19namespace FileSys {
diff --git a/src/core/file_sys/vfs.h b/src/core/file_sys/vfs.h
index 141a053ce..78a63c59b 100644
--- a/src/core/file_sys/vfs.h
+++ b/src/core/file_sys/vfs.h
@@ -15,9 +15,9 @@
15 15
16namespace FileSys { 16namespace FileSys {
17 17
18struct VfsFilesystem; 18class VfsDirectory;
19struct VfsFile; 19class VfsFile;
20struct VfsDirectory; 20class VfsFilesystem;
21 21
22// Convenience typedefs to use Vfs* interfaces 22// Convenience typedefs to use Vfs* interfaces
23using VirtualFilesystem = std::shared_ptr<VfsFilesystem>; 23using VirtualFilesystem = std::shared_ptr<VfsFilesystem>;
@@ -34,8 +34,9 @@ enum class VfsEntryType {
34// A class representing an abstract filesystem. A default implementation given the root VirtualDir 34// A class representing an abstract filesystem. A default implementation given the root VirtualDir
35// is provided for convenience, but if the Vfs implementation has any additional state or 35// is provided for convenience, but if the Vfs implementation has any additional state or
36// functionality, they will need to override. 36// functionality, they will need to override.
37struct VfsFilesystem : NonCopyable { 37class VfsFilesystem : NonCopyable {
38 VfsFilesystem(VirtualDir root); 38public:
39 explicit VfsFilesystem(VirtualDir root);
39 virtual ~VfsFilesystem(); 40 virtual ~VfsFilesystem();
40 41
41 // Gets the friendly name for the filesystem. 42 // Gets the friendly name for the filesystem.
@@ -81,7 +82,8 @@ protected:
81}; 82};
82 83
83// A class representing a file in an abstract filesystem. 84// A class representing a file in an abstract filesystem.
84struct VfsFile : NonCopyable { 85class VfsFile : NonCopyable {
86public:
85 virtual ~VfsFile(); 87 virtual ~VfsFile();
86 88
87 // Retrieves the file name. 89 // Retrieves the file name.
@@ -179,7 +181,8 @@ struct VfsFile : NonCopyable {
179}; 181};
180 182
181// A class representing a directory in an abstract filesystem. 183// A class representing a directory in an abstract filesystem.
182struct VfsDirectory : NonCopyable { 184class VfsDirectory : NonCopyable {
185public:
183 virtual ~VfsDirectory(); 186 virtual ~VfsDirectory();
184 187
185 // Retrives the file located at path as if the current directory was root. Returns nullptr if 188 // Retrives the file located at path as if the current directory was root. Returns nullptr if
@@ -295,7 +298,8 @@ protected:
295 298
296// A convenience partial-implementation of VfsDirectory that stubs out methods that should only work 299// A convenience partial-implementation of VfsDirectory that stubs out methods that should only work
297// if writable. This is to avoid redundant empty methods everywhere. 300// if writable. This is to avoid redundant empty methods everywhere.
298struct ReadOnlyVfsDirectory : public VfsDirectory { 301class ReadOnlyVfsDirectory : public VfsDirectory {
302public:
299 bool IsWritable() const override; 303 bool IsWritable() const override;
300 bool IsReadable() const override; 304 bool IsReadable() const override;
301 std::shared_ptr<VfsDirectory> CreateSubdirectory(std::string_view name) override; 305 std::shared_ptr<VfsDirectory> CreateSubdirectory(std::string_view name) override;
diff --git a/src/core/file_sys/vfs_offset.h b/src/core/file_sys/vfs_offset.h
index 235970dc5..cb92d1570 100644
--- a/src/core/file_sys/vfs_offset.h
+++ b/src/core/file_sys/vfs_offset.h
@@ -15,7 +15,8 @@ namespace FileSys {
15// Similar to seeking to an offset. 15// Similar to seeking to an offset.
16// If the file is writable, operations that would write past the end of the offset file will expand 16// If the file is writable, operations that would write past the end of the offset file will expand
17// the size of this wrapper. 17// the size of this wrapper.
18struct OffsetVfsFile : public VfsFile { 18class OffsetVfsFile : public VfsFile {
19public:
19 OffsetVfsFile(std::shared_ptr<VfsFile> file, size_t size, size_t offset = 0, 20 OffsetVfsFile(std::shared_ptr<VfsFile> file, size_t size, size_t offset = 0,
20 std::string new_name = "", VirtualDir new_parent = nullptr); 21 std::string new_name = "", VirtualDir new_parent = nullptr);
21 22
diff --git a/src/core/file_sys/vfs_vector.h b/src/core/file_sys/vfs_vector.h
index ba469647b..b3b468233 100644
--- a/src/core/file_sys/vfs_vector.h
+++ b/src/core/file_sys/vfs_vector.h
@@ -10,7 +10,8 @@ namespace FileSys {
10 10
11// An implementation of VfsDirectory that maintains two vectors for subdirectories and files. 11// An implementation of VfsDirectory that maintains two vectors for subdirectories and files.
12// Vector data is supplied upon construction. 12// Vector data is supplied upon construction.
13struct VectorVfsDirectory : public VfsDirectory { 13class VectorVfsDirectory : public VfsDirectory {
14public:
14 explicit VectorVfsDirectory(std::vector<VirtualFile> files = {}, 15 explicit VectorVfsDirectory(std::vector<VirtualFile> files = {},
15 std::vector<VirtualDir> dirs = {}, VirtualDir parent = nullptr, 16 std::vector<VirtualDir> dirs = {}, VirtualDir parent = nullptr,
16 std::string name = ""); 17 std::string name = "");
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index 94065c736..e770b9103 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -25,7 +25,7 @@ Scheduler::~Scheduler() {
25 } 25 }
26} 26}
27 27
28bool Scheduler::HaveReadyThreads() { 28bool Scheduler::HaveReadyThreads() const {
29 std::lock_guard<std::mutex> lock(scheduler_mutex); 29 std::lock_guard<std::mutex> lock(scheduler_mutex);
30 return ready_queue.get_first() != nullptr; 30 return ready_queue.get_first() != nullptr;
31} 31}
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h
index 1a4ee8f36..6a61ef64e 100644
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -21,7 +21,7 @@ public:
21 ~Scheduler(); 21 ~Scheduler();
22 22
23 /// Returns whether there are any threads that are ready to run. 23 /// Returns whether there are any threads that are ready to run.
24 bool HaveReadyThreads(); 24 bool HaveReadyThreads() const;
25 25
26 /// Reschedules to the next available thread (call after current thread is suspended) 26 /// Reschedules to the next available thread (call after current thread is suspended)
27 void Reschedule(); 27 void Reschedule();
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 4ca481513..b24f409b3 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -705,8 +705,7 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
705 Handle owner_handle = static_cast<Handle>(mutex_val & Mutex::MutexOwnerMask); 705 Handle owner_handle = static_cast<Handle>(mutex_val & Mutex::MutexOwnerMask);
706 auto owner = g_handle_table.Get<Thread>(owner_handle); 706 auto owner = g_handle_table.Get<Thread>(owner_handle);
707 ASSERT(owner); 707 ASSERT(owner);
708 ASSERT(thread->status != ThreadStatus::Running); 708 ASSERT(thread->status == ThreadStatus::WaitMutex);
709 thread->status = ThreadStatus::WaitMutex;
710 thread->wakeup_callback = nullptr; 709 thread->wakeup_callback = nullptr;
711 710
712 owner->AddMutexWaiter(thread); 711 owner->AddMutexWaiter(thread);
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index b9022feae..a1a7867ce 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -23,6 +23,7 @@
23#include "core/hle/kernel/object.h" 23#include "core/hle/kernel/object.h"
24#include "core/hle/kernel/process.h" 24#include "core/hle/kernel/process.h"
25#include "core/hle/kernel/thread.h" 25#include "core/hle/kernel/thread.h"
26#include "core/hle/lock.h"
26#include "core/hle/result.h" 27#include "core/hle/result.h"
27#include "core/memory.h" 28#include "core/memory.h"
28 29
@@ -104,6 +105,10 @@ void ExitCurrentThread() {
104 */ 105 */
105static void ThreadWakeupCallback(u64 thread_handle, int cycles_late) { 106static void ThreadWakeupCallback(u64 thread_handle, int cycles_late) {
106 const auto proper_handle = static_cast<Handle>(thread_handle); 107 const auto proper_handle = static_cast<Handle>(thread_handle);
108
109 // Lock the global kernel mutex when we enter the kernel HLE.
110 std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
111
107 SharedPtr<Thread> thread = wakeup_callback_handle_table.Get<Thread>(proper_handle); 112 SharedPtr<Thread> thread = wakeup_callback_handle_table.Get<Thread>(proper_handle);
108 if (thread == nullptr) { 113 if (thread == nullptr) {
109 LOG_CRITICAL(Kernel, "Callback fired for invalid thread {:08X}", proper_handle); 114 LOG_CRITICAL(Kernel, "Callback fired for invalid thread {:08X}", proper_handle);
@@ -155,8 +160,10 @@ void Thread::WakeAfterDelay(s64 nanoseconds) {
155 if (nanoseconds == -1) 160 if (nanoseconds == -1)
156 return; 161 return;
157 162
158 CoreTiming::ScheduleEvent(CoreTiming::nsToCycles(nanoseconds), ThreadWakeupEventType, 163 // This function might be called from any thread so we have to be cautious and use the
159 callback_handle); 164 // thread-safe version of ScheduleEvent.
165 CoreTiming::ScheduleEventThreadsafe(CoreTiming::nsToCycles(nanoseconds), ThreadWakeupEventType,
166 callback_handle);
160} 167}
161 168
162void Thread::CancelWakeupTimer() { 169void Thread::CancelWakeupTimer() {
@@ -419,12 +426,33 @@ VAddr Thread::GetCommandBufferAddress() const {
419} 426}
420 427
421void Thread::AddMutexWaiter(SharedPtr<Thread> thread) { 428void Thread::AddMutexWaiter(SharedPtr<Thread> thread) {
429 if (thread->lock_owner == this) {
430 // If the thread is already waiting for this thread to release the mutex, ensure that the
431 // waiters list is consistent and return without doing anything.
432 auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
433 ASSERT(itr != wait_mutex_threads.end());
434 return;
435 }
436
437 // A thread can't wait on two different mutexes at the same time.
438 ASSERT(thread->lock_owner == nullptr);
439
440 // Ensure that the thread is not already in the list of mutex waiters
441 auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
442 ASSERT(itr == wait_mutex_threads.end());
443
422 thread->lock_owner = this; 444 thread->lock_owner = this;
423 wait_mutex_threads.emplace_back(std::move(thread)); 445 wait_mutex_threads.emplace_back(std::move(thread));
424 UpdatePriority(); 446 UpdatePriority();
425} 447}
426 448
427void Thread::RemoveMutexWaiter(SharedPtr<Thread> thread) { 449void Thread::RemoveMutexWaiter(SharedPtr<Thread> thread) {
450 ASSERT(thread->lock_owner == this);
451
452 // Ensure that the thread is in the list of mutex waiters
453 auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
454 ASSERT(itr != wait_mutex_threads.end());
455
428 boost::remove_erase(wait_mutex_threads, thread); 456 boost::remove_erase(wait_mutex_threads, thread);
429 thread->lock_owner = nullptr; 457 thread->lock_owner = nullptr;
430 UpdatePriority(); 458 UpdatePriority();
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index f99304de5..9e75eb3a6 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -20,9 +20,9 @@ public:
20 explicit IAudioRenderer(AudioCore::AudioRendererParameter audren_params) 20 explicit IAudioRenderer(AudioCore::AudioRendererParameter audren_params)
21 : ServiceFramework("IAudioRenderer") { 21 : ServiceFramework("IAudioRenderer") {
22 static const FunctionInfo functions[] = { 22 static const FunctionInfo functions[] = {
23 {0, nullptr, "GetAudioRendererSampleRate"}, 23 {0, &IAudioRenderer::GetAudioRendererSampleRate, "GetAudioRendererSampleRate"},
24 {1, nullptr, "GetAudioRendererSampleCount"}, 24 {1, &IAudioRenderer::GetAudioRendererSampleCount, "GetAudioRendererSampleCount"},
25 {2, nullptr, "GetAudioRendererMixBufferCount"}, 25 {2, &IAudioRenderer::GetAudioRendererMixBufferCount, "GetAudioRendererMixBufferCount"},
26 {3, nullptr, "GetAudioRendererState"}, 26 {3, nullptr, "GetAudioRendererState"},
27 {4, &IAudioRenderer::RequestUpdateAudioRenderer, "RequestUpdateAudioRenderer"}, 27 {4, &IAudioRenderer::RequestUpdateAudioRenderer, "RequestUpdateAudioRenderer"},
28 {5, &IAudioRenderer::StartAudioRenderer, "StartAudioRenderer"}, 28 {5, &IAudioRenderer::StartAudioRenderer, "StartAudioRenderer"},
@@ -45,6 +45,27 @@ private:
45 system_event->Signal(); 45 system_event->Signal();
46 } 46 }
47 47
48 void GetAudioRendererSampleRate(Kernel::HLERequestContext& ctx) {
49 IPC::ResponseBuilder rb{ctx, 3};
50 rb.Push(RESULT_SUCCESS);
51 rb.Push<u32>(renderer->GetSampleRate());
52 LOG_DEBUG(Service_Audio, "called");
53 }
54
55 void GetAudioRendererSampleCount(Kernel::HLERequestContext& ctx) {
56 IPC::ResponseBuilder rb{ctx, 3};
57 rb.Push(RESULT_SUCCESS);
58 rb.Push<u32>(renderer->GetSampleCount());
59 LOG_DEBUG(Service_Audio, "called");
60 }
61
62 void GetAudioRendererMixBufferCount(Kernel::HLERequestContext& ctx) {
63 IPC::ResponseBuilder rb{ctx, 3};
64 rb.Push(RESULT_SUCCESS);
65 rb.Push<u32>(renderer->GetMixBufferCount());
66 LOG_DEBUG(Service_Audio, "called");
67 }
68
48 void RequestUpdateAudioRenderer(Kernel::HLERequestContext& ctx) { 69 void RequestUpdateAudioRenderer(Kernel::HLERequestContext& ctx) {
49 ctx.WriteBuffer(renderer->UpdateAudioRenderer(ctx.ReadBuffer())); 70 ctx.WriteBuffer(renderer->UpdateAudioRenderer(ctx.ReadBuffer()));
50 IPC::ResponseBuilder rb{ctx, 2}; 71 IPC::ResponseBuilder rb{ctx, 2};
@@ -169,7 +190,8 @@ AudRenU::AudRenU() : ServiceFramework("audren:u") {
169 {1, &AudRenU::GetAudioRendererWorkBufferSize, "GetAudioRendererWorkBufferSize"}, 190 {1, &AudRenU::GetAudioRendererWorkBufferSize, "GetAudioRendererWorkBufferSize"},
170 {2, &AudRenU::GetAudioDevice, "GetAudioDevice"}, 191 {2, &AudRenU::GetAudioDevice, "GetAudioDevice"},
171 {3, nullptr, "OpenAudioRendererAuto"}, 192 {3, nullptr, "OpenAudioRendererAuto"},
172 {4, nullptr, "GetAudioDeviceServiceWithRevisionInfo"}, 193 {4, &AudRenU::GetAudioDeviceServiceWithRevisionInfo,
194 "GetAudioDeviceServiceWithRevisionInfo"},
173 }; 195 };
174 RegisterHandlers(functions); 196 RegisterHandlers(functions);
175} 197}
@@ -189,7 +211,7 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
189 IPC::RequestParser rp{ctx}; 211 IPC::RequestParser rp{ctx};
190 auto params = rp.PopRaw<AudioCore::AudioRendererParameter>(); 212 auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
191 213
192 u64 buffer_sz = Common::AlignUp(4 * params.unknown_8, 0x40); 214 u64 buffer_sz = Common::AlignUp(4 * params.mix_buffer_count, 0x40);
193 buffer_sz += params.unknown_c * 1024; 215 buffer_sz += params.unknown_c * 1024;
194 buffer_sz += 0x940 * (params.unknown_c + 1); 216 buffer_sz += 0x940 * (params.unknown_c + 1);
195 buffer_sz += 0x3F0 * params.voice_count; 217 buffer_sz += 0x3F0 * params.voice_count;
@@ -197,7 +219,7 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
197 buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10); 219 buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10);
198 buffer_sz += 220 buffer_sz +=
199 Common::AlignUp((0x3C0 * (params.sink_count + params.unknown_c) + 4 * params.sample_count) * 221 Common::AlignUp((0x3C0 * (params.sink_count + params.unknown_c) + 4 * params.sample_count) *
200 (params.unknown_8 + 6), 222 (params.mix_buffer_count + 6),
201 0x40); 223 0x40);
202 224
203 if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) { 225 if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
@@ -253,6 +275,16 @@ void AudRenU::GetAudioDevice(Kernel::HLERequestContext& ctx) {
253 LOG_DEBUG(Service_Audio, "called"); 275 LOG_DEBUG(Service_Audio, "called");
254} 276}
255 277
278void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx) {
279 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
280
281 rb.Push(RESULT_SUCCESS);
282 rb.PushIpcInterface<Audio::IAudioDevice>();
283
284 LOG_WARNING(Service_Audio, "(STUBBED) called"); // TODO(ogniK): Figure out what is different
285 // based on the current revision
286}
287
256bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const { 288bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const {
257 u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap 289 u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap
258 switch (feature) { 290 switch (feature) {
diff --git a/src/core/hle/service/audio/audren_u.h b/src/core/hle/service/audio/audren_u.h
index 14907f8ae..8600ac6e4 100644
--- a/src/core/hle/service/audio/audren_u.h
+++ b/src/core/hle/service/audio/audren_u.h
@@ -22,6 +22,7 @@ private:
22 void OpenAudioRenderer(Kernel::HLERequestContext& ctx); 22 void OpenAudioRenderer(Kernel::HLERequestContext& ctx);
23 void GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx); 23 void GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx);
24 void GetAudioDevice(Kernel::HLERequestContext& ctx); 24 void GetAudioDevice(Kernel::HLERequestContext& ctx);
25 void GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx);
25 26
26 enum class AudioFeatures : u32 { 27 enum class AudioFeatures : u32 {
27 Splitter, 28 Splitter,
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index dcdfa0e19..970942d3f 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -291,6 +291,7 @@ private:
291class Hid final : public ServiceFramework<Hid> { 291class Hid final : public ServiceFramework<Hid> {
292public: 292public:
293 Hid() : ServiceFramework("hid") { 293 Hid() : ServiceFramework("hid") {
294 // clang-format off
294 static const FunctionInfo functions[] = { 295 static const FunctionInfo functions[] = {
295 {0, &Hid::CreateAppletResource, "CreateAppletResource"}, 296 {0, &Hid::CreateAppletResource, "CreateAppletResource"},
296 {1, &Hid::ActivateDebugPad, "ActivateDebugPad"}, 297 {1, &Hid::ActivateDebugPad, "ActivateDebugPad"},
@@ -333,15 +334,13 @@ public:
333 {102, &Hid::SetSupportedNpadIdType, "SetSupportedNpadIdType"}, 334 {102, &Hid::SetSupportedNpadIdType, "SetSupportedNpadIdType"},
334 {103, &Hid::ActivateNpad, "ActivateNpad"}, 335 {103, &Hid::ActivateNpad, "ActivateNpad"},
335 {104, nullptr, "DeactivateNpad"}, 336 {104, nullptr, "DeactivateNpad"},
336 {106, &Hid::AcquireNpadStyleSetUpdateEventHandle, 337 {106, &Hid::AcquireNpadStyleSetUpdateEventHandle, "AcquireNpadStyleSetUpdateEventHandle"},
337 "AcquireNpadStyleSetUpdateEventHandle"}, 338 {107, &Hid::DisconnectNpad, "DisconnectNpad"},
338 {107, nullptr, "DisconnectNpad"},
339 {108, &Hid::GetPlayerLedPattern, "GetPlayerLedPattern"}, 339 {108, &Hid::GetPlayerLedPattern, "GetPlayerLedPattern"},
340 {109, nullptr, "ActivateNpadWithRevision"}, 340 {109, nullptr, "ActivateNpadWithRevision"},
341 {120, &Hid::SetNpadJoyHoldType, "SetNpadJoyHoldType"}, 341 {120, &Hid::SetNpadJoyHoldType, "SetNpadJoyHoldType"},
342 {121, &Hid::GetNpadJoyHoldType, "GetNpadJoyHoldType"}, 342 {121, &Hid::GetNpadJoyHoldType, "GetNpadJoyHoldType"},
343 {122, &Hid::SetNpadJoyAssignmentModeSingleByDefault, 343 {122, &Hid::SetNpadJoyAssignmentModeSingleByDefault, "SetNpadJoyAssignmentModeSingleByDefault"},
344 "SetNpadJoyAssignmentModeSingleByDefault"},
345 {123, nullptr, "SetNpadJoyAssignmentModeSingleByDefault"}, 344 {123, nullptr, "SetNpadJoyAssignmentModeSingleByDefault"},
346 {124, &Hid::SetNpadJoyAssignmentModeDual, "SetNpadJoyAssignmentModeDual"}, 345 {124, &Hid::SetNpadJoyAssignmentModeDual, "SetNpadJoyAssignmentModeDual"},
347 {125, &Hid::MergeSingleJoyAsDualJoy, "MergeSingleJoyAsDualJoy"}, 346 {125, &Hid::MergeSingleJoyAsDualJoy, "MergeSingleJoyAsDualJoy"},
@@ -398,6 +397,8 @@ public:
398 {1000, nullptr, "SetNpadCommunicationMode"}, 397 {1000, nullptr, "SetNpadCommunicationMode"},
399 {1001, nullptr, "GetNpadCommunicationMode"}, 398 {1001, nullptr, "GetNpadCommunicationMode"},
400 }; 399 };
400 // clang-format on
401
401 RegisterHandlers(functions); 402 RegisterHandlers(functions);
402 403
403 event = Kernel::Event::Create(Kernel::ResetType::OneShot, "hid:EventHandle"); 404 event = Kernel::Event::Create(Kernel::ResetType::OneShot, "hid:EventHandle");
@@ -496,6 +497,12 @@ private:
496 LOG_WARNING(Service_HID, "(STUBBED) called"); 497 LOG_WARNING(Service_HID, "(STUBBED) called");
497 } 498 }
498 499
500 void DisconnectNpad(Kernel::HLERequestContext& ctx) {
501 IPC::ResponseBuilder rb{ctx, 2};
502 rb.Push(RESULT_SUCCESS);
503 LOG_WARNING(Service_HID, "(STUBBED) called");
504 }
505
499 void GetPlayerLedPattern(Kernel::HLERequestContext& ctx) { 506 void GetPlayerLedPattern(Kernel::HLERequestContext& ctx) {
500 IPC::ResponseBuilder rb{ctx, 2}; 507 IPC::ResponseBuilder rb{ctx, 2};
501 rb.Push(RESULT_SUCCESS); 508 rb.Push(RESULT_SUCCESS);
diff --git a/src/core/hle/service/service.h b/src/core/hle/service/service.h
index 8a294c0f2..cd9c74f3d 100644
--- a/src/core/hle/service/service.h
+++ b/src/core/hle/service/service.h
@@ -23,7 +23,7 @@ class HLERequestContext;
23} // namespace Kernel 23} // namespace Kernel
24 24
25namespace FileSys { 25namespace FileSys {
26struct VfsFilesystem; 26class VfsFilesystem;
27} 27}
28 28
29namespace Service { 29namespace Service {
diff --git a/src/core/loader/loader.cpp b/src/core/loader/loader.cpp
index 2f5bfc67c..1f2f31535 100644
--- a/src/core/loader/loader.cpp
+++ b/src/core/loader/loader.cpp
@@ -126,7 +126,7 @@ constexpr std::array<const char*, 36> RESULT_MESSAGES{
126}; 126};
127 127
128std::string GetMessageForResultStatus(ResultStatus status) { 128std::string GetMessageForResultStatus(ResultStatus status) {
129 return GetMessageForResultStatus(static_cast<size_t>(status)); 129 return GetMessageForResultStatus(static_cast<u16>(status));
130} 130}
131 131
132std::string GetMessageForResultStatus(u16 status) { 132std::string GetMessageForResultStatus(u16 status) {
diff --git a/src/core/loader/loader.h b/src/core/loader/loader.h
index cfdadbee3..285363549 100644
--- a/src/core/loader/loader.h
+++ b/src/core/loader/loader.h
@@ -56,7 +56,7 @@ FileType GuessFromFilename(const std::string& name);
56std::string GetFileTypeString(FileType type); 56std::string GetFileTypeString(FileType type);
57 57
58/// Return type for functions in Loader namespace 58/// Return type for functions in Loader namespace
59enum class ResultStatus { 59enum class ResultStatus : u16 {
60 Success, 60 Success,
61 ErrorAlreadyLoaded, 61 ErrorAlreadyLoaded,
62 ErrorNotImplemented, 62 ErrorNotImplemented,
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 9f64b248b..2526ebf28 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -200,6 +200,14 @@ enum class IMinMaxExchange : u64 {
200 XHi = 3, 200 XHi = 3,
201}; 201};
202 202
203enum class XmadMode : u64 {
204 None = 0,
205 CLo = 1,
206 CHi = 2,
207 CSfu = 3,
208 CBcc = 4,
209};
210
203enum class FlowCondition : u64 { 211enum class FlowCondition : u64 {
204 Always = 0xF, 212 Always = 0xF,
205 Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for? 213 Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for?
@@ -457,6 +465,18 @@ union Instruction {
457 } bra; 465 } bra;
458 466
459 union { 467 union {
468 BitField<20, 16, u64> imm20_16;
469 BitField<36, 1, u64> product_shift_left;
470 BitField<37, 1, u64> merge_37;
471 BitField<48, 1, u64> sign_a;
472 BitField<49, 1, u64> sign_b;
473 BitField<50, 3, XmadMode> mode;
474 BitField<52, 1, u64> high_b;
475 BitField<53, 1, u64> high_a;
476 BitField<56, 1, u64> merge_56;
477 } xmad;
478
479 union {
460 BitField<20, 14, u64> offset; 480 BitField<20, 14, u64> offset;
461 BitField<34, 5, u64> index; 481 BitField<34, 5, u64> index;
462 } cbuf34; 482 } cbuf34;
@@ -593,6 +613,7 @@ public:
593 IntegerSetPredicate, 613 IntegerSetPredicate,
594 PredicateSetPredicate, 614 PredicateSetPredicate,
595 Conversion, 615 Conversion,
616 Xmad,
596 Unknown, 617 Unknown,
597 }; 618 };
598 619
@@ -782,10 +803,10 @@ private:
782 INST("010010110101----", Id::ISET_C, Type::IntegerSet, "ISET_C"), 803 INST("010010110101----", Id::ISET_C, Type::IntegerSet, "ISET_C"),
783 INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"), 804 INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"),
784 INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"), 805 INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"),
785 INST("0011011-00------", Id::XMAD_IMM, Type::Arithmetic, "XMAD_IMM"), 806 INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"),
786 INST("0100111---------", Id::XMAD_CR, Type::Arithmetic, "XMAD_CR"), 807 INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"),
787 INST("010100010-------", Id::XMAD_RC, Type::Arithmetic, "XMAD_RC"), 808 INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"),
788 INST("0101101100------", Id::XMAD_RR, Type::Arithmetic, "XMAD_RR"), 809 INST("0101101100------", Id::XMAD_RR, Type::Xmad, "XMAD_RR"),
789 }; 810 };
790#undef INST 811#undef INST
791 std::stable_sort(table.begin(), table.end(), [](const auto& a, const auto& b) { 812 std::stable_sort(table.begin(), table.end(), [](const auto& a, const auto& b) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 38a7b1413..52a649e2f 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -36,30 +36,21 @@ MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
36MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); 36MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255));
37MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); 37MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
38 38
39RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window) : emu_window{window} { 39RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window)
40 : emu_window{window}, stream_buffer(GL_ARRAY_BUFFER, STREAM_BUFFER_SIZE) {
40 // Create sampler objects 41 // Create sampler objects
41 for (size_t i = 0; i < texture_samplers.size(); ++i) { 42 for (size_t i = 0; i < texture_samplers.size(); ++i) {
42 texture_samplers[i].Create(); 43 texture_samplers[i].Create();
43 state.texture_units[i].sampler = texture_samplers[i].sampler.handle; 44 state.texture_units[i].sampler = texture_samplers[i].sampler.handle;
44 } 45 }
45 46
46 // Create SSBOs
47 for (size_t stage = 0; stage < ssbos.size(); ++stage) {
48 for (size_t buffer = 0; buffer < ssbos[stage].size(); ++buffer) {
49 ssbos[stage][buffer].Create();
50 state.draw.const_buffers[stage][buffer].ssbo = ssbos[stage][buffer].handle;
51 }
52 }
53
54 GLint ext_num; 47 GLint ext_num;
55 glGetIntegerv(GL_NUM_EXTENSIONS, &ext_num); 48 glGetIntegerv(GL_NUM_EXTENSIONS, &ext_num);
56 for (GLint i = 0; i < ext_num; i++) { 49 for (GLint i = 0; i < ext_num; i++) {
57 const std::string_view extension{ 50 const std::string_view extension{
58 reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, i))}; 51 reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, i))};
59 52
60 if (extension == "GL_ARB_buffer_storage") { 53 if (extension == "GL_ARB_direct_state_access") {
61 has_ARB_buffer_storage = true;
62 } else if (extension == "GL_ARB_direct_state_access") {
63 has_ARB_direct_state_access = true; 54 has_ARB_direct_state_access = true;
64 } else if (extension == "GL_ARB_separate_shader_objects") { 55 } else if (extension == "GL_ARB_separate_shader_objects") {
65 has_ARB_separate_shader_objects = true; 56 has_ARB_separate_shader_objects = true;
@@ -86,47 +77,31 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window) : emu_wind
86 77
87 hw_vao.Create(); 78 hw_vao.Create();
88 79
89 stream_buffer = OGLStreamBuffer::MakeBuffer(has_ARB_buffer_storage, GL_ARRAY_BUFFER); 80 state.draw.vertex_buffer = stream_buffer.GetHandle();
90 stream_buffer->Create(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE / 2);
91 state.draw.vertex_buffer = stream_buffer->GetHandle();
92 81
93 shader_program_manager = std::make_unique<GLShader::ProgramManager>(); 82 shader_program_manager = std::make_unique<GLShader::ProgramManager>();
94 state.draw.shader_program = 0; 83 state.draw.shader_program = 0;
95 state.draw.vertex_array = hw_vao.handle; 84 state.draw.vertex_array = hw_vao.handle;
96 state.Apply(); 85 state.Apply();
97 86
98 glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer->GetHandle()); 87 glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer.GetHandle());
99
100 for (unsigned index = 0; index < uniform_buffers.size(); ++index) {
101 auto& buffer = uniform_buffers[index];
102 buffer.Create();
103 glBindBuffer(GL_UNIFORM_BUFFER, buffer.handle);
104 glBufferData(GL_UNIFORM_BUFFER, sizeof(GLShader::MaxwellUniformData), nullptr,
105 GL_STREAM_COPY);
106 glBindBufferBase(GL_UNIFORM_BUFFER, index, buffer.handle);
107 }
108 88
109 glEnable(GL_BLEND); 89 glEnable(GL_BLEND);
110 90
91 glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment);
92
111 LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!"); 93 LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!");
112} 94}
113 95
114RasterizerOpenGL::~RasterizerOpenGL() { 96RasterizerOpenGL::~RasterizerOpenGL() {}
115 if (stream_buffer != nullptr) {
116 state.draw.vertex_buffer = stream_buffer->GetHandle();
117 state.Apply();
118 stream_buffer->Release();
119 }
120}
121 97
122std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, 98std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
123 GLintptr buffer_offset) { 99 GLintptr buffer_offset) {
124 MICROPROFILE_SCOPE(OpenGL_VAO); 100 MICROPROFILE_SCOPE(OpenGL_VAO);
125 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; 101 const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
126 const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager;
127 102
128 state.draw.vertex_array = hw_vao.handle; 103 state.draw.vertex_array = hw_vao.handle;
129 state.draw.vertex_buffer = stream_buffer->GetHandle(); 104 state.draw.vertex_buffer = stream_buffer.GetHandle();
130 state.Apply(); 105 state.Apply();
131 106
132 // Upload all guest vertex arrays sequentially to our buffer 107 // Upload all guest vertex arrays sequentially to our buffer
@@ -141,16 +116,15 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
141 ASSERT(end > start); 116 ASSERT(end > start);
142 u64 size = end - start + 1; 117 u64 size = end - start + 1;
143 118
144 // Copy vertex array data 119 GLintptr vertex_buffer_offset;
145 Memory::ReadBlock(*memory_manager->GpuToCpuAddress(start), array_ptr, size); 120 std::tie(array_ptr, buffer_offset, vertex_buffer_offset) =
121 UploadMemory(array_ptr, buffer_offset, start, size);
146 122
147 // Bind the vertex array to the buffer at the current offset. 123 // Bind the vertex array to the buffer at the current offset.
148 glBindVertexBuffer(index, stream_buffer->GetHandle(), buffer_offset, vertex_array.stride); 124 glBindVertexBuffer(index, stream_buffer.GetHandle(), vertex_buffer_offset,
125 vertex_array.stride);
149 126
150 ASSERT_MSG(vertex_array.divisor == 0, "Vertex buffer divisor unimplemented"); 127 ASSERT_MSG(vertex_array.divisor == 0, "Vertex buffer divisor unimplemented");
151
152 array_ptr += size;
153 buffer_offset += size;
154 } 128 }
155 129
156 // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. 130 // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL.
@@ -201,22 +175,12 @@ static GLShader::ProgramCode GetShaderProgramCode(Maxwell::ShaderProgram program
201 return program_code; 175 return program_code;
202} 176}
203 177
204void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { 178std::pair<u8*, GLintptr> RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
205 // Helper function for uploading uniform data
206 const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) {
207 if (has_ARB_direct_state_access) {
208 glCopyNamedBufferSubData(stream_buffer->GetHandle(), handle, offset, 0, size);
209 } else {
210 glBindBuffer(GL_COPY_WRITE_BUFFER, handle);
211 glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size);
212 }
213 };
214
215 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 179 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
216 180
217 // Next available bindpoints to use when uploading the const buffers and textures to the GLSL 181 // Next available bindpoints to use when uploading the const buffers and textures to the GLSL
218 // shaders. The constbuffer bindpoint starts after the shader stage configuration bind points. 182 // shaders. The constbuffer bindpoint starts after the shader stage configuration bind points.
219 u32 current_constbuffer_bindpoint = static_cast<u32>(uniform_buffers.size()); 183 u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
220 u32 current_texture_bindpoint = 0; 184 u32 current_texture_bindpoint = 0;
221 185
222 for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { 186 for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
@@ -228,22 +192,21 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
228 continue; 192 continue;
229 } 193 }
230 194
195 std::tie(buffer_ptr, buffer_offset) =
196 AlignBuffer(buffer_ptr, buffer_offset, static_cast<size_t>(uniform_buffer_alignment));
197
231 const size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5 198 const size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5
232 199
233 GLShader::MaxwellUniformData ubo{}; 200 GLShader::MaxwellUniformData ubo{};
234 ubo.SetFromRegs(gpu.state.shader_stages[stage]); 201 ubo.SetFromRegs(gpu.state.shader_stages[stage]);
235 std::memcpy(buffer_ptr, &ubo, sizeof(ubo)); 202 std::memcpy(buffer_ptr, &ubo, sizeof(ubo));
236 203
237 // Flush the buffer so that the GPU can see the data we just wrote. 204 // Bind the buffer
238 glFlushMappedBufferRange(GL_ARRAY_BUFFER, buffer_offset, sizeof(ubo)); 205 glBindBufferRange(GL_UNIFORM_BUFFER, stage, stream_buffer.GetHandle(), buffer_offset,
239 206 sizeof(ubo));
240 // Upload uniform data as one UBO per stage
241 const GLintptr ubo_offset = buffer_offset;
242 copy_buffer(uniform_buffers[stage].handle, ubo_offset,
243 sizeof(GLShader::MaxwellUniformData));
244 207
245 buffer_ptr += sizeof(GLShader::MaxwellUniformData); 208 buffer_ptr += sizeof(ubo);
246 buffer_offset += sizeof(GLShader::MaxwellUniformData); 209 buffer_offset += sizeof(ubo);
247 210
248 GLShader::ShaderSetup setup{GetShaderProgramCode(program)}; 211 GLShader::ShaderSetup setup{GetShaderProgramCode(program)};
249 GLShader::ShaderEntries shader_resources; 212 GLShader::ShaderEntries shader_resources;
@@ -282,9 +245,9 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
282 static_cast<Maxwell::ShaderStage>(stage)); 245 static_cast<Maxwell::ShaderStage>(stage));
283 246
284 // Configure the const buffers for this shader stage. 247 // Configure the const buffers for this shader stage.
285 current_constbuffer_bindpoint = 248 std::tie(buffer_ptr, buffer_offset, current_constbuffer_bindpoint) = SetupConstBuffers(
286 SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), gl_stage_program, 249 buffer_ptr, buffer_offset, static_cast<Maxwell::ShaderStage>(stage), gl_stage_program,
287 current_constbuffer_bindpoint, shader_resources.const_buffer_entries); 250 current_constbuffer_bindpoint, shader_resources.const_buffer_entries);
288 251
289 // Configure the textures for this shader stage. 252 // Configure the textures for this shader stage.
290 current_texture_bindpoint = 253 current_texture_bindpoint =
@@ -299,6 +262,8 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
299 } 262 }
300 263
301 shader_program_manager->UseTrivialGeometryShader(); 264 shader_program_manager->UseTrivialGeometryShader();
265
266 return {buffer_ptr, buffer_offset};
302} 267}
303 268
304size_t RasterizerOpenGL::CalculateVertexArraysSize() const { 269size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
@@ -432,6 +397,31 @@ void RasterizerOpenGL::Clear() {
432 } 397 }
433} 398}
434 399
400std::pair<u8*, GLintptr> RasterizerOpenGL::AlignBuffer(u8* buffer_ptr, GLintptr buffer_offset,
401 size_t alignment) {
402 // Align the offset, not the mapped pointer
403 GLintptr offset_aligned =
404 static_cast<GLintptr>(Common::AlignUp(static_cast<size_t>(buffer_offset), alignment));
405 return {buffer_ptr + (offset_aligned - buffer_offset), offset_aligned};
406}
407
408std::tuple<u8*, GLintptr, GLintptr> RasterizerOpenGL::UploadMemory(u8* buffer_ptr,
409 GLintptr buffer_offset,
410 Tegra::GPUVAddr gpu_addr,
411 size_t size, size_t alignment) {
412 std::tie(buffer_ptr, buffer_offset) = AlignBuffer(buffer_ptr, buffer_offset, alignment);
413 GLintptr uploaded_offset = buffer_offset;
414
415 const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager;
416 const boost::optional<VAddr> cpu_addr{memory_manager->GpuToCpuAddress(gpu_addr)};
417 Memory::ReadBlock(*cpu_addr, buffer_ptr, size);
418
419 buffer_ptr += size;
420 buffer_offset += size;
421
422 return {buffer_ptr, buffer_offset, uploaded_offset};
423}
424
435void RasterizerOpenGL::DrawArrays() { 425void RasterizerOpenGL::DrawArrays() {
436 if (accelerate_draw == AccelDraw::Disabled) 426 if (accelerate_draw == AccelDraw::Disabled)
437 return; 427 return;
@@ -456,7 +446,7 @@ void RasterizerOpenGL::DrawArrays() {
456 const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()}; 446 const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()};
457 const unsigned vertex_num{is_indexed ? regs.index_array.count : regs.vertex_buffer.count}; 447 const unsigned vertex_num{is_indexed ? regs.index_array.count : regs.vertex_buffer.count};
458 448
459 state.draw.vertex_buffer = stream_buffer->GetHandle(); 449 state.draw.vertex_buffer = stream_buffer.GetHandle();
460 state.Apply(); 450 state.Apply();
461 451
462 size_t buffer_size = CalculateVertexArraysSize(); 452 size_t buffer_size = CalculateVertexArraysSize();
@@ -466,41 +456,31 @@ void RasterizerOpenGL::DrawArrays() {
466 } 456 }
467 457
468 // Uniform space for the 5 shader stages 458 // Uniform space for the 5 shader stages
469 buffer_size = Common::AlignUp<size_t>(buffer_size, 4) + 459 buffer_size =
470 sizeof(GLShader::MaxwellUniformData) * Maxwell::MaxShaderStage; 460 Common::AlignUp<size_t>(buffer_size, 4) +
461 (sizeof(GLShader::MaxwellUniformData) + uniform_buffer_alignment) * Maxwell::MaxShaderStage;
462
463 // Add space for at least 18 constant buffers
464 buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment);
471 465
472 u8* buffer_ptr; 466 u8* buffer_ptr;
473 GLintptr buffer_offset; 467 GLintptr buffer_offset;
474 std::tie(buffer_ptr, buffer_offset) = 468 std::tie(buffer_ptr, buffer_offset, std::ignore) =
475 stream_buffer->Map(static_cast<GLsizeiptr>(buffer_size), 4); 469 stream_buffer.Map(static_cast<GLsizeiptr>(buffer_size), 4);
470 u8* buffer_ptr_base = buffer_ptr;
476 471
477 u8* offseted_buffer; 472 std::tie(buffer_ptr, buffer_offset) = SetupVertexArrays(buffer_ptr, buffer_offset);
478 std::tie(offseted_buffer, buffer_offset) = SetupVertexArrays(buffer_ptr, buffer_offset);
479
480 offseted_buffer =
481 reinterpret_cast<u8*>(Common::AlignUp(reinterpret_cast<size_t>(offseted_buffer), 4));
482 buffer_offset = Common::AlignUp<size_t>(buffer_offset, 4);
483 473
484 // If indexed mode, copy the index buffer 474 // If indexed mode, copy the index buffer
485 GLintptr index_buffer_offset = 0; 475 GLintptr index_buffer_offset = 0;
486 if (is_indexed) { 476 if (is_indexed) {
487 const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager; 477 std::tie(buffer_ptr, buffer_offset, index_buffer_offset) = UploadMemory(
488 const boost::optional<VAddr> index_data_addr{ 478 buffer_ptr, buffer_offset, regs.index_array.StartAddress(), index_buffer_size);
489 memory_manager->GpuToCpuAddress(regs.index_array.StartAddress())};
490 Memory::ReadBlock(*index_data_addr, offseted_buffer, index_buffer_size);
491
492 index_buffer_offset = buffer_offset;
493 offseted_buffer += index_buffer_size;
494 buffer_offset += index_buffer_size;
495 } 479 }
496 480
497 offseted_buffer = 481 std::tie(buffer_ptr, buffer_offset) = SetupShaders(buffer_ptr, buffer_offset);
498 reinterpret_cast<u8*>(Common::AlignUp(reinterpret_cast<size_t>(offseted_buffer), 4));
499 buffer_offset = Common::AlignUp<size_t>(buffer_offset, 4);
500
501 SetupShaders(offseted_buffer, buffer_offset);
502 482
503 stream_buffer->Unmap(); 483 stream_buffer.Unmap(buffer_ptr - buffer_ptr_base);
504 484
505 shader_program_manager->ApplyTo(state); 485 shader_program_manager->ApplyTo(state);
506 state.Apply(); 486 state.Apply();
@@ -647,36 +627,23 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr
647 } 627 }
648} 628}
649 629
650u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint program, 630std::tuple<u8*, GLintptr, u32> RasterizerOpenGL::SetupConstBuffers(
651 u32 current_bindpoint, 631 u8* buffer_ptr, GLintptr buffer_offset, Maxwell::ShaderStage stage, GLuint program,
652 const std::vector<GLShader::ConstBufferEntry>& entries) { 632 u32 current_bindpoint, const std::vector<GLShader::ConstBufferEntry>& entries) {
653 const auto& gpu = Core::System::GetInstance().GPU(); 633 const auto& gpu = Core::System::GetInstance().GPU();
654 const auto& maxwell3d = gpu.Maxwell3D(); 634 const auto& maxwell3d = gpu.Maxwell3D();
655 635
656 // Reset all buffer draw state for this stage.
657 for (auto& buffer : state.draw.const_buffers[static_cast<size_t>(stage)]) {
658 buffer.bindpoint = 0;
659 buffer.enabled = false;
660 }
661
662 // Upload only the enabled buffers from the 16 constbuffers of each shader stage 636 // Upload only the enabled buffers from the 16 constbuffers of each shader stage
663 const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<size_t>(stage)]; 637 const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<size_t>(stage)];
664 638
665 for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { 639 for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
666 const auto& used_buffer = entries[bindpoint]; 640 const auto& used_buffer = entries[bindpoint];
667 const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()]; 641 const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()];
668 auto& buffer_draw_state =
669 state.draw.const_buffers[static_cast<size_t>(stage)][used_buffer.GetIndex()];
670 642
671 if (!buffer.enabled) { 643 if (!buffer.enabled) {
672 continue; 644 continue;
673 } 645 }
674 646
675 buffer_draw_state.enabled = true;
676 buffer_draw_state.bindpoint = current_bindpoint + bindpoint;
677
678 boost::optional<VAddr> addr = gpu.memory_manager->GpuToCpuAddress(buffer.address);
679
680 size_t size = 0; 647 size_t size = 0;
681 648
682 if (used_buffer.IsIndirect()) { 649 if (used_buffer.IsIndirect()) {
@@ -698,25 +665,26 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr
698 size = Common::AlignUp(size, sizeof(GLvec4)); 665 size = Common::AlignUp(size, sizeof(GLvec4));
699 ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big"); 666 ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big");
700 667
701 std::vector<u8> data(size); 668 GLintptr const_buffer_offset;
702 Memory::ReadBlock(*addr, data.data(), data.size()); 669 std::tie(buffer_ptr, buffer_offset, const_buffer_offset) =
670 UploadMemory(buffer_ptr, buffer_offset, buffer.address, size,
671 static_cast<size_t>(uniform_buffer_alignment));
703 672
704 glBindBuffer(GL_UNIFORM_BUFFER, buffer_draw_state.ssbo); 673 glBindBufferRange(GL_UNIFORM_BUFFER, current_bindpoint + bindpoint,
705 glBufferData(GL_UNIFORM_BUFFER, data.size(), data.data(), GL_DYNAMIC_DRAW); 674 stream_buffer.GetHandle(), const_buffer_offset, size);
706 glBindBuffer(GL_UNIFORM_BUFFER, 0);
707 675
708 // Now configure the bindpoint of the buffer inside the shader 676 // Now configure the bindpoint of the buffer inside the shader
709 const std::string buffer_name = used_buffer.GetName(); 677 const std::string buffer_name = used_buffer.GetName();
710 const GLuint index = 678 const GLuint index =
711 glGetProgramResourceIndex(program, GL_UNIFORM_BLOCK, buffer_name.c_str()); 679 glGetProgramResourceIndex(program, GL_UNIFORM_BLOCK, buffer_name.c_str());
712 if (index != GL_INVALID_INDEX) { 680 if (index != GL_INVALID_INDEX) {
713 glUniformBlockBinding(program, index, buffer_draw_state.bindpoint); 681 glUniformBlockBinding(program, index, current_bindpoint + bindpoint);
714 } 682 }
715 } 683 }
716 684
717 state.Apply(); 685 state.Apply();
718 686
719 return current_bindpoint + static_cast<u32>(entries.size()); 687 return {buffer_ptr, buffer_offset, current_bindpoint + static_cast<u32>(entries.size())};
720} 688}
721 689
722u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program, u32 current_unit, 690u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program, u32 current_unit,
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index bd01dc0ae..74307f626 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -7,6 +7,7 @@
7#include <array> 7#include <array>
8#include <cstddef> 8#include <cstddef>
9#include <memory> 9#include <memory>
10#include <tuple>
10#include <utility> 11#include <utility>
11#include <vector> 12#include <vector>
12#include <glad/glad.h> 13#include <glad/glad.h>
@@ -100,9 +101,10 @@ private:
100 * @param entries Vector describing the buffers that are actually used in the guest shader. 101 * @param entries Vector describing the buffers that are actually used in the guest shader.
101 * @returns The next available bindpoint for use in the next shader stage. 102 * @returns The next available bindpoint for use in the next shader stage.
102 */ 103 */
103 u32 SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, GLuint program, 104 std::tuple<u8*, GLintptr, u32> SetupConstBuffers(
104 u32 current_bindpoint, 105 u8* buffer_ptr, GLintptr buffer_offset, Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
105 const std::vector<GLShader::ConstBufferEntry>& entries); 106 GLuint program, u32 current_bindpoint,
107 const std::vector<GLShader::ConstBufferEntry>& entries);
106 108
107 /* 109 /*
108 * Configures the current textures to use for the draw command. 110 * Configures the current textures to use for the draw command.
@@ -139,7 +141,6 @@ private:
139 /// Syncs the blend state to match the guest state 141 /// Syncs the blend state to match the guest state
140 void SyncBlendState(); 142 void SyncBlendState();
141 143
142 bool has_ARB_buffer_storage = false;
143 bool has_ARB_direct_state_access = false; 144 bool has_ARB_direct_state_access = false;
144 bool has_ARB_separate_shader_objects = false; 145 bool has_ARB_separate_shader_objects = false;
145 bool has_ARB_vertex_attrib_binding = false; 146 bool has_ARB_vertex_attrib_binding = false;
@@ -155,22 +156,24 @@ private:
155 OGLVertexArray hw_vao; 156 OGLVertexArray hw_vao;
156 157
157 std::array<SamplerInfo, GLShader::NumTextureSamplers> texture_samplers; 158 std::array<SamplerInfo, GLShader::NumTextureSamplers> texture_samplers;
158 std::array<std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers>,
159 Tegra::Engines::Maxwell3D::Regs::MaxShaderStage>
160 ssbos;
161 159
162 static constexpr size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; 160 static constexpr size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
163 std::unique_ptr<OGLStreamBuffer> stream_buffer; 161 OGLStreamBuffer stream_buffer;
164 OGLBuffer uniform_buffer; 162 OGLBuffer uniform_buffer;
165 OGLFramebuffer framebuffer; 163 OGLFramebuffer framebuffer;
164 GLint uniform_buffer_alignment;
166 165
167 size_t CalculateVertexArraysSize() const; 166 size_t CalculateVertexArraysSize() const;
168 167
169 std::pair<u8*, GLintptr> SetupVertexArrays(u8* array_ptr, GLintptr buffer_offset); 168 std::pair<u8*, GLintptr> SetupVertexArrays(u8* array_ptr, GLintptr buffer_offset);
170 169
171 std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxShaderStage> uniform_buffers; 170 std::pair<u8*, GLintptr> SetupShaders(u8* buffer_ptr, GLintptr buffer_offset);
172 171
173 void SetupShaders(u8* buffer_ptr, GLintptr buffer_offset); 172 std::pair<u8*, GLintptr> AlignBuffer(u8* buffer_ptr, GLintptr buffer_offset, size_t alignment);
173
174 std::tuple<u8*, GLintptr, GLintptr> UploadMemory(u8* buffer_ptr, GLintptr buffer_offset,
175 Tegra::GPUVAddr gpu_addr, size_t size,
176 size_t alignment = 4);
174 177
175 enum class AccelDraw { Disabled, Arrays, Indexed }; 178 enum class AccelDraw { Disabled, Arrays, Indexed };
176 AccelDraw accelerate_draw = AccelDraw::Disabled; 179 AccelDraw accelerate_draw = AccelDraw::Disabled;
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 7e038ac86..6834d7085 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -376,6 +376,8 @@ public:
376 return value; 376 return value;
377 } else if (type == GLSLRegister::Type::Integer) { 377 } else if (type == GLSLRegister::Type::Integer) {
378 return "floatBitsToInt(" + value + ')'; 378 return "floatBitsToInt(" + value + ')';
379 } else if (type == GLSLRegister::Type::UnsignedInteger) {
380 return "floatBitsToUint(" + value + ')';
379 } else { 381 } else {
380 UNREACHABLE(); 382 UNREACHABLE();
381 } 383 }
@@ -1630,6 +1632,99 @@ private:
1630 } 1632 }
1631 break; 1633 break;
1632 } 1634 }
1635 case OpCode::Type::Xmad: {
1636 ASSERT_MSG(!instr.xmad.sign_a, "Unimplemented");
1637 ASSERT_MSG(!instr.xmad.sign_b, "Unimplemented");
1638
1639 std::string op_a{regs.GetRegisterAsInteger(instr.gpr8, 0, instr.xmad.sign_a)};
1640 std::string op_b;
1641 std::string op_c;
1642
1643 // TODO(bunnei): Needs to be fixed once op_a or op_b is signed
1644 ASSERT_MSG(instr.xmad.sign_a == instr.xmad.sign_b, "Unimplemented");
1645 const bool is_signed{instr.xmad.sign_a == 1};
1646
1647 bool is_merge{};
1648 switch (opcode->GetId()) {
1649 case OpCode::Id::XMAD_CR: {
1650 is_merge = instr.xmad.merge_56;
1651 op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
1652 instr.xmad.sign_b ? GLSLRegister::Type::Integer
1653 : GLSLRegister::Type::UnsignedInteger);
1654 op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed);
1655 break;
1656 }
1657 case OpCode::Id::XMAD_RR: {
1658 is_merge = instr.xmad.merge_37;
1659 op_b += regs.GetRegisterAsInteger(instr.gpr20, 0, instr.xmad.sign_b);
1660 op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed);
1661 break;
1662 }
1663 case OpCode::Id::XMAD_RC: {
1664 op_b += regs.GetRegisterAsInteger(instr.gpr39, 0, instr.xmad.sign_b);
1665 op_c += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
1666 is_signed ? GLSLRegister::Type::Integer
1667 : GLSLRegister::Type::UnsignedInteger);
1668 break;
1669 }
1670 case OpCode::Id::XMAD_IMM: {
1671 is_merge = instr.xmad.merge_37;
1672 op_b += std::to_string(instr.xmad.imm20_16);
1673 op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed);
1674 break;
1675 }
1676 default: {
1677 LOG_CRITICAL(HW_GPU, "Unhandled XMAD instruction: {}", opcode->GetName());
1678 UNREACHABLE();
1679 }
1680 }
1681
1682 // TODO(bunnei): Ensure this is right with signed operands
1683 if (instr.xmad.high_a) {
1684 op_a = "((" + op_a + ") >> 16)";
1685 } else {
1686 op_a = "((" + op_a + ") & 0xFFFF)";
1687 }
1688
1689 std::string src2 = '(' + op_b + ')'; // Preserve original source 2
1690 if (instr.xmad.high_b) {
1691 op_b = '(' + src2 + " >> 16)";
1692 } else {
1693 op_b = '(' + src2 + " & 0xFFFF)";
1694 }
1695
1696 std::string product = '(' + op_a + " * " + op_b + ')';
1697 if (instr.xmad.product_shift_left) {
1698 product = '(' + product + " << 16)";
1699 }
1700
1701 switch (instr.xmad.mode) {
1702 case Tegra::Shader::XmadMode::None:
1703 break;
1704 case Tegra::Shader::XmadMode::CLo:
1705 op_c = "((" + op_c + ") & 0xFFFF)";
1706 break;
1707 case Tegra::Shader::XmadMode::CHi:
1708 op_c = "((" + op_c + ") >> 16)";
1709 break;
1710 case Tegra::Shader::XmadMode::CBcc:
1711 op_c = "((" + op_c + ") + (" + src2 + "<< 16))";
1712 break;
1713 default: {
1714 LOG_CRITICAL(HW_GPU, "Unhandled XMAD mode: {}",
1715 static_cast<u32>(instr.xmad.mode.Value()));
1716 UNREACHABLE();
1717 }
1718 }
1719
1720 std::string sum{'(' + product + " + " + op_c + ')'};
1721 if (is_merge) {
1722 sum = "((" + sum + " & 0xFFFF) | (" + src2 + "<< 16))";
1723 }
1724
1725 regs.SetRegisterToInteger(instr.gpr0, is_signed, 0, sum, 1, 1);
1726 break;
1727 }
1633 default: { 1728 default: {
1634 switch (opcode->GetId()) { 1729 switch (opcode->GetId()) {
1635 case OpCode::Id::EXIT: { 1730 case OpCode::Id::EXIT: {
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 68bacd4c5..1d1975179 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -203,21 +203,6 @@ void OpenGLState::Apply() const {
203 } 203 }
204 } 204 }
205 205
206 // Constbuffers
207 for (std::size_t stage = 0; stage < draw.const_buffers.size(); ++stage) {
208 for (std::size_t buffer_id = 0; buffer_id < draw.const_buffers[stage].size(); ++buffer_id) {
209 const auto& current = cur_state.draw.const_buffers[stage][buffer_id];
210 const auto& new_state = draw.const_buffers[stage][buffer_id];
211
212 if (current.enabled != new_state.enabled || current.bindpoint != new_state.bindpoint ||
213 current.ssbo != new_state.ssbo) {
214 if (new_state.enabled) {
215 glBindBufferBase(GL_UNIFORM_BUFFER, new_state.bindpoint, new_state.ssbo);
216 }
217 }
218 }
219 }
220
221 // Framebuffer 206 // Framebuffer
222 if (draw.read_framebuffer != cur_state.draw.read_framebuffer) { 207 if (draw.read_framebuffer != cur_state.draw.read_framebuffer) {
223 glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer); 208 glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 5c7b636e4..bdb02ba25 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -119,12 +119,6 @@ public:
119 GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING 119 GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING
120 GLuint shader_program; // GL_CURRENT_PROGRAM 120 GLuint shader_program; // GL_CURRENT_PROGRAM
121 GLuint program_pipeline; // GL_PROGRAM_PIPELINE_BINDING 121 GLuint program_pipeline; // GL_PROGRAM_PIPELINE_BINDING
122 struct ConstBufferConfig {
123 bool enabled = false;
124 GLuint bindpoint;
125 GLuint ssbo;
126 };
127 std::array<std::array<ConstBufferConfig, Regs::MaxConstBuffers>, 5> const_buffers;
128 } draw; 122 } draw;
129 123
130 struct { 124 struct {
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index a2713e9f0..03a8ed8b7 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -9,174 +9,91 @@
9#include "video_core/renderer_opengl/gl_state.h" 9#include "video_core/renderer_opengl/gl_state.h"
10#include "video_core/renderer_opengl/gl_stream_buffer.h" 10#include "video_core/renderer_opengl/gl_stream_buffer.h"
11 11
12class OrphanBuffer : public OGLStreamBuffer { 12OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coherent)
13public: 13 : gl_target(target), buffer_size(size) {
14 explicit OrphanBuffer(GLenum target) : OGLStreamBuffer(target) {} 14 gl_buffer.Create();
15 ~OrphanBuffer() override; 15 glBindBuffer(gl_target, gl_buffer.handle);
16
17private:
18 void Create(size_t size, size_t sync_subdivide) override;
19 void Release() override;
20
21 std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) override;
22 void Unmap() override;
23
24 std::vector<u8> data;
25};
26
27class StorageBuffer : public OGLStreamBuffer {
28public:
29 explicit StorageBuffer(GLenum target) : OGLStreamBuffer(target) {}
30 ~StorageBuffer() override;
31
32private:
33 void Create(size_t size, size_t sync_subdivide) override;
34 void Release() override;
35
36 std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) override;
37 void Unmap() override;
38
39 struct Fence {
40 OGLSync sync;
41 size_t offset;
42 };
43 std::deque<Fence> head;
44 std::deque<Fence> tail;
45
46 u8* mapped_ptr;
47};
48
49OGLStreamBuffer::OGLStreamBuffer(GLenum target) {
50 gl_target = target;
51}
52
53GLuint OGLStreamBuffer::GetHandle() const {
54 return gl_buffer.handle;
55}
56 16
57std::unique_ptr<OGLStreamBuffer> OGLStreamBuffer::MakeBuffer(bool storage_buffer, GLenum target) { 17 GLsizeiptr allocate_size = size;
58 if (storage_buffer) { 18 if (target == GL_ARRAY_BUFFER) {
59 return std::make_unique<StorageBuffer>(target); 19 // On AMD GPU there is a strange crash in indexed drawing. The crash happens when the buffer
20 // read position is near the end and is an out-of-bound access to the vertex buffer. This is
21 // probably a bug in the driver and is related to the usage of vec3<byte> attributes in the
22 // vertex array. Doubling the allocation size for the vertex buffer seems to avoid the
23 // crash.
24 allocate_size *= 2;
60 } 25 }
61 return std::make_unique<OrphanBuffer>(target);
62}
63 26
64OrphanBuffer::~OrphanBuffer() { 27 if (GLAD_GL_ARB_buffer_storage) {
65 Release(); 28 persistent = true;
29 coherent = prefer_coherent;
30 GLbitfield flags =
31 GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0);
32 glBufferStorage(gl_target, allocate_size, nullptr, flags);
33 mapped_ptr = static_cast<u8*>(glMapBufferRange(
34 gl_target, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT)));
35 } else {
36 glBufferData(gl_target, allocate_size, nullptr, GL_STREAM_DRAW);
37 }
66} 38}
67 39
68void OrphanBuffer::Create(size_t size, size_t /*sync_subdivide*/) { 40OGLStreamBuffer::~OGLStreamBuffer() {
69 buffer_pos = 0; 41 if (persistent) {
70 buffer_size = size;
71 data.resize(buffer_size);
72
73 if (gl_buffer.handle == 0) {
74 gl_buffer.Create();
75 glBindBuffer(gl_target, gl_buffer.handle); 42 glBindBuffer(gl_target, gl_buffer.handle);
43 glUnmapBuffer(gl_target);
76 } 44 }
77
78 glBufferData(gl_target, static_cast<GLsizeiptr>(buffer_size), nullptr, GL_STREAM_DRAW);
79}
80
81void OrphanBuffer::Release() {
82 gl_buffer.Release(); 45 gl_buffer.Release();
83} 46}
84 47
85std::pair<u8*, GLintptr> OrphanBuffer::Map(size_t size, size_t alignment) { 48GLuint OGLStreamBuffer::GetHandle() const {
86 buffer_pos = Common::AlignUp(buffer_pos, alignment); 49 return gl_buffer.handle;
87
88 if (buffer_pos + size > buffer_size) {
89 Create(std::max(buffer_size, size), 0);
90 }
91
92 mapped_size = size;
93 return std::make_pair(&data[buffer_pos], static_cast<GLintptr>(buffer_pos));
94}
95
96void OrphanBuffer::Unmap() {
97 glBufferSubData(gl_target, static_cast<GLintptr>(buffer_pos),
98 static_cast<GLsizeiptr>(mapped_size), &data[buffer_pos]);
99 buffer_pos += mapped_size;
100}
101
102StorageBuffer::~StorageBuffer() {
103 Release();
104} 50}
105 51
106void StorageBuffer::Create(size_t size, size_t sync_subdivide) { 52GLsizeiptr OGLStreamBuffer::GetSize() const {
107 if (gl_buffer.handle != 0) 53 return buffer_size;
108 return;
109
110 buffer_pos = 0;
111 buffer_size = size;
112 buffer_sync_subdivide = std::max<size_t>(sync_subdivide, 1);
113
114 gl_buffer.Create();
115 glBindBuffer(gl_target, gl_buffer.handle);
116
117 glBufferStorage(gl_target, static_cast<GLsizeiptr>(buffer_size), nullptr,
118 GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT);
119 mapped_ptr = reinterpret_cast<u8*>(
120 glMapBufferRange(gl_target, 0, static_cast<GLsizeiptr>(buffer_size),
121 GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_FLUSH_EXPLICIT_BIT));
122} 54}
123 55
124void StorageBuffer::Release() { 56std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
125 if (gl_buffer.handle == 0)
126 return;
127
128 glUnmapBuffer(gl_target);
129
130 gl_buffer.Release();
131 head.clear();
132 tail.clear();
133}
134
135std::pair<u8*, GLintptr> StorageBuffer::Map(size_t size, size_t alignment) {
136 ASSERT(size <= buffer_size); 57 ASSERT(size <= buffer_size);
58 ASSERT(alignment <= buffer_size);
59 mapped_size = size;
137 60
138 OGLSync sync; 61 if (alignment > 0) {
139 62 buffer_pos = Common::AlignUp<size_t>(buffer_pos, alignment);
140 buffer_pos = Common::AlignUp(buffer_pos, alignment);
141 size_t effective_offset = Common::AlignDown(buffer_pos, buffer_sync_subdivide);
142
143 if (!head.empty() &&
144 (effective_offset > head.back().offset || buffer_pos + size > buffer_size)) {
145 ASSERT(head.back().sync.handle == 0);
146 head.back().sync.Create();
147 } 63 }
148 64
65 bool invalidate = false;
149 if (buffer_pos + size > buffer_size) { 66 if (buffer_pos + size > buffer_size) {
150 if (!tail.empty()) {
151 std::swap(sync, tail.back().sync);
152 tail.clear();
153 }
154 std::swap(tail, head);
155 buffer_pos = 0; 67 buffer_pos = 0;
156 effective_offset = 0; 68 invalidate = true;
157 }
158 69
159 while (!tail.empty() && buffer_pos + size > tail.front().offset) { 70 if (persistent) {
160 std::swap(sync, tail.front().sync); 71 glUnmapBuffer(gl_target);
161 tail.pop_front(); 72 }
162 } 73 }
163 74
164 if (sync.handle != 0) { 75 if (invalidate | !persistent) {
165 glClientWaitSync(sync.handle, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); 76 GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) |
166 sync.Release(); 77 (coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) |
78 (invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT);
79 mapped_ptr = static_cast<u8*>(
80 glMapBufferRange(gl_target, buffer_pos, buffer_size - buffer_pos, flags));
81 mapped_offset = buffer_pos;
167 } 82 }
168 83
169 if (head.empty() || effective_offset > head.back().offset) { 84 return std::make_tuple(mapped_ptr + buffer_pos - mapped_offset, buffer_pos, invalidate);
170 head.emplace_back(); 85}
171 head.back().offset = effective_offset; 86
87void OGLStreamBuffer::Unmap(GLsizeiptr size) {
88 ASSERT(size <= mapped_size);
89
90 if (!coherent && size > 0) {
91 glFlushMappedBufferRange(gl_target, buffer_pos - mapped_offset, size);
172 } 92 }
173 93
174 mapped_size = size; 94 if (!persistent) {
175 return std::make_pair(&mapped_ptr[buffer_pos], static_cast<GLintptr>(buffer_pos)); 95 glUnmapBuffer(gl_target);
176} 96 }
177 97
178void StorageBuffer::Unmap() { 98 buffer_pos += size;
179 glFlushMappedBufferRange(gl_target, static_cast<GLintptr>(buffer_pos),
180 static_cast<GLsizeiptr>(mapped_size));
181 buffer_pos += mapped_size;
182} 99}
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h
index e78dc5784..45592daaf 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.h
@@ -2,35 +2,41 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#pragma once 5#include <tuple>
6
7#include <memory>
8#include <glad/glad.h> 6#include <glad/glad.h>
9#include "common/common_types.h" 7#include "common/common_types.h"
10#include "video_core/renderer_opengl/gl_resource_manager.h" 8#include "video_core/renderer_opengl/gl_resource_manager.h"
11 9
12class OGLStreamBuffer : private NonCopyable { 10class OGLStreamBuffer : private NonCopyable {
13public: 11public:
14 explicit OGLStreamBuffer(GLenum target); 12 explicit OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coherent = false);
15 virtual ~OGLStreamBuffer() = default; 13 ~OGLStreamBuffer();
16
17public:
18 static std::unique_ptr<OGLStreamBuffer> MakeBuffer(bool storage_buffer, GLenum target);
19
20 virtual void Create(size_t size, size_t sync_subdivide) = 0;
21 virtual void Release() {}
22 14
23 GLuint GetHandle() const; 15 GLuint GetHandle() const;
16 GLsizeiptr GetSize() const;
17
18 /*
19 * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
20 * and the optional alignment requirement.
21 * If the buffer is full, the whole buffer is reallocated which invalidates old chunks.
22 * The return values are the pointer to the new chunk, the offset within the buffer,
23 * and the invalidation flag for previous chunks.
24 * The actual used size must be specified on unmapping the chunk.
25 */
26 std::tuple<u8*, GLintptr, bool> Map(GLsizeiptr size, GLintptr alignment = 0);
24 27
25 virtual std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) = 0; 28 void Unmap(GLsizeiptr size);
26 virtual void Unmap() = 0;
27 29
28protected: 30private:
29 OGLBuffer gl_buffer; 31 OGLBuffer gl_buffer;
30 GLenum gl_target; 32 GLenum gl_target;
31 33
32 size_t buffer_pos = 0; 34 bool coherent = false;
33 size_t buffer_size = 0; 35 bool persistent = false;
34 size_t buffer_sync_subdivide = 0; 36
35 size_t mapped_size = 0; 37 GLintptr buffer_pos = 0;
38 GLsizeiptr buffer_size = 0;
39 GLintptr mapped_offset = 0;
40 GLsizeiptr mapped_size = 0;
41 u8* mapped_ptr = nullptr;
36}; 42};