diff options
Diffstat (limited to 'src')
40 files changed, 562 insertions, 420 deletions
diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp index 282f345c5..6ebed3fb0 100644 --- a/src/audio_core/audio_renderer.cpp +++ b/src/audio_core/audio_renderer.cpp | |||
| @@ -26,6 +26,18 @@ AudioRenderer::AudioRenderer(AudioRendererParameter params, | |||
| 26 | QueueMixedBuffer(2); | 26 | QueueMixedBuffer(2); |
| 27 | } | 27 | } |
| 28 | 28 | ||
| 29 | u32 AudioRenderer::GetSampleRate() const { | ||
| 30 | return worker_params.sample_rate; | ||
| 31 | } | ||
| 32 | |||
| 33 | u32 AudioRenderer::GetSampleCount() const { | ||
| 34 | return worker_params.sample_count; | ||
| 35 | } | ||
| 36 | |||
| 37 | u32 AudioRenderer::GetMixBufferCount() const { | ||
| 38 | return worker_params.mix_buffer_count; | ||
| 39 | } | ||
| 40 | |||
| 29 | std::vector<u8> AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_params) { | 41 | std::vector<u8> AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_params) { |
| 30 | // Copy UpdateDataHeader struct | 42 | // Copy UpdateDataHeader struct |
| 31 | UpdateDataHeader config{}; | 43 | UpdateDataHeader config{}; |
diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h index 6950a4681..13c5d0adc 100644 --- a/src/audio_core/audio_renderer.h +++ b/src/audio_core/audio_renderer.h | |||
| @@ -26,7 +26,7 @@ enum class PlayState : u8 { | |||
| 26 | struct AudioRendererParameter { | 26 | struct AudioRendererParameter { |
| 27 | u32_le sample_rate; | 27 | u32_le sample_rate; |
| 28 | u32_le sample_count; | 28 | u32_le sample_count; |
| 29 | u32_le unknown_8; | 29 | u32_le mix_buffer_count; |
| 30 | u32_le unknown_c; | 30 | u32_le unknown_c; |
| 31 | u32_le voice_count; | 31 | u32_le voice_count; |
| 32 | u32_le sink_count; | 32 | u32_le sink_count; |
| @@ -160,6 +160,9 @@ public: | |||
| 160 | std::vector<u8> UpdateAudioRenderer(const std::vector<u8>& input_params); | 160 | std::vector<u8> UpdateAudioRenderer(const std::vector<u8>& input_params); |
| 161 | void QueueMixedBuffer(Buffer::Tag tag); | 161 | void QueueMixedBuffer(Buffer::Tag tag); |
| 162 | void ReleaseAndQueueBuffers(); | 162 | void ReleaseAndQueueBuffers(); |
| 163 | u32 GetSampleRate() const; | ||
| 164 | u32 GetSampleCount() const; | ||
| 165 | u32 GetMixBufferCount() const; | ||
| 163 | 166 | ||
| 164 | private: | 167 | private: |
| 165 | class VoiceState { | 168 | class VoiceState { |
diff --git a/src/audio_core/cubeb_sink.cpp b/src/audio_core/cubeb_sink.cpp index 1501ef1f4..5a1177d0c 100644 --- a/src/audio_core/cubeb_sink.cpp +++ b/src/audio_core/cubeb_sink.cpp | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <cstring> | 6 | #include <cstring> |
| 7 | #include <mutex> | ||
| 7 | 8 | ||
| 8 | #include "audio_core/cubeb_sink.h" | 9 | #include "audio_core/cubeb_sink.h" |
| 9 | #include "audio_core/stream.h" | 10 | #include "audio_core/stream.h" |
| @@ -66,6 +67,8 @@ public: | |||
| 66 | return; | 67 | return; |
| 67 | } | 68 | } |
| 68 | 69 | ||
| 70 | std::lock_guard lock{queue_mutex}; | ||
| 71 | |||
| 69 | queue.reserve(queue.size() + samples.size() * GetNumChannels()); | 72 | queue.reserve(queue.size() + samples.size() * GetNumChannels()); |
| 70 | 73 | ||
| 71 | if (is_6_channel) { | 74 | if (is_6_channel) { |
| @@ -94,6 +97,7 @@ private: | |||
| 94 | u32 num_channels{}; | 97 | u32 num_channels{}; |
| 95 | bool is_6_channel{}; | 98 | bool is_6_channel{}; |
| 96 | 99 | ||
| 100 | std::mutex queue_mutex; | ||
| 97 | std::vector<s16> queue; | 101 | std::vector<s16> queue; |
| 98 | 102 | ||
| 99 | static long DataCallback(cubeb_stream* stream, void* user_data, const void* input_buffer, | 103 | static long DataCallback(cubeb_stream* stream, void* user_data, const void* input_buffer, |
| @@ -153,6 +157,8 @@ long SinkStreamImpl::DataCallback(cubeb_stream* stream, void* user_data, const v | |||
| 153 | return {}; | 157 | return {}; |
| 154 | } | 158 | } |
| 155 | 159 | ||
| 160 | std::lock_guard lock{impl->queue_mutex}; | ||
| 161 | |||
| 156 | const size_t frames_to_write{ | 162 | const size_t frames_to_write{ |
| 157 | std::min(impl->queue.size() / impl->GetNumChannels(), static_cast<size_t>(num_frames))}; | 163 | std::min(impl->queue.size() / impl->GetNumChannels(), static_cast<size_t>(num_frames))}; |
| 158 | 164 | ||
diff --git a/src/common/thread_queue_list.h b/src/common/thread_queue_list.h index 38a450d69..133122c5f 100644 --- a/src/common/thread_queue_list.h +++ b/src/common/thread_queue_list.h | |||
| @@ -16,7 +16,7 @@ struct ThreadQueueList { | |||
| 16 | // (dynamically resizable) circular buffers to remove their overhead when | 16 | // (dynamically resizable) circular buffers to remove their overhead when |
| 17 | // inserting and popping. | 17 | // inserting and popping. |
| 18 | 18 | ||
| 19 | typedef unsigned int Priority; | 19 | using Priority = unsigned int; |
| 20 | 20 | ||
| 21 | // Number of priority levels. (Valid levels are [0..NUM_QUEUES).) | 21 | // Number of priority levels. (Valid levels are [0..NUM_QUEUES).) |
| 22 | static const Priority NUM_QUEUES = N; | 22 | static const Priority NUM_QUEUES = N; |
| @@ -26,9 +26,9 @@ struct ThreadQueueList { | |||
| 26 | } | 26 | } |
| 27 | 27 | ||
| 28 | // Only for debugging, returns priority level. | 28 | // Only for debugging, returns priority level. |
| 29 | Priority contains(const T& uid) { | 29 | Priority contains(const T& uid) const { |
| 30 | for (Priority i = 0; i < NUM_QUEUES; ++i) { | 30 | for (Priority i = 0; i < NUM_QUEUES; ++i) { |
| 31 | Queue& cur = queues[i]; | 31 | const Queue& cur = queues[i]; |
| 32 | if (std::find(cur.data.cbegin(), cur.data.cend(), uid) != cur.data.cend()) { | 32 | if (std::find(cur.data.cbegin(), cur.data.cend(), uid) != cur.data.cend()) { |
| 33 | return i; | 33 | return i; |
| 34 | } | 34 | } |
| @@ -37,8 +37,8 @@ struct ThreadQueueList { | |||
| 37 | return -1; | 37 | return -1; |
| 38 | } | 38 | } |
| 39 | 39 | ||
| 40 | T get_first() { | 40 | T get_first() const { |
| 41 | Queue* cur = first; | 41 | const Queue* cur = first; |
| 42 | while (cur != nullptr) { | 42 | while (cur != nullptr) { |
| 43 | if (!cur->data.empty()) { | 43 | if (!cur->data.empty()) { |
| 44 | return cur->data.front(); | 44 | return cur->data.front(); |
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp index ceb3f7683..0996f129c 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic.cpp | |||
| @@ -86,7 +86,16 @@ public: | |||
| 86 | } | 86 | } |
| 87 | 87 | ||
| 88 | void AddTicks(u64 ticks) override { | 88 | void AddTicks(u64 ticks) override { |
| 89 | CoreTiming::AddTicks(ticks - num_interpreted_instructions); | 89 | // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a |
| 90 | // rough approximation of the amount of executed ticks in the system, it may be thrown off | ||
| 91 | // if not all cores are doing a similar amount of work. Instead of doing this, we should | ||
| 92 | // devise a way so that timing is consistent across all cores without increasing the ticks 4 | ||
| 93 | // times. | ||
| 94 | u64 amortized_ticks = (ticks - num_interpreted_instructions) / Core::NUM_CPU_CORES; | ||
| 95 | // Always execute at least one tick. | ||
| 96 | amortized_ticks = std::max<u64>(amortized_ticks, 1); | ||
| 97 | |||
| 98 | CoreTiming::AddTicks(amortized_ticks); | ||
| 90 | num_interpreted_instructions = 0; | 99 | num_interpreted_instructions = 0; |
| 91 | } | 100 | } |
| 92 | u64 GetTicksRemaining() override { | 101 | u64 GetTicksRemaining() override { |
diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp index 46a522fcd..b042ee02b 100644 --- a/src/core/core_cpu.cpp +++ b/src/core/core_cpu.cpp | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include "core/core_timing.h" | 14 | #include "core/core_timing.h" |
| 15 | #include "core/hle/kernel/scheduler.h" | 15 | #include "core/hle/kernel/scheduler.h" |
| 16 | #include "core/hle/kernel/thread.h" | 16 | #include "core/hle/kernel/thread.h" |
| 17 | #include "core/hle/lock.h" | ||
| 17 | #include "core/settings.h" | 18 | #include "core/settings.h" |
| 18 | 19 | ||
| 19 | namespace Core { | 20 | namespace Core { |
| @@ -90,6 +91,7 @@ void Cpu::RunLoop(bool tight_loop) { | |||
| 90 | LOG_TRACE(Core, "Core-{} idling", core_index); | 91 | LOG_TRACE(Core, "Core-{} idling", core_index); |
| 91 | 92 | ||
| 92 | if (IsMainCore()) { | 93 | if (IsMainCore()) { |
| 94 | // TODO(Subv): Only let CoreTiming idle if all 4 cores are idling. | ||
| 93 | CoreTiming::Idle(); | 95 | CoreTiming::Idle(); |
| 94 | CoreTiming::Advance(); | 96 | CoreTiming::Advance(); |
| 95 | } | 97 | } |
| @@ -125,6 +127,8 @@ void Cpu::Reschedule() { | |||
| 125 | } | 127 | } |
| 126 | 128 | ||
| 127 | reschedule_pending = false; | 129 | reschedule_pending = false; |
| 130 | // Lock the global kernel mutex when we manipulate the HLE state | ||
| 131 | std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock); | ||
| 128 | scheduler->Reschedule(); | 132 | scheduler->Reschedule(); |
| 129 | } | 133 | } |
| 130 | 134 | ||
diff --git a/src/core/core_cpu.h b/src/core/core_cpu.h index 976952903..56cdae194 100644 --- a/src/core/core_cpu.h +++ b/src/core/core_cpu.h | |||
| @@ -79,7 +79,7 @@ private: | |||
| 79 | std::shared_ptr<CpuBarrier> cpu_barrier; | 79 | std::shared_ptr<CpuBarrier> cpu_barrier; |
| 80 | std::shared_ptr<Kernel::Scheduler> scheduler; | 80 | std::shared_ptr<Kernel::Scheduler> scheduler; |
| 81 | 81 | ||
| 82 | bool reschedule_pending{}; | 82 | std::atomic<bool> reschedule_pending = false; |
| 83 | size_t core_index; | 83 | size_t core_index; |
| 84 | }; | 84 | }; |
| 85 | 85 | ||
diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index d3bb6f818..f977d1b32 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp | |||
| @@ -135,11 +135,9 @@ void ClearPendingEvents() { | |||
| 135 | void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata) { | 135 | void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata) { |
| 136 | ASSERT(event_type != nullptr); | 136 | ASSERT(event_type != nullptr); |
| 137 | s64 timeout = GetTicks() + cycles_into_future; | 137 | s64 timeout = GetTicks() + cycles_into_future; |
| 138 | |||
| 139 | // If this event needs to be scheduled before the next advance(), force one early | 138 | // If this event needs to be scheduled before the next advance(), force one early |
| 140 | if (!is_global_timer_sane) | 139 | if (!is_global_timer_sane) |
| 141 | ForceExceptionCheck(cycles_into_future); | 140 | ForceExceptionCheck(cycles_into_future); |
| 142 | |||
| 143 | event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type}); | 141 | event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type}); |
| 144 | std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>()); | 142 | std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>()); |
| 145 | } | 143 | } |
diff --git a/src/core/file_sys/card_image.cpp b/src/core/file_sys/card_image.cpp index a4823353e..8e05b9d0e 100644 --- a/src/core/file_sys/card_image.cpp +++ b/src/core/file_sys/card_image.cpp | |||
| @@ -107,19 +107,19 @@ VirtualFile XCI::GetNCAFileByType(NCAContentType type) const { | |||
| 107 | return nullptr; | 107 | return nullptr; |
| 108 | } | 108 | } |
| 109 | 109 | ||
| 110 | std::vector<std::shared_ptr<VfsFile>> XCI::GetFiles() const { | 110 | std::vector<VirtualFile> XCI::GetFiles() const { |
| 111 | return {}; | 111 | return {}; |
| 112 | } | 112 | } |
| 113 | 113 | ||
| 114 | std::vector<std::shared_ptr<VfsDirectory>> XCI::GetSubdirectories() const { | 114 | std::vector<VirtualDir> XCI::GetSubdirectories() const { |
| 115 | return std::vector<std::shared_ptr<VfsDirectory>>(); | 115 | return {}; |
| 116 | } | 116 | } |
| 117 | 117 | ||
| 118 | std::string XCI::GetName() const { | 118 | std::string XCI::GetName() const { |
| 119 | return file->GetName(); | 119 | return file->GetName(); |
| 120 | } | 120 | } |
| 121 | 121 | ||
| 122 | std::shared_ptr<VfsDirectory> XCI::GetParentDirectory() const { | 122 | VirtualDir XCI::GetParentDirectory() const { |
| 123 | return file->GetContainingDirectory(); | 123 | return file->GetContainingDirectory(); |
| 124 | } | 124 | } |
| 125 | 125 | ||
diff --git a/src/core/file_sys/card_image.h b/src/core/file_sys/card_image.h index e089d737c..4618d9c00 100644 --- a/src/core/file_sys/card_image.h +++ b/src/core/file_sys/card_image.h | |||
| @@ -71,13 +71,13 @@ public: | |||
| 71 | std::shared_ptr<NCA> GetNCAByType(NCAContentType type) const; | 71 | std::shared_ptr<NCA> GetNCAByType(NCAContentType type) const; |
| 72 | VirtualFile GetNCAFileByType(NCAContentType type) const; | 72 | VirtualFile GetNCAFileByType(NCAContentType type) const; |
| 73 | 73 | ||
| 74 | std::vector<std::shared_ptr<VfsFile>> GetFiles() const override; | 74 | std::vector<VirtualFile> GetFiles() const override; |
| 75 | 75 | ||
| 76 | std::vector<std::shared_ptr<VfsDirectory>> GetSubdirectories() const override; | 76 | std::vector<VirtualDir> GetSubdirectories() const override; |
| 77 | 77 | ||
| 78 | std::string GetName() const override; | 78 | std::string GetName() const override; |
| 79 | 79 | ||
| 80 | std::shared_ptr<VfsDirectory> GetParentDirectory() const override; | 80 | VirtualDir GetParentDirectory() const override; |
| 81 | 81 | ||
| 82 | protected: | 82 | protected: |
| 83 | bool ReplaceFileWithSubdirectory(VirtualFile file, VirtualDir dir) override; | 83 | bool ReplaceFileWithSubdirectory(VirtualFile file, VirtualDir dir) override; |
diff --git a/src/core/file_sys/partition_filesystem.h b/src/core/file_sys/partition_filesystem.h index 7c7a75816..be7bc32a8 100644 --- a/src/core/file_sys/partition_filesystem.h +++ b/src/core/file_sys/partition_filesystem.h | |||
| @@ -13,7 +13,7 @@ | |||
| 13 | #include "core/file_sys/vfs.h" | 13 | #include "core/file_sys/vfs.h" |
| 14 | 14 | ||
| 15 | namespace Loader { | 15 | namespace Loader { |
| 16 | enum class ResultStatus; | 16 | enum class ResultStatus : u16; |
| 17 | } | 17 | } |
| 18 | 18 | ||
| 19 | namespace FileSys { | 19 | namespace FileSys { |
diff --git a/src/core/file_sys/program_metadata.h b/src/core/file_sys/program_metadata.h index 06a7315db..74a91052b 100644 --- a/src/core/file_sys/program_metadata.h +++ b/src/core/file_sys/program_metadata.h | |||
| @@ -13,7 +13,7 @@ | |||
| 13 | #include "partition_filesystem.h" | 13 | #include "partition_filesystem.h" |
| 14 | 14 | ||
| 15 | namespace Loader { | 15 | namespace Loader { |
| 16 | enum class ResultStatus; | 16 | enum class ResultStatus : u16; |
| 17 | } | 17 | } |
| 18 | 18 | ||
| 19 | namespace FileSys { | 19 | namespace FileSys { |
diff --git a/src/core/file_sys/vfs.h b/src/core/file_sys/vfs.h index 141a053ce..78a63c59b 100644 --- a/src/core/file_sys/vfs.h +++ b/src/core/file_sys/vfs.h | |||
| @@ -15,9 +15,9 @@ | |||
| 15 | 15 | ||
| 16 | namespace FileSys { | 16 | namespace FileSys { |
| 17 | 17 | ||
| 18 | struct VfsFilesystem; | 18 | class VfsDirectory; |
| 19 | struct VfsFile; | 19 | class VfsFile; |
| 20 | struct VfsDirectory; | 20 | class VfsFilesystem; |
| 21 | 21 | ||
| 22 | // Convenience typedefs to use Vfs* interfaces | 22 | // Convenience typedefs to use Vfs* interfaces |
| 23 | using VirtualFilesystem = std::shared_ptr<VfsFilesystem>; | 23 | using VirtualFilesystem = std::shared_ptr<VfsFilesystem>; |
| @@ -34,8 +34,9 @@ enum class VfsEntryType { | |||
| 34 | // A class representing an abstract filesystem. A default implementation given the root VirtualDir | 34 | // A class representing an abstract filesystem. A default implementation given the root VirtualDir |
| 35 | // is provided for convenience, but if the Vfs implementation has any additional state or | 35 | // is provided for convenience, but if the Vfs implementation has any additional state or |
| 36 | // functionality, they will need to override. | 36 | // functionality, they will need to override. |
| 37 | struct VfsFilesystem : NonCopyable { | 37 | class VfsFilesystem : NonCopyable { |
| 38 | VfsFilesystem(VirtualDir root); | 38 | public: |
| 39 | explicit VfsFilesystem(VirtualDir root); | ||
| 39 | virtual ~VfsFilesystem(); | 40 | virtual ~VfsFilesystem(); |
| 40 | 41 | ||
| 41 | // Gets the friendly name for the filesystem. | 42 | // Gets the friendly name for the filesystem. |
| @@ -81,7 +82,8 @@ protected: | |||
| 81 | }; | 82 | }; |
| 82 | 83 | ||
| 83 | // A class representing a file in an abstract filesystem. | 84 | // A class representing a file in an abstract filesystem. |
| 84 | struct VfsFile : NonCopyable { | 85 | class VfsFile : NonCopyable { |
| 86 | public: | ||
| 85 | virtual ~VfsFile(); | 87 | virtual ~VfsFile(); |
| 86 | 88 | ||
| 87 | // Retrieves the file name. | 89 | // Retrieves the file name. |
| @@ -179,7 +181,8 @@ struct VfsFile : NonCopyable { | |||
| 179 | }; | 181 | }; |
| 180 | 182 | ||
| 181 | // A class representing a directory in an abstract filesystem. | 183 | // A class representing a directory in an abstract filesystem. |
| 182 | struct VfsDirectory : NonCopyable { | 184 | class VfsDirectory : NonCopyable { |
| 185 | public: | ||
| 183 | virtual ~VfsDirectory(); | 186 | virtual ~VfsDirectory(); |
| 184 | 187 | ||
| 185 | // Retrieves the file located at path as if the current directory was root. Returns nullptr if | 188 | // Retrieves the file located at path as if the current directory was root. Returns nullptr if |
| @@ -295,7 +298,8 @@ protected: | |||
| 295 | 298 | ||
| 296 | // A convenience partial-implementation of VfsDirectory that stubs out methods that should only work | 299 | // A convenience partial-implementation of VfsDirectory that stubs out methods that should only work |
| 297 | // if writable. This is to avoid redundant empty methods everywhere. | 300 | // if writable. This is to avoid redundant empty methods everywhere. |
| 298 | struct ReadOnlyVfsDirectory : public VfsDirectory { | 301 | class ReadOnlyVfsDirectory : public VfsDirectory { |
| 302 | public: | ||
| 299 | bool IsWritable() const override; | 303 | bool IsWritable() const override; |
| 300 | bool IsReadable() const override; | 304 | bool IsReadable() const override; |
| 301 | std::shared_ptr<VfsDirectory> CreateSubdirectory(std::string_view name) override; | 305 | std::shared_ptr<VfsDirectory> CreateSubdirectory(std::string_view name) override; |
diff --git a/src/core/file_sys/vfs_offset.h b/src/core/file_sys/vfs_offset.h index 235970dc5..cb92d1570 100644 --- a/src/core/file_sys/vfs_offset.h +++ b/src/core/file_sys/vfs_offset.h | |||
| @@ -15,7 +15,8 @@ namespace FileSys { | |||
| 15 | // Similar to seeking to an offset. | 15 | // Similar to seeking to an offset. |
| 16 | // If the file is writable, operations that would write past the end of the offset file will expand | 16 | // If the file is writable, operations that would write past the end of the offset file will expand |
| 17 | // the size of this wrapper. | 17 | // the size of this wrapper. |
| 18 | struct OffsetVfsFile : public VfsFile { | 18 | class OffsetVfsFile : public VfsFile { |
| 19 | public: | ||
| 19 | OffsetVfsFile(std::shared_ptr<VfsFile> file, size_t size, size_t offset = 0, | 20 | OffsetVfsFile(std::shared_ptr<VfsFile> file, size_t size, size_t offset = 0, |
| 20 | std::string new_name = "", VirtualDir new_parent = nullptr); | 21 | std::string new_name = "", VirtualDir new_parent = nullptr); |
| 21 | 22 | ||
diff --git a/src/core/file_sys/vfs_vector.h b/src/core/file_sys/vfs_vector.h index ba469647b..b3b468233 100644 --- a/src/core/file_sys/vfs_vector.h +++ b/src/core/file_sys/vfs_vector.h | |||
| @@ -10,7 +10,8 @@ namespace FileSys { | |||
| 10 | 10 | ||
| 11 | // An implementation of VfsDirectory that maintains two vectors for subdirectories and files. | 11 | // An implementation of VfsDirectory that maintains two vectors for subdirectories and files. |
| 12 | // Vector data is supplied upon construction. | 12 | // Vector data is supplied upon construction. |
| 13 | struct VectorVfsDirectory : public VfsDirectory { | 13 | class VectorVfsDirectory : public VfsDirectory { |
| 14 | public: | ||
| 14 | explicit VectorVfsDirectory(std::vector<VirtualFile> files = {}, | 15 | explicit VectorVfsDirectory(std::vector<VirtualFile> files = {}, |
| 15 | std::vector<VirtualDir> dirs = {}, VirtualDir parent = nullptr, | 16 | std::vector<VirtualDir> dirs = {}, VirtualDir parent = nullptr, |
| 16 | std::string name = ""); | 17 | std::string name = ""); |
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 1b0cd0abf..8c19e86d3 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp | |||
| @@ -11,7 +11,7 @@ | |||
| 11 | 11 | ||
| 12 | namespace Kernel { | 12 | namespace Kernel { |
| 13 | 13 | ||
| 14 | unsigned int Object::next_object_id; | 14 | std::atomic<u32> Object::next_object_id{0}; |
| 15 | 15 | ||
| 16 | /// Initialize the kernel | 16 | /// Initialize the kernel |
| 17 | void Init() { | 17 | void Init() { |
diff --git a/src/core/hle/kernel/object.h b/src/core/hle/kernel/object.h index 83df68dfd..526ac9cc3 100644 --- a/src/core/hle/kernel/object.h +++ b/src/core/hle/kernel/object.h | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <atomic> | ||
| 7 | #include <string> | 8 | #include <string> |
| 8 | #include <utility> | 9 | #include <utility> |
| 9 | 10 | ||
| @@ -42,8 +43,8 @@ public: | |||
| 42 | virtual ~Object(); | 43 | virtual ~Object(); |
| 43 | 44 | ||
| 44 | /// Returns a unique identifier for the object. For debugging purposes only. | 45 | /// Returns a unique identifier for the object. For debugging purposes only. |
| 45 | unsigned int GetObjectId() const { | 46 | u32 GetObjectId() const { |
| 46 | return object_id; | 47 | return object_id.load(std::memory_order_relaxed); |
| 47 | } | 48 | } |
| 48 | 49 | ||
| 49 | virtual std::string GetTypeName() const { | 50 | virtual std::string GetTypeName() const { |
| @@ -61,23 +62,23 @@ public: | |||
| 61 | bool IsWaitable() const; | 62 | bool IsWaitable() const; |
| 62 | 63 | ||
| 63 | public: | 64 | public: |
| 64 | static unsigned int next_object_id; | 65 | static std::atomic<u32> next_object_id; |
| 65 | 66 | ||
| 66 | private: | 67 | private: |
| 67 | friend void intrusive_ptr_add_ref(Object*); | 68 | friend void intrusive_ptr_add_ref(Object*); |
| 68 | friend void intrusive_ptr_release(Object*); | 69 | friend void intrusive_ptr_release(Object*); |
| 69 | 70 | ||
| 70 | unsigned int ref_count = 0; | 71 | std::atomic<u32> ref_count{0}; |
| 71 | unsigned int object_id = next_object_id++; | 72 | std::atomic<u32> object_id{next_object_id++}; |
| 72 | }; | 73 | }; |
| 73 | 74 | ||
| 74 | // Special functions used by boost::intrusive_ptr to do automatic ref-counting | 75 | // Special functions used by boost::intrusive_ptr to do automatic ref-counting |
| 75 | inline void intrusive_ptr_add_ref(Object* object) { | 76 | inline void intrusive_ptr_add_ref(Object* object) { |
| 76 | ++object->ref_count; | 77 | object->ref_count.fetch_add(1, std::memory_order_relaxed); |
| 77 | } | 78 | } |
| 78 | 79 | ||
| 79 | inline void intrusive_ptr_release(Object* object) { | 80 | inline void intrusive_ptr_release(Object* object) { |
| 80 | if (--object->ref_count == 0) { | 81 | if (object->ref_count.fetch_sub(1, std::memory_order_acq_rel) == 1) { |
| 81 | delete object; | 82 | delete object; |
| 82 | } | 83 | } |
| 83 | } | 84 | } |
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index 94065c736..e770b9103 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp | |||
| @@ -25,7 +25,7 @@ Scheduler::~Scheduler() { | |||
| 25 | } | 25 | } |
| 26 | } | 26 | } |
| 27 | 27 | ||
| 28 | bool Scheduler::HaveReadyThreads() { | 28 | bool Scheduler::HaveReadyThreads() const { |
| 29 | std::lock_guard<std::mutex> lock(scheduler_mutex); | 29 | std::lock_guard<std::mutex> lock(scheduler_mutex); |
| 30 | return ready_queue.get_first() != nullptr; | 30 | return ready_queue.get_first() != nullptr; |
| 31 | } | 31 | } |
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h index 1a4ee8f36..6a61ef64e 100644 --- a/src/core/hle/kernel/scheduler.h +++ b/src/core/hle/kernel/scheduler.h | |||
| @@ -21,7 +21,7 @@ public: | |||
| 21 | ~Scheduler(); | 21 | ~Scheduler(); |
| 22 | 22 | ||
| 23 | /// Returns whether there are any threads that are ready to run. | 23 | /// Returns whether there are any threads that are ready to run. |
| 24 | bool HaveReadyThreads(); | 24 | bool HaveReadyThreads() const; |
| 25 | 25 | ||
| 26 | /// Reschedules to the next available thread (call after current thread is suspended) | 26 | /// Reschedules to the next available thread (call after current thread is suspended) |
| 27 | void Reschedule(); | 27 | void Reschedule(); |
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 5db2db687..b24f409b3 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp | |||
| @@ -532,7 +532,6 @@ static ResultCode CreateThread(Handle* out_handle, VAddr entry_point, u64 arg, V | |||
| 532 | CASCADE_RESULT(thread->guest_handle, g_handle_table.Create(thread)); | 532 | CASCADE_RESULT(thread->guest_handle, g_handle_table.Create(thread)); |
| 533 | *out_handle = thread->guest_handle; | 533 | *out_handle = thread->guest_handle; |
| 534 | 534 | ||
| 535 | Core::System::GetInstance().PrepareReschedule(); | ||
| 536 | Core::System::GetInstance().CpuCore(thread->processor_id).PrepareReschedule(); | 535 | Core::System::GetInstance().CpuCore(thread->processor_id).PrepareReschedule(); |
| 537 | 536 | ||
| 538 | LOG_TRACE(Kernel_SVC, | 537 | LOG_TRACE(Kernel_SVC, |
| @@ -706,8 +705,7 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target | |||
| 706 | Handle owner_handle = static_cast<Handle>(mutex_val & Mutex::MutexOwnerMask); | 705 | Handle owner_handle = static_cast<Handle>(mutex_val & Mutex::MutexOwnerMask); |
| 707 | auto owner = g_handle_table.Get<Thread>(owner_handle); | 706 | auto owner = g_handle_table.Get<Thread>(owner_handle); |
| 708 | ASSERT(owner); | 707 | ASSERT(owner); |
| 709 | ASSERT(thread->status != ThreadStatus::Running); | 708 | ASSERT(thread->status == ThreadStatus::WaitMutex); |
| 710 | thread->status = ThreadStatus::WaitMutex; | ||
| 711 | thread->wakeup_callback = nullptr; | 709 | thread->wakeup_callback = nullptr; |
| 712 | 710 | ||
| 713 | owner->AddMutexWaiter(thread); | 711 | owner->AddMutexWaiter(thread); |
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index b9022feae..a1a7867ce 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp | |||
| @@ -23,6 +23,7 @@ | |||
| 23 | #include "core/hle/kernel/object.h" | 23 | #include "core/hle/kernel/object.h" |
| 24 | #include "core/hle/kernel/process.h" | 24 | #include "core/hle/kernel/process.h" |
| 25 | #include "core/hle/kernel/thread.h" | 25 | #include "core/hle/kernel/thread.h" |
| 26 | #include "core/hle/lock.h" | ||
| 26 | #include "core/hle/result.h" | 27 | #include "core/hle/result.h" |
| 27 | #include "core/memory.h" | 28 | #include "core/memory.h" |
| 28 | 29 | ||
| @@ -104,6 +105,10 @@ void ExitCurrentThread() { | |||
| 104 | */ | 105 | */ |
| 105 | static void ThreadWakeupCallback(u64 thread_handle, int cycles_late) { | 106 | static void ThreadWakeupCallback(u64 thread_handle, int cycles_late) { |
| 106 | const auto proper_handle = static_cast<Handle>(thread_handle); | 107 | const auto proper_handle = static_cast<Handle>(thread_handle); |
| 108 | |||
| 109 | // Lock the global kernel mutex when we enter the kernel HLE. | ||
| 110 | std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock); | ||
| 111 | |||
| 107 | SharedPtr<Thread> thread = wakeup_callback_handle_table.Get<Thread>(proper_handle); | 112 | SharedPtr<Thread> thread = wakeup_callback_handle_table.Get<Thread>(proper_handle); |
| 108 | if (thread == nullptr) { | 113 | if (thread == nullptr) { |
| 109 | LOG_CRITICAL(Kernel, "Callback fired for invalid thread {:08X}", proper_handle); | 114 | LOG_CRITICAL(Kernel, "Callback fired for invalid thread {:08X}", proper_handle); |
| @@ -155,8 +160,10 @@ void Thread::WakeAfterDelay(s64 nanoseconds) { | |||
| 155 | if (nanoseconds == -1) | 160 | if (nanoseconds == -1) |
| 156 | return; | 161 | return; |
| 157 | 162 | ||
| 158 | CoreTiming::ScheduleEvent(CoreTiming::nsToCycles(nanoseconds), ThreadWakeupEventType, | 163 | // This function might be called from any thread so we have to be cautious and use the |
| 159 | callback_handle); | 164 | // thread-safe version of ScheduleEvent. |
| 165 | CoreTiming::ScheduleEventThreadsafe(CoreTiming::nsToCycles(nanoseconds), ThreadWakeupEventType, | ||
| 166 | callback_handle); | ||
| 160 | } | 167 | } |
| 161 | 168 | ||
| 162 | void Thread::CancelWakeupTimer() { | 169 | void Thread::CancelWakeupTimer() { |
| @@ -419,12 +426,33 @@ VAddr Thread::GetCommandBufferAddress() const { | |||
| 419 | } | 426 | } |
| 420 | 427 | ||
| 421 | void Thread::AddMutexWaiter(SharedPtr<Thread> thread) { | 428 | void Thread::AddMutexWaiter(SharedPtr<Thread> thread) { |
| 429 | if (thread->lock_owner == this) { | ||
| 430 | // If the thread is already waiting for this thread to release the mutex, ensure that the | ||
| 431 | // waiters list is consistent and return without doing anything. | ||
| 432 | auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread); | ||
| 433 | ASSERT(itr != wait_mutex_threads.end()); | ||
| 434 | return; | ||
| 435 | } | ||
| 436 | |||
| 437 | // A thread can't wait on two different mutexes at the same time. | ||
| 438 | ASSERT(thread->lock_owner == nullptr); | ||
| 439 | |||
| 440 | // Ensure that the thread is not already in the list of mutex waiters | ||
| 441 | auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread); | ||
| 442 | ASSERT(itr == wait_mutex_threads.end()); | ||
| 443 | |||
| 422 | thread->lock_owner = this; | 444 | thread->lock_owner = this; |
| 423 | wait_mutex_threads.emplace_back(std::move(thread)); | 445 | wait_mutex_threads.emplace_back(std::move(thread)); |
| 424 | UpdatePriority(); | 446 | UpdatePriority(); |
| 425 | } | 447 | } |
| 426 | 448 | ||
| 427 | void Thread::RemoveMutexWaiter(SharedPtr<Thread> thread) { | 449 | void Thread::RemoveMutexWaiter(SharedPtr<Thread> thread) { |
| 450 | ASSERT(thread->lock_owner == this); | ||
| 451 | |||
| 452 | // Ensure that the thread is in the list of mutex waiters | ||
| 453 | auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread); | ||
| 454 | ASSERT(itr != wait_mutex_threads.end()); | ||
| 455 | |||
| 428 | boost::remove_erase(wait_mutex_threads, thread); | 456 | boost::remove_erase(wait_mutex_threads, thread); |
| 429 | thread->lock_owner = nullptr; | 457 | thread->lock_owner = nullptr; |
| 430 | UpdatePriority(); | 458 | UpdatePriority(); |
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp index f99304de5..9e75eb3a6 100644 --- a/src/core/hle/service/audio/audren_u.cpp +++ b/src/core/hle/service/audio/audren_u.cpp | |||
| @@ -20,9 +20,9 @@ public: | |||
| 20 | explicit IAudioRenderer(AudioCore::AudioRendererParameter audren_params) | 20 | explicit IAudioRenderer(AudioCore::AudioRendererParameter audren_params) |
| 21 | : ServiceFramework("IAudioRenderer") { | 21 | : ServiceFramework("IAudioRenderer") { |
| 22 | static const FunctionInfo functions[] = { | 22 | static const FunctionInfo functions[] = { |
| 23 | {0, nullptr, "GetAudioRendererSampleRate"}, | 23 | {0, &IAudioRenderer::GetAudioRendererSampleRate, "GetAudioRendererSampleRate"}, |
| 24 | {1, nullptr, "GetAudioRendererSampleCount"}, | 24 | {1, &IAudioRenderer::GetAudioRendererSampleCount, "GetAudioRendererSampleCount"}, |
| 25 | {2, nullptr, "GetAudioRendererMixBufferCount"}, | 25 | {2, &IAudioRenderer::GetAudioRendererMixBufferCount, "GetAudioRendererMixBufferCount"}, |
| 26 | {3, nullptr, "GetAudioRendererState"}, | 26 | {3, nullptr, "GetAudioRendererState"}, |
| 27 | {4, &IAudioRenderer::RequestUpdateAudioRenderer, "RequestUpdateAudioRenderer"}, | 27 | {4, &IAudioRenderer::RequestUpdateAudioRenderer, "RequestUpdateAudioRenderer"}, |
| 28 | {5, &IAudioRenderer::StartAudioRenderer, "StartAudioRenderer"}, | 28 | {5, &IAudioRenderer::StartAudioRenderer, "StartAudioRenderer"}, |
| @@ -45,6 +45,27 @@ private: | |||
| 45 | system_event->Signal(); | 45 | system_event->Signal(); |
| 46 | } | 46 | } |
| 47 | 47 | ||
| 48 | void GetAudioRendererSampleRate(Kernel::HLERequestContext& ctx) { | ||
| 49 | IPC::ResponseBuilder rb{ctx, 3}; | ||
| 50 | rb.Push(RESULT_SUCCESS); | ||
| 51 | rb.Push<u32>(renderer->GetSampleRate()); | ||
| 52 | LOG_DEBUG(Service_Audio, "called"); | ||
| 53 | } | ||
| 54 | |||
| 55 | void GetAudioRendererSampleCount(Kernel::HLERequestContext& ctx) { | ||
| 56 | IPC::ResponseBuilder rb{ctx, 3}; | ||
| 57 | rb.Push(RESULT_SUCCESS); | ||
| 58 | rb.Push<u32>(renderer->GetSampleCount()); | ||
| 59 | LOG_DEBUG(Service_Audio, "called"); | ||
| 60 | } | ||
| 61 | |||
| 62 | void GetAudioRendererMixBufferCount(Kernel::HLERequestContext& ctx) { | ||
| 63 | IPC::ResponseBuilder rb{ctx, 3}; | ||
| 64 | rb.Push(RESULT_SUCCESS); | ||
| 65 | rb.Push<u32>(renderer->GetMixBufferCount()); | ||
| 66 | LOG_DEBUG(Service_Audio, "called"); | ||
| 67 | } | ||
| 68 | |||
| 48 | void RequestUpdateAudioRenderer(Kernel::HLERequestContext& ctx) { | 69 | void RequestUpdateAudioRenderer(Kernel::HLERequestContext& ctx) { |
| 49 | ctx.WriteBuffer(renderer->UpdateAudioRenderer(ctx.ReadBuffer())); | 70 | ctx.WriteBuffer(renderer->UpdateAudioRenderer(ctx.ReadBuffer())); |
| 50 | IPC::ResponseBuilder rb{ctx, 2}; | 71 | IPC::ResponseBuilder rb{ctx, 2}; |
| @@ -169,7 +190,8 @@ AudRenU::AudRenU() : ServiceFramework("audren:u") { | |||
| 169 | {1, &AudRenU::GetAudioRendererWorkBufferSize, "GetAudioRendererWorkBufferSize"}, | 190 | {1, &AudRenU::GetAudioRendererWorkBufferSize, "GetAudioRendererWorkBufferSize"}, |
| 170 | {2, &AudRenU::GetAudioDevice, "GetAudioDevice"}, | 191 | {2, &AudRenU::GetAudioDevice, "GetAudioDevice"}, |
| 171 | {3, nullptr, "OpenAudioRendererAuto"}, | 192 | {3, nullptr, "OpenAudioRendererAuto"}, |
| 172 | {4, nullptr, "GetAudioDeviceServiceWithRevisionInfo"}, | 193 | {4, &AudRenU::GetAudioDeviceServiceWithRevisionInfo, |
| 194 | "GetAudioDeviceServiceWithRevisionInfo"}, | ||
| 173 | }; | 195 | }; |
| 174 | RegisterHandlers(functions); | 196 | RegisterHandlers(functions); |
| 175 | } | 197 | } |
| @@ -189,7 +211,7 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) { | |||
| 189 | IPC::RequestParser rp{ctx}; | 211 | IPC::RequestParser rp{ctx}; |
| 190 | auto params = rp.PopRaw<AudioCore::AudioRendererParameter>(); | 212 | auto params = rp.PopRaw<AudioCore::AudioRendererParameter>(); |
| 191 | 213 | ||
| 192 | u64 buffer_sz = Common::AlignUp(4 * params.unknown_8, 0x40); | 214 | u64 buffer_sz = Common::AlignUp(4 * params.mix_buffer_count, 0x40); |
| 193 | buffer_sz += params.unknown_c * 1024; | 215 | buffer_sz += params.unknown_c * 1024; |
| 194 | buffer_sz += 0x940 * (params.unknown_c + 1); | 216 | buffer_sz += 0x940 * (params.unknown_c + 1); |
| 195 | buffer_sz += 0x3F0 * params.voice_count; | 217 | buffer_sz += 0x3F0 * params.voice_count; |
| @@ -197,7 +219,7 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) { | |||
| 197 | buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10); | 219 | buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10); |
| 198 | buffer_sz += | 220 | buffer_sz += |
| 199 | Common::AlignUp((0x3C0 * (params.sink_count + params.unknown_c) + 4 * params.sample_count) * | 221 | Common::AlignUp((0x3C0 * (params.sink_count + params.unknown_c) + 4 * params.sample_count) * |
| 200 | (params.unknown_8 + 6), | 222 | (params.mix_buffer_count + 6), |
| 201 | 0x40); | 223 | 0x40); |
| 202 | 224 | ||
| 203 | if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) { | 225 | if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) { |
| @@ -253,6 +275,16 @@ void AudRenU::GetAudioDevice(Kernel::HLERequestContext& ctx) { | |||
| 253 | LOG_DEBUG(Service_Audio, "called"); | 275 | LOG_DEBUG(Service_Audio, "called"); |
| 254 | } | 276 | } |
| 255 | 277 | ||
| 278 | void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx) { | ||
| 279 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | ||
| 280 | |||
| 281 | rb.Push(RESULT_SUCCESS); | ||
| 282 | rb.PushIpcInterface<Audio::IAudioDevice>(); | ||
| 283 | |||
| 284 | LOG_WARNING(Service_Audio, "(STUBBED) called"); // TODO(ogniK): Figure out what is different | ||
| 285 | // based on the current revision | ||
| 286 | } | ||
| 287 | |||
| 256 | bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const { | 288 | bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const { |
| 257 | u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap | 289 | u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap |
| 258 | switch (feature) { | 290 | switch (feature) { |
diff --git a/src/core/hle/service/audio/audren_u.h b/src/core/hle/service/audio/audren_u.h index 14907f8ae..8600ac6e4 100644 --- a/src/core/hle/service/audio/audren_u.h +++ b/src/core/hle/service/audio/audren_u.h | |||
| @@ -22,6 +22,7 @@ private: | |||
| 22 | void OpenAudioRenderer(Kernel::HLERequestContext& ctx); | 22 | void OpenAudioRenderer(Kernel::HLERequestContext& ctx); |
| 23 | void GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx); | 23 | void GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx); |
| 24 | void GetAudioDevice(Kernel::HLERequestContext& ctx); | 24 | void GetAudioDevice(Kernel::HLERequestContext& ctx); |
| 25 | void GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx); | ||
| 25 | 26 | ||
| 26 | enum class AudioFeatures : u32 { | 27 | enum class AudioFeatures : u32 { |
| 27 | Splitter, | 28 | Splitter, |
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp index dcdfa0e19..970942d3f 100644 --- a/src/core/hle/service/hid/hid.cpp +++ b/src/core/hle/service/hid/hid.cpp | |||
| @@ -291,6 +291,7 @@ private: | |||
| 291 | class Hid final : public ServiceFramework<Hid> { | 291 | class Hid final : public ServiceFramework<Hid> { |
| 292 | public: | 292 | public: |
| 293 | Hid() : ServiceFramework("hid") { | 293 | Hid() : ServiceFramework("hid") { |
| 294 | // clang-format off | ||
| 294 | static const FunctionInfo functions[] = { | 295 | static const FunctionInfo functions[] = { |
| 295 | {0, &Hid::CreateAppletResource, "CreateAppletResource"}, | 296 | {0, &Hid::CreateAppletResource, "CreateAppletResource"}, |
| 296 | {1, &Hid::ActivateDebugPad, "ActivateDebugPad"}, | 297 | {1, &Hid::ActivateDebugPad, "ActivateDebugPad"}, |
| @@ -333,15 +334,13 @@ public: | |||
| 333 | {102, &Hid::SetSupportedNpadIdType, "SetSupportedNpadIdType"}, | 334 | {102, &Hid::SetSupportedNpadIdType, "SetSupportedNpadIdType"}, |
| 334 | {103, &Hid::ActivateNpad, "ActivateNpad"}, | 335 | {103, &Hid::ActivateNpad, "ActivateNpad"}, |
| 335 | {104, nullptr, "DeactivateNpad"}, | 336 | {104, nullptr, "DeactivateNpad"}, |
| 336 | {106, &Hid::AcquireNpadStyleSetUpdateEventHandle, | 337 | {106, &Hid::AcquireNpadStyleSetUpdateEventHandle, "AcquireNpadStyleSetUpdateEventHandle"}, |
| 337 | "AcquireNpadStyleSetUpdateEventHandle"}, | 338 | {107, &Hid::DisconnectNpad, "DisconnectNpad"}, |
| 338 | {107, nullptr, "DisconnectNpad"}, | ||
| 339 | {108, &Hid::GetPlayerLedPattern, "GetPlayerLedPattern"}, | 339 | {108, &Hid::GetPlayerLedPattern, "GetPlayerLedPattern"}, |
| 340 | {109, nullptr, "ActivateNpadWithRevision"}, | 340 | {109, nullptr, "ActivateNpadWithRevision"}, |
| 341 | {120, &Hid::SetNpadJoyHoldType, "SetNpadJoyHoldType"}, | 341 | {120, &Hid::SetNpadJoyHoldType, "SetNpadJoyHoldType"}, |
| 342 | {121, &Hid::GetNpadJoyHoldType, "GetNpadJoyHoldType"}, | 342 | {121, &Hid::GetNpadJoyHoldType, "GetNpadJoyHoldType"}, |
| 343 | {122, &Hid::SetNpadJoyAssignmentModeSingleByDefault, | 343 | {122, &Hid::SetNpadJoyAssignmentModeSingleByDefault, "SetNpadJoyAssignmentModeSingleByDefault"}, |
| 344 | "SetNpadJoyAssignmentModeSingleByDefault"}, | ||
| 345 | {123, nullptr, "SetNpadJoyAssignmentModeSingleByDefault"}, | 344 | {123, nullptr, "SetNpadJoyAssignmentModeSingleByDefault"}, |
| 346 | {124, &Hid::SetNpadJoyAssignmentModeDual, "SetNpadJoyAssignmentModeDual"}, | 345 | {124, &Hid::SetNpadJoyAssignmentModeDual, "SetNpadJoyAssignmentModeDual"}, |
| 347 | {125, &Hid::MergeSingleJoyAsDualJoy, "MergeSingleJoyAsDualJoy"}, | 346 | {125, &Hid::MergeSingleJoyAsDualJoy, "MergeSingleJoyAsDualJoy"}, |
| @@ -398,6 +397,8 @@ public: | |||
| 398 | {1000, nullptr, "SetNpadCommunicationMode"}, | 397 | {1000, nullptr, "SetNpadCommunicationMode"}, |
| 399 | {1001, nullptr, "GetNpadCommunicationMode"}, | 398 | {1001, nullptr, "GetNpadCommunicationMode"}, |
| 400 | }; | 399 | }; |
| 400 | // clang-format on | ||
| 401 | |||
| 401 | RegisterHandlers(functions); | 402 | RegisterHandlers(functions); |
| 402 | 403 | ||
| 403 | event = Kernel::Event::Create(Kernel::ResetType::OneShot, "hid:EventHandle"); | 404 | event = Kernel::Event::Create(Kernel::ResetType::OneShot, "hid:EventHandle"); |
| @@ -496,6 +497,12 @@ private: | |||
| 496 | LOG_WARNING(Service_HID, "(STUBBED) called"); | 497 | LOG_WARNING(Service_HID, "(STUBBED) called"); |
| 497 | } | 498 | } |
| 498 | 499 | ||
| 500 | void DisconnectNpad(Kernel::HLERequestContext& ctx) { | ||
| 501 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 502 | rb.Push(RESULT_SUCCESS); | ||
| 503 | LOG_WARNING(Service_HID, "(STUBBED) called"); | ||
| 504 | } | ||
| 505 | |||
| 499 | void GetPlayerLedPattern(Kernel::HLERequestContext& ctx) { | 506 | void GetPlayerLedPattern(Kernel::HLERequestContext& ctx) { |
| 500 | IPC::ResponseBuilder rb{ctx, 2}; | 507 | IPC::ResponseBuilder rb{ctx, 2}; |
| 501 | rb.Push(RESULT_SUCCESS); | 508 | rb.Push(RESULT_SUCCESS); |
diff --git a/src/core/hle/service/service.h b/src/core/hle/service/service.h index 8a294c0f2..cd9c74f3d 100644 --- a/src/core/hle/service/service.h +++ b/src/core/hle/service/service.h | |||
| @@ -23,7 +23,7 @@ class HLERequestContext; | |||
| 23 | } // namespace Kernel | 23 | } // namespace Kernel |
| 24 | 24 | ||
| 25 | namespace FileSys { | 25 | namespace FileSys { |
| 26 | struct VfsFilesystem; | 26 | class VfsFilesystem; |
| 27 | } | 27 | } |
| 28 | 28 | ||
| 29 | namespace Service { | 29 | namespace Service { |
diff --git a/src/core/loader/loader.cpp b/src/core/loader/loader.cpp index 2f5bfc67c..1f2f31535 100644 --- a/src/core/loader/loader.cpp +++ b/src/core/loader/loader.cpp | |||
| @@ -126,7 +126,7 @@ constexpr std::array<const char*, 36> RESULT_MESSAGES{ | |||
| 126 | }; | 126 | }; |
| 127 | 127 | ||
| 128 | std::string GetMessageForResultStatus(ResultStatus status) { | 128 | std::string GetMessageForResultStatus(ResultStatus status) { |
| 129 | return GetMessageForResultStatus(static_cast<size_t>(status)); | 129 | return GetMessageForResultStatus(static_cast<u16>(status)); |
| 130 | } | 130 | } |
| 131 | 131 | ||
| 132 | std::string GetMessageForResultStatus(u16 status) { | 132 | std::string GetMessageForResultStatus(u16 status) { |
diff --git a/src/core/loader/loader.h b/src/core/loader/loader.h index cfdadbee3..285363549 100644 --- a/src/core/loader/loader.h +++ b/src/core/loader/loader.h | |||
| @@ -56,7 +56,7 @@ FileType GuessFromFilename(const std::string& name); | |||
| 56 | std::string GetFileTypeString(FileType type); | 56 | std::string GetFileTypeString(FileType type); |
| 57 | 57 | ||
| 58 | /// Return type for functions in Loader namespace | 58 | /// Return type for functions in Loader namespace |
| 59 | enum class ResultStatus { | 59 | enum class ResultStatus : u16 { |
| 60 | Success, | 60 | Success, |
| 61 | ErrorAlreadyLoaded, | 61 | ErrorAlreadyLoaded, |
| 62 | ErrorNotImplemented, | 62 | ErrorNotImplemented, |
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 9f64b248b..2526ebf28 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -200,6 +200,14 @@ enum class IMinMaxExchange : u64 { | |||
| 200 | XHi = 3, | 200 | XHi = 3, |
| 201 | }; | 201 | }; |
| 202 | 202 | ||
| 203 | enum class XmadMode : u64 { | ||
| 204 | None = 0, | ||
| 205 | CLo = 1, | ||
| 206 | CHi = 2, | ||
| 207 | CSfu = 3, | ||
| 208 | CBcc = 4, | ||
| 209 | }; | ||
| 210 | |||
| 203 | enum class FlowCondition : u64 { | 211 | enum class FlowCondition : u64 { |
| 204 | Always = 0xF, | 212 | Always = 0xF, |
| 205 | Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for? | 213 | Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for? |
| @@ -457,6 +465,18 @@ union Instruction { | |||
| 457 | } bra; | 465 | } bra; |
| 458 | 466 | ||
| 459 | union { | 467 | union { |
| 468 | BitField<20, 16, u64> imm20_16; | ||
| 469 | BitField<36, 1, u64> product_shift_left; | ||
| 470 | BitField<37, 1, u64> merge_37; | ||
| 471 | BitField<48, 1, u64> sign_a; | ||
| 472 | BitField<49, 1, u64> sign_b; | ||
| 473 | BitField<50, 3, XmadMode> mode; | ||
| 474 | BitField<52, 1, u64> high_b; | ||
| 475 | BitField<53, 1, u64> high_a; | ||
| 476 | BitField<56, 1, u64> merge_56; | ||
| 477 | } xmad; | ||
| 478 | |||
| 479 | union { | ||
| 460 | BitField<20, 14, u64> offset; | 480 | BitField<20, 14, u64> offset; |
| 461 | BitField<34, 5, u64> index; | 481 | BitField<34, 5, u64> index; |
| 462 | } cbuf34; | 482 | } cbuf34; |
| @@ -593,6 +613,7 @@ public: | |||
| 593 | IntegerSetPredicate, | 613 | IntegerSetPredicate, |
| 594 | PredicateSetPredicate, | 614 | PredicateSetPredicate, |
| 595 | Conversion, | 615 | Conversion, |
| 616 | Xmad, | ||
| 596 | Unknown, | 617 | Unknown, |
| 597 | }; | 618 | }; |
| 598 | 619 | ||
| @@ -782,10 +803,10 @@ private: | |||
| 782 | INST("010010110101----", Id::ISET_C, Type::IntegerSet, "ISET_C"), | 803 | INST("010010110101----", Id::ISET_C, Type::IntegerSet, "ISET_C"), |
| 783 | INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"), | 804 | INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"), |
| 784 | INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"), | 805 | INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"), |
| 785 | INST("0011011-00------", Id::XMAD_IMM, Type::Arithmetic, "XMAD_IMM"), | 806 | INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"), |
| 786 | INST("0100111---------", Id::XMAD_CR, Type::Arithmetic, "XMAD_CR"), | 807 | INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"), |
| 787 | INST("010100010-------", Id::XMAD_RC, Type::Arithmetic, "XMAD_RC"), | 808 | INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"), |
| 788 | INST("0101101100------", Id::XMAD_RR, Type::Arithmetic, "XMAD_RR"), | 809 | INST("0101101100------", Id::XMAD_RR, Type::Xmad, "XMAD_RR"), |
| 789 | }; | 810 | }; |
| 790 | #undef INST | 811 | #undef INST |
| 791 | std::stable_sort(table.begin(), table.end(), [](const auto& a, const auto& b) { | 812 | std::stable_sort(table.begin(), table.end(), [](const auto& a, const auto& b) { |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 19e7f1161..6f0343888 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -46,6 +46,7 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { | |||
| 46 | case RenderTargetFormat::RGBA32_FLOAT: | 46 | case RenderTargetFormat::RGBA32_FLOAT: |
| 47 | case RenderTargetFormat::RGBA32_UINT: | 47 | case RenderTargetFormat::RGBA32_UINT: |
| 48 | return 16; | 48 | return 16; |
| 49 | case RenderTargetFormat::RGBA16_UINT: | ||
| 49 | case RenderTargetFormat::RGBA16_FLOAT: | 50 | case RenderTargetFormat::RGBA16_FLOAT: |
| 50 | case RenderTargetFormat::RG32_FLOAT: | 51 | case RenderTargetFormat::RG32_FLOAT: |
| 51 | return 8; | 52 | return 8; |
| @@ -67,6 +68,7 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { | |||
| 67 | case RenderTargetFormat::R16_UINT: | 68 | case RenderTargetFormat::R16_UINT: |
| 68 | case RenderTargetFormat::R16_SINT: | 69 | case RenderTargetFormat::R16_SINT: |
| 69 | case RenderTargetFormat::R16_FLOAT: | 70 | case RenderTargetFormat::R16_FLOAT: |
| 71 | case RenderTargetFormat::RG8_UNORM: | ||
| 70 | case RenderTargetFormat::RG8_SNORM: | 72 | case RenderTargetFormat::RG8_SNORM: |
| 71 | return 2; | 73 | return 2; |
| 72 | case RenderTargetFormat::R8_UNORM: | 74 | case RenderTargetFormat::R8_UNORM: |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index e008d8f26..73abb7a18 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -20,6 +20,7 @@ enum class RenderTargetFormat : u32 { | |||
| 20 | NONE = 0x0, | 20 | NONE = 0x0, |
| 21 | RGBA32_FLOAT = 0xC0, | 21 | RGBA32_FLOAT = 0xC0, |
| 22 | RGBA32_UINT = 0xC2, | 22 | RGBA32_UINT = 0xC2, |
| 23 | RGBA16_UINT = 0xC9, | ||
| 23 | RGBA16_FLOAT = 0xCA, | 24 | RGBA16_FLOAT = 0xCA, |
| 24 | RG32_FLOAT = 0xCB, | 25 | RG32_FLOAT = 0xCB, |
| 25 | BGRA8_UNORM = 0xCF, | 26 | BGRA8_UNORM = 0xCF, |
| @@ -35,6 +36,7 @@ enum class RenderTargetFormat : u32 { | |||
| 35 | R11G11B10_FLOAT = 0xE0, | 36 | R11G11B10_FLOAT = 0xE0, |
| 36 | R32_FLOAT = 0xE5, | 37 | R32_FLOAT = 0xE5, |
| 37 | B5G6R5_UNORM = 0xE8, | 38 | B5G6R5_UNORM = 0xE8, |
| 39 | RG8_UNORM = 0xEA, | ||
| 38 | RG8_SNORM = 0xEB, | 40 | RG8_SNORM = 0xEB, |
| 39 | R16_UNORM = 0xEE, | 41 | R16_UNORM = 0xEE, |
| 40 | R16_SNORM = 0xEF, | 42 | R16_SNORM = 0xEF, |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 38a7b1413..52a649e2f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -36,30 +36,21 @@ MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); | |||
| 36 | MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); | 36 | MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); |
| 37 | MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); | 37 | MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); |
| 38 | 38 | ||
| 39 | RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window) : emu_window{window} { | 39 | RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window) |
| 40 | : emu_window{window}, stream_buffer(GL_ARRAY_BUFFER, STREAM_BUFFER_SIZE) { | ||
| 40 | // Create sampler objects | 41 | // Create sampler objects |
| 41 | for (size_t i = 0; i < texture_samplers.size(); ++i) { | 42 | for (size_t i = 0; i < texture_samplers.size(); ++i) { |
| 42 | texture_samplers[i].Create(); | 43 | texture_samplers[i].Create(); |
| 43 | state.texture_units[i].sampler = texture_samplers[i].sampler.handle; | 44 | state.texture_units[i].sampler = texture_samplers[i].sampler.handle; |
| 44 | } | 45 | } |
| 45 | 46 | ||
| 46 | // Create SSBOs | ||
| 47 | for (size_t stage = 0; stage < ssbos.size(); ++stage) { | ||
| 48 | for (size_t buffer = 0; buffer < ssbos[stage].size(); ++buffer) { | ||
| 49 | ssbos[stage][buffer].Create(); | ||
| 50 | state.draw.const_buffers[stage][buffer].ssbo = ssbos[stage][buffer].handle; | ||
| 51 | } | ||
| 52 | } | ||
| 53 | |||
| 54 | GLint ext_num; | 47 | GLint ext_num; |
| 55 | glGetIntegerv(GL_NUM_EXTENSIONS, &ext_num); | 48 | glGetIntegerv(GL_NUM_EXTENSIONS, &ext_num); |
| 56 | for (GLint i = 0; i < ext_num; i++) { | 49 | for (GLint i = 0; i < ext_num; i++) { |
| 57 | const std::string_view extension{ | 50 | const std::string_view extension{ |
| 58 | reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, i))}; | 51 | reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, i))}; |
| 59 | 52 | ||
| 60 | if (extension == "GL_ARB_buffer_storage") { | 53 | if (extension == "GL_ARB_direct_state_access") { |
| 61 | has_ARB_buffer_storage = true; | ||
| 62 | } else if (extension == "GL_ARB_direct_state_access") { | ||
| 63 | has_ARB_direct_state_access = true; | 54 | has_ARB_direct_state_access = true; |
| 64 | } else if (extension == "GL_ARB_separate_shader_objects") { | 55 | } else if (extension == "GL_ARB_separate_shader_objects") { |
| 65 | has_ARB_separate_shader_objects = true; | 56 | has_ARB_separate_shader_objects = true; |
| @@ -86,47 +77,31 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window) : emu_wind | |||
| 86 | 77 | ||
| 87 | hw_vao.Create(); | 78 | hw_vao.Create(); |
| 88 | 79 | ||
| 89 | stream_buffer = OGLStreamBuffer::MakeBuffer(has_ARB_buffer_storage, GL_ARRAY_BUFFER); | 80 | state.draw.vertex_buffer = stream_buffer.GetHandle(); |
| 90 | stream_buffer->Create(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE / 2); | ||
| 91 | state.draw.vertex_buffer = stream_buffer->GetHandle(); | ||
| 92 | 81 | ||
| 93 | shader_program_manager = std::make_unique<GLShader::ProgramManager>(); | 82 | shader_program_manager = std::make_unique<GLShader::ProgramManager>(); |
| 94 | state.draw.shader_program = 0; | 83 | state.draw.shader_program = 0; |
| 95 | state.draw.vertex_array = hw_vao.handle; | 84 | state.draw.vertex_array = hw_vao.handle; |
| 96 | state.Apply(); | 85 | state.Apply(); |
| 97 | 86 | ||
| 98 | glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer->GetHandle()); | 87 | glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer.GetHandle()); |
| 99 | |||
| 100 | for (unsigned index = 0; index < uniform_buffers.size(); ++index) { | ||
| 101 | auto& buffer = uniform_buffers[index]; | ||
| 102 | buffer.Create(); | ||
| 103 | glBindBuffer(GL_UNIFORM_BUFFER, buffer.handle); | ||
| 104 | glBufferData(GL_UNIFORM_BUFFER, sizeof(GLShader::MaxwellUniformData), nullptr, | ||
| 105 | GL_STREAM_COPY); | ||
| 106 | glBindBufferBase(GL_UNIFORM_BUFFER, index, buffer.handle); | ||
| 107 | } | ||
| 108 | 88 | ||
| 109 | glEnable(GL_BLEND); | 89 | glEnable(GL_BLEND); |
| 110 | 90 | ||
| 91 | glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment); | ||
| 92 | |||
| 111 | LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!"); | 93 | LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!"); |
| 112 | } | 94 | } |
| 113 | 95 | ||
| 114 | RasterizerOpenGL::~RasterizerOpenGL() { | 96 | RasterizerOpenGL::~RasterizerOpenGL() {} |
| 115 | if (stream_buffer != nullptr) { | ||
| 116 | state.draw.vertex_buffer = stream_buffer->GetHandle(); | ||
| 117 | state.Apply(); | ||
| 118 | stream_buffer->Release(); | ||
| 119 | } | ||
| 120 | } | ||
| 121 | 97 | ||
| 122 | std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, | 98 | std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, |
| 123 | GLintptr buffer_offset) { | 99 | GLintptr buffer_offset) { |
| 124 | MICROPROFILE_SCOPE(OpenGL_VAO); | 100 | MICROPROFILE_SCOPE(OpenGL_VAO); |
| 125 | const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; | 101 | const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; |
| 126 | const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager; | ||
| 127 | 102 | ||
| 128 | state.draw.vertex_array = hw_vao.handle; | 103 | state.draw.vertex_array = hw_vao.handle; |
| 129 | state.draw.vertex_buffer = stream_buffer->GetHandle(); | 104 | state.draw.vertex_buffer = stream_buffer.GetHandle(); |
| 130 | state.Apply(); | 105 | state.Apply(); |
| 131 | 106 | ||
| 132 | // Upload all guest vertex arrays sequentially to our buffer | 107 | // Upload all guest vertex arrays sequentially to our buffer |
| @@ -141,16 +116,15 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, | |||
| 141 | ASSERT(end > start); | 116 | ASSERT(end > start); |
| 142 | u64 size = end - start + 1; | 117 | u64 size = end - start + 1; |
| 143 | 118 | ||
| 144 | // Copy vertex array data | 119 | GLintptr vertex_buffer_offset; |
| 145 | Memory::ReadBlock(*memory_manager->GpuToCpuAddress(start), array_ptr, size); | 120 | std::tie(array_ptr, buffer_offset, vertex_buffer_offset) = |
| 121 | UploadMemory(array_ptr, buffer_offset, start, size); | ||
| 146 | 122 | ||
| 147 | // Bind the vertex array to the buffer at the current offset. | 123 | // Bind the vertex array to the buffer at the current offset. |
| 148 | glBindVertexBuffer(index, stream_buffer->GetHandle(), buffer_offset, vertex_array.stride); | 124 | glBindVertexBuffer(index, stream_buffer.GetHandle(), vertex_buffer_offset, |
| 125 | vertex_array.stride); | ||
| 149 | 126 | ||
| 150 | ASSERT_MSG(vertex_array.divisor == 0, "Vertex buffer divisor unimplemented"); | 127 | ASSERT_MSG(vertex_array.divisor == 0, "Vertex buffer divisor unimplemented"); |
| 151 | |||
| 152 | array_ptr += size; | ||
| 153 | buffer_offset += size; | ||
| 154 | } | 128 | } |
| 155 | 129 | ||
| 156 | // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. | 130 | // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. |
| @@ -201,22 +175,12 @@ static GLShader::ProgramCode GetShaderProgramCode(Maxwell::ShaderProgram program | |||
| 201 | return program_code; | 175 | return program_code; |
| 202 | } | 176 | } |
| 203 | 177 | ||
| 204 | void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { | 178 | std::pair<u8*, GLintptr> RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { |
| 205 | // Helper function for uploading uniform data | ||
| 206 | const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) { | ||
| 207 | if (has_ARB_direct_state_access) { | ||
| 208 | glCopyNamedBufferSubData(stream_buffer->GetHandle(), handle, offset, 0, size); | ||
| 209 | } else { | ||
| 210 | glBindBuffer(GL_COPY_WRITE_BUFFER, handle); | ||
| 211 | glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size); | ||
| 212 | } | ||
| 213 | }; | ||
| 214 | |||
| 215 | auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); | 179 | auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); |
| 216 | 180 | ||
| 217 | // Next available bindpoints to use when uploading the const buffers and textures to the GLSL | 181 | // Next available bindpoints to use when uploading the const buffers and textures to the GLSL |
| 218 | // shaders. The constbuffer bindpoint starts after the shader stage configuration bind points. | 182 | // shaders. The constbuffer bindpoint starts after the shader stage configuration bind points. |
| 219 | u32 current_constbuffer_bindpoint = static_cast<u32>(uniform_buffers.size()); | 183 | u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; |
| 220 | u32 current_texture_bindpoint = 0; | 184 | u32 current_texture_bindpoint = 0; |
| 221 | 185 | ||
| 222 | for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | 186 | for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { |
| @@ -228,22 +192,21 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { | |||
| 228 | continue; | 192 | continue; |
| 229 | } | 193 | } |
| 230 | 194 | ||
| 195 | std::tie(buffer_ptr, buffer_offset) = | ||
| 196 | AlignBuffer(buffer_ptr, buffer_offset, static_cast<size_t>(uniform_buffer_alignment)); | ||
| 197 | |||
| 231 | const size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5 | 198 | const size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5 |
| 232 | 199 | ||
| 233 | GLShader::MaxwellUniformData ubo{}; | 200 | GLShader::MaxwellUniformData ubo{}; |
| 234 | ubo.SetFromRegs(gpu.state.shader_stages[stage]); | 201 | ubo.SetFromRegs(gpu.state.shader_stages[stage]); |
| 235 | std::memcpy(buffer_ptr, &ubo, sizeof(ubo)); | 202 | std::memcpy(buffer_ptr, &ubo, sizeof(ubo)); |
| 236 | 203 | ||
| 237 | // Flush the buffer so that the GPU can see the data we just wrote. | 204 | // Bind the buffer |
| 238 | glFlushMappedBufferRange(GL_ARRAY_BUFFER, buffer_offset, sizeof(ubo)); | 205 | glBindBufferRange(GL_UNIFORM_BUFFER, stage, stream_buffer.GetHandle(), buffer_offset, |
| 239 | 206 | sizeof(ubo)); | |
| 240 | // Upload uniform data as one UBO per stage | ||
| 241 | const GLintptr ubo_offset = buffer_offset; | ||
| 242 | copy_buffer(uniform_buffers[stage].handle, ubo_offset, | ||
| 243 | sizeof(GLShader::MaxwellUniformData)); | ||
| 244 | 207 | ||
| 245 | buffer_ptr += sizeof(GLShader::MaxwellUniformData); | 208 | buffer_ptr += sizeof(ubo); |
| 246 | buffer_offset += sizeof(GLShader::MaxwellUniformData); | 209 | buffer_offset += sizeof(ubo); |
| 247 | 210 | ||
| 248 | GLShader::ShaderSetup setup{GetShaderProgramCode(program)}; | 211 | GLShader::ShaderSetup setup{GetShaderProgramCode(program)}; |
| 249 | GLShader::ShaderEntries shader_resources; | 212 | GLShader::ShaderEntries shader_resources; |
| @@ -282,9 +245,9 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { | |||
| 282 | static_cast<Maxwell::ShaderStage>(stage)); | 245 | static_cast<Maxwell::ShaderStage>(stage)); |
| 283 | 246 | ||
| 284 | // Configure the const buffers for this shader stage. | 247 | // Configure the const buffers for this shader stage. |
| 285 | current_constbuffer_bindpoint = | 248 | std::tie(buffer_ptr, buffer_offset, current_constbuffer_bindpoint) = SetupConstBuffers( |
| 286 | SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), gl_stage_program, | 249 | buffer_ptr, buffer_offset, static_cast<Maxwell::ShaderStage>(stage), gl_stage_program, |
| 287 | current_constbuffer_bindpoint, shader_resources.const_buffer_entries); | 250 | current_constbuffer_bindpoint, shader_resources.const_buffer_entries); |
| 288 | 251 | ||
| 289 | // Configure the textures for this shader stage. | 252 | // Configure the textures for this shader stage. |
| 290 | current_texture_bindpoint = | 253 | current_texture_bindpoint = |
| @@ -299,6 +262,8 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { | |||
| 299 | } | 262 | } |
| 300 | 263 | ||
| 301 | shader_program_manager->UseTrivialGeometryShader(); | 264 | shader_program_manager->UseTrivialGeometryShader(); |
| 265 | |||
| 266 | return {buffer_ptr, buffer_offset}; | ||
| 302 | } | 267 | } |
| 303 | 268 | ||
| 304 | size_t RasterizerOpenGL::CalculateVertexArraysSize() const { | 269 | size_t RasterizerOpenGL::CalculateVertexArraysSize() const { |
| @@ -432,6 +397,31 @@ void RasterizerOpenGL::Clear() { | |||
| 432 | } | 397 | } |
| 433 | } | 398 | } |
| 434 | 399 | ||
| 400 | std::pair<u8*, GLintptr> RasterizerOpenGL::AlignBuffer(u8* buffer_ptr, GLintptr buffer_offset, | ||
| 401 | size_t alignment) { | ||
| 402 | // Align the offset, not the mapped pointer | ||
| 403 | GLintptr offset_aligned = | ||
| 404 | static_cast<GLintptr>(Common::AlignUp(static_cast<size_t>(buffer_offset), alignment)); | ||
| 405 | return {buffer_ptr + (offset_aligned - buffer_offset), offset_aligned}; | ||
| 406 | } | ||
| 407 | |||
| 408 | std::tuple<u8*, GLintptr, GLintptr> RasterizerOpenGL::UploadMemory(u8* buffer_ptr, | ||
| 409 | GLintptr buffer_offset, | ||
| 410 | Tegra::GPUVAddr gpu_addr, | ||
| 411 | size_t size, size_t alignment) { | ||
| 412 | std::tie(buffer_ptr, buffer_offset) = AlignBuffer(buffer_ptr, buffer_offset, alignment); | ||
| 413 | GLintptr uploaded_offset = buffer_offset; | ||
| 414 | |||
| 415 | const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager; | ||
| 416 | const boost::optional<VAddr> cpu_addr{memory_manager->GpuToCpuAddress(gpu_addr)}; | ||
| 417 | Memory::ReadBlock(*cpu_addr, buffer_ptr, size); | ||
| 418 | |||
| 419 | buffer_ptr += size; | ||
| 420 | buffer_offset += size; | ||
| 421 | |||
| 422 | return {buffer_ptr, buffer_offset, uploaded_offset}; | ||
| 423 | } | ||
| 424 | |||
| 435 | void RasterizerOpenGL::DrawArrays() { | 425 | void RasterizerOpenGL::DrawArrays() { |
| 436 | if (accelerate_draw == AccelDraw::Disabled) | 426 | if (accelerate_draw == AccelDraw::Disabled) |
| 437 | return; | 427 | return; |
| @@ -456,7 +446,7 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 456 | const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()}; | 446 | const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()}; |
| 457 | const unsigned vertex_num{is_indexed ? regs.index_array.count : regs.vertex_buffer.count}; | 447 | const unsigned vertex_num{is_indexed ? regs.index_array.count : regs.vertex_buffer.count}; |
| 458 | 448 | ||
| 459 | state.draw.vertex_buffer = stream_buffer->GetHandle(); | 449 | state.draw.vertex_buffer = stream_buffer.GetHandle(); |
| 460 | state.Apply(); | 450 | state.Apply(); |
| 461 | 451 | ||
| 462 | size_t buffer_size = CalculateVertexArraysSize(); | 452 | size_t buffer_size = CalculateVertexArraysSize(); |
| @@ -466,41 +456,31 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 466 | } | 456 | } |
| 467 | 457 | ||
| 468 | // Uniform space for the 5 shader stages | 458 | // Uniform space for the 5 shader stages |
| 469 | buffer_size = Common::AlignUp<size_t>(buffer_size, 4) + | 459 | buffer_size = |
| 470 | sizeof(GLShader::MaxwellUniformData) * Maxwell::MaxShaderStage; | 460 | Common::AlignUp<size_t>(buffer_size, 4) + |
| 461 | (sizeof(GLShader::MaxwellUniformData) + uniform_buffer_alignment) * Maxwell::MaxShaderStage; | ||
| 462 | |||
| 463 | // Add space for at least 18 constant buffers | ||
| 464 | buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment); | ||
| 471 | 465 | ||
| 472 | u8* buffer_ptr; | 466 | u8* buffer_ptr; |
| 473 | GLintptr buffer_offset; | 467 | GLintptr buffer_offset; |
| 474 | std::tie(buffer_ptr, buffer_offset) = | 468 | std::tie(buffer_ptr, buffer_offset, std::ignore) = |
| 475 | stream_buffer->Map(static_cast<GLsizeiptr>(buffer_size), 4); | 469 | stream_buffer.Map(static_cast<GLsizeiptr>(buffer_size), 4); |
| 470 | u8* buffer_ptr_base = buffer_ptr; | ||
| 476 | 471 | ||
| 477 | u8* offseted_buffer; | 472 | std::tie(buffer_ptr, buffer_offset) = SetupVertexArrays(buffer_ptr, buffer_offset); |
| 478 | std::tie(offseted_buffer, buffer_offset) = SetupVertexArrays(buffer_ptr, buffer_offset); | ||
| 479 | |||
| 480 | offseted_buffer = | ||
| 481 | reinterpret_cast<u8*>(Common::AlignUp(reinterpret_cast<size_t>(offseted_buffer), 4)); | ||
| 482 | buffer_offset = Common::AlignUp<size_t>(buffer_offset, 4); | ||
| 483 | 473 | ||
| 484 | // If indexed mode, copy the index buffer | 474 | // If indexed mode, copy the index buffer |
| 485 | GLintptr index_buffer_offset = 0; | 475 | GLintptr index_buffer_offset = 0; |
| 486 | if (is_indexed) { | 476 | if (is_indexed) { |
| 487 | const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager; | 477 | std::tie(buffer_ptr, buffer_offset, index_buffer_offset) = UploadMemory( |
| 488 | const boost::optional<VAddr> index_data_addr{ | 478 | buffer_ptr, buffer_offset, regs.index_array.StartAddress(), index_buffer_size); |
| 489 | memory_manager->GpuToCpuAddress(regs.index_array.StartAddress())}; | ||
| 490 | Memory::ReadBlock(*index_data_addr, offseted_buffer, index_buffer_size); | ||
| 491 | |||
| 492 | index_buffer_offset = buffer_offset; | ||
| 493 | offseted_buffer += index_buffer_size; | ||
| 494 | buffer_offset += index_buffer_size; | ||
| 495 | } | 479 | } |
| 496 | 480 | ||
| 497 | offseted_buffer = | 481 | std::tie(buffer_ptr, buffer_offset) = SetupShaders(buffer_ptr, buffer_offset); |
| 498 | reinterpret_cast<u8*>(Common::AlignUp(reinterpret_cast<size_t>(offseted_buffer), 4)); | ||
| 499 | buffer_offset = Common::AlignUp<size_t>(buffer_offset, 4); | ||
| 500 | |||
| 501 | SetupShaders(offseted_buffer, buffer_offset); | ||
| 502 | 482 | ||
| 503 | stream_buffer->Unmap(); | 483 | stream_buffer.Unmap(buffer_ptr - buffer_ptr_base); |
| 504 | 484 | ||
| 505 | shader_program_manager->ApplyTo(state); | 485 | shader_program_manager->ApplyTo(state); |
| 506 | state.Apply(); | 486 | state.Apply(); |
| @@ -647,36 +627,23 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr | |||
| 647 | } | 627 | } |
| 648 | } | 628 | } |
| 649 | 629 | ||
| 650 | u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint program, | 630 | std::tuple<u8*, GLintptr, u32> RasterizerOpenGL::SetupConstBuffers( |
| 651 | u32 current_bindpoint, | 631 | u8* buffer_ptr, GLintptr buffer_offset, Maxwell::ShaderStage stage, GLuint program, |
| 652 | const std::vector<GLShader::ConstBufferEntry>& entries) { | 632 | u32 current_bindpoint, const std::vector<GLShader::ConstBufferEntry>& entries) { |
| 653 | const auto& gpu = Core::System::GetInstance().GPU(); | 633 | const auto& gpu = Core::System::GetInstance().GPU(); |
| 654 | const auto& maxwell3d = gpu.Maxwell3D(); | 634 | const auto& maxwell3d = gpu.Maxwell3D(); |
| 655 | 635 | ||
| 656 | // Reset all buffer draw state for this stage. | ||
| 657 | for (auto& buffer : state.draw.const_buffers[static_cast<size_t>(stage)]) { | ||
| 658 | buffer.bindpoint = 0; | ||
| 659 | buffer.enabled = false; | ||
| 660 | } | ||
| 661 | |||
| 662 | // Upload only the enabled buffers from the 16 constbuffers of each shader stage | 636 | // Upload only the enabled buffers from the 16 constbuffers of each shader stage |
| 663 | const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<size_t>(stage)]; | 637 | const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<size_t>(stage)]; |
| 664 | 638 | ||
| 665 | for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { | 639 | for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { |
| 666 | const auto& used_buffer = entries[bindpoint]; | 640 | const auto& used_buffer = entries[bindpoint]; |
| 667 | const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()]; | 641 | const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()]; |
| 668 | auto& buffer_draw_state = | ||
| 669 | state.draw.const_buffers[static_cast<size_t>(stage)][used_buffer.GetIndex()]; | ||
| 670 | 642 | ||
| 671 | if (!buffer.enabled) { | 643 | if (!buffer.enabled) { |
| 672 | continue; | 644 | continue; |
| 673 | } | 645 | } |
| 674 | 646 | ||
| 675 | buffer_draw_state.enabled = true; | ||
| 676 | buffer_draw_state.bindpoint = current_bindpoint + bindpoint; | ||
| 677 | |||
| 678 | boost::optional<VAddr> addr = gpu.memory_manager->GpuToCpuAddress(buffer.address); | ||
| 679 | |||
| 680 | size_t size = 0; | 647 | size_t size = 0; |
| 681 | 648 | ||
| 682 | if (used_buffer.IsIndirect()) { | 649 | if (used_buffer.IsIndirect()) { |
| @@ -698,25 +665,26 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr | |||
| 698 | size = Common::AlignUp(size, sizeof(GLvec4)); | 665 | size = Common::AlignUp(size, sizeof(GLvec4)); |
| 699 | ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big"); | 666 | ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big"); |
| 700 | 667 | ||
| 701 | std::vector<u8> data(size); | 668 | GLintptr const_buffer_offset; |
| 702 | Memory::ReadBlock(*addr, data.data(), data.size()); | 669 | std::tie(buffer_ptr, buffer_offset, const_buffer_offset) = |
| 670 | UploadMemory(buffer_ptr, buffer_offset, buffer.address, size, | ||
| 671 | static_cast<size_t>(uniform_buffer_alignment)); | ||
| 703 | 672 | ||
| 704 | glBindBuffer(GL_UNIFORM_BUFFER, buffer_draw_state.ssbo); | 673 | glBindBufferRange(GL_UNIFORM_BUFFER, current_bindpoint + bindpoint, |
| 705 | glBufferData(GL_UNIFORM_BUFFER, data.size(), data.data(), GL_DYNAMIC_DRAW); | 674 | stream_buffer.GetHandle(), const_buffer_offset, size); |
| 706 | glBindBuffer(GL_UNIFORM_BUFFER, 0); | ||
| 707 | 675 | ||
| 708 | // Now configure the bindpoint of the buffer inside the shader | 676 | // Now configure the bindpoint of the buffer inside the shader |
| 709 | const std::string buffer_name = used_buffer.GetName(); | 677 | const std::string buffer_name = used_buffer.GetName(); |
| 710 | const GLuint index = | 678 | const GLuint index = |
| 711 | glGetProgramResourceIndex(program, GL_UNIFORM_BLOCK, buffer_name.c_str()); | 679 | glGetProgramResourceIndex(program, GL_UNIFORM_BLOCK, buffer_name.c_str()); |
| 712 | if (index != GL_INVALID_INDEX) { | 680 | if (index != GL_INVALID_INDEX) { |
| 713 | glUniformBlockBinding(program, index, buffer_draw_state.bindpoint); | 681 | glUniformBlockBinding(program, index, current_bindpoint + bindpoint); |
| 714 | } | 682 | } |
| 715 | } | 683 | } |
| 716 | 684 | ||
| 717 | state.Apply(); | 685 | state.Apply(); |
| 718 | 686 | ||
| 719 | return current_bindpoint + static_cast<u32>(entries.size()); | 687 | return {buffer_ptr, buffer_offset, current_bindpoint + static_cast<u32>(entries.size())}; |
| 720 | } | 688 | } |
| 721 | 689 | ||
| 722 | u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program, u32 current_unit, | 690 | u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program, u32 current_unit, |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index bd01dc0ae..74307f626 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <cstddef> | 8 | #include <cstddef> |
| 9 | #include <memory> | 9 | #include <memory> |
| 10 | #include <tuple> | ||
| 10 | #include <utility> | 11 | #include <utility> |
| 11 | #include <vector> | 12 | #include <vector> |
| 12 | #include <glad/glad.h> | 13 | #include <glad/glad.h> |
| @@ -100,9 +101,10 @@ private: | |||
| 100 | * @param entries Vector describing the buffers that are actually used in the guest shader. | 101 | * @param entries Vector describing the buffers that are actually used in the guest shader. |
| 101 | * @returns The next available bindpoint for use in the next shader stage. | 102 | * @returns The next available bindpoint for use in the next shader stage. |
| 102 | */ | 103 | */ |
| 103 | u32 SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, GLuint program, | 104 | std::tuple<u8*, GLintptr, u32> SetupConstBuffers( |
| 104 | u32 current_bindpoint, | 105 | u8* buffer_ptr, GLintptr buffer_offset, Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, |
| 105 | const std::vector<GLShader::ConstBufferEntry>& entries); | 106 | GLuint program, u32 current_bindpoint, |
| 107 | const std::vector<GLShader::ConstBufferEntry>& entries); | ||
| 106 | 108 | ||
| 107 | /* | 109 | /* |
| 108 | * Configures the current textures to use for the draw command. | 110 | * Configures the current textures to use for the draw command. |
| @@ -139,7 +141,6 @@ private: | |||
| 139 | /// Syncs the blend state to match the guest state | 141 | /// Syncs the blend state to match the guest state |
| 140 | void SyncBlendState(); | 142 | void SyncBlendState(); |
| 141 | 143 | ||
| 142 | bool has_ARB_buffer_storage = false; | ||
| 143 | bool has_ARB_direct_state_access = false; | 144 | bool has_ARB_direct_state_access = false; |
| 144 | bool has_ARB_separate_shader_objects = false; | 145 | bool has_ARB_separate_shader_objects = false; |
| 145 | bool has_ARB_vertex_attrib_binding = false; | 146 | bool has_ARB_vertex_attrib_binding = false; |
| @@ -155,22 +156,24 @@ private: | |||
| 155 | OGLVertexArray hw_vao; | 156 | OGLVertexArray hw_vao; |
| 156 | 157 | ||
| 157 | std::array<SamplerInfo, GLShader::NumTextureSamplers> texture_samplers; | 158 | std::array<SamplerInfo, GLShader::NumTextureSamplers> texture_samplers; |
| 158 | std::array<std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers>, | ||
| 159 | Tegra::Engines::Maxwell3D::Regs::MaxShaderStage> | ||
| 160 | ssbos; | ||
| 161 | 159 | ||
| 162 | static constexpr size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; | 160 | static constexpr size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; |
| 163 | std::unique_ptr<OGLStreamBuffer> stream_buffer; | 161 | OGLStreamBuffer stream_buffer; |
| 164 | OGLBuffer uniform_buffer; | 162 | OGLBuffer uniform_buffer; |
| 165 | OGLFramebuffer framebuffer; | 163 | OGLFramebuffer framebuffer; |
| 164 | GLint uniform_buffer_alignment; | ||
| 166 | 165 | ||
| 167 | size_t CalculateVertexArraysSize() const; | 166 | size_t CalculateVertexArraysSize() const; |
| 168 | 167 | ||
| 169 | std::pair<u8*, GLintptr> SetupVertexArrays(u8* array_ptr, GLintptr buffer_offset); | 168 | std::pair<u8*, GLintptr> SetupVertexArrays(u8* array_ptr, GLintptr buffer_offset); |
| 170 | 169 | ||
| 171 | std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxShaderStage> uniform_buffers; | 170 | std::pair<u8*, GLintptr> SetupShaders(u8* buffer_ptr, GLintptr buffer_offset); |
| 172 | 171 | ||
| 173 | void SetupShaders(u8* buffer_ptr, GLintptr buffer_offset); | 172 | std::pair<u8*, GLintptr> AlignBuffer(u8* buffer_ptr, GLintptr buffer_offset, size_t alignment); |
| 173 | |||
| 174 | std::tuple<u8*, GLintptr, GLintptr> UploadMemory(u8* buffer_ptr, GLintptr buffer_offset, | ||
| 175 | Tegra::GPUVAddr gpu_addr, size_t size, | ||
| 176 | size_t alignment = 4); | ||
| 174 | 177 | ||
| 175 | enum class AccelDraw { Disabled, Arrays, Indexed }; | 178 | enum class AccelDraw { Disabled, Arrays, Indexed }; |
| 176 | AccelDraw accelerate_draw = AccelDraw::Disabled; | 179 | AccelDraw accelerate_draw = AccelDraw::Disabled; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 84c250c63..d635550d2 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | |||
| @@ -101,6 +101,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form | |||
| 101 | {GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // R8 | 101 | {GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // R8 |
| 102 | {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // R8UI | 102 | {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // R8UI |
| 103 | {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, ComponentType::Float, false}, // RGBA16F | 103 | {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, ComponentType::Float, false}, // RGBA16F |
| 104 | {GL_RGBA16UI, GL_RGBA, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RGBA16UI | ||
| 104 | {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float, | 105 | {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float, |
| 105 | false}, // R11FG11FB10F | 106 | false}, // R11FG11FB10F |
| 106 | {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI | 107 | {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI |
| @@ -134,6 +135,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form | |||
| 134 | {GL_RG16_SNORM, GL_RG, GL_SHORT, ComponentType::SNorm, false}, // RG16S | 135 | {GL_RG16_SNORM, GL_RG, GL_SHORT, ComponentType::SNorm, false}, // RG16S |
| 135 | {GL_RGB32F, GL_RGB, GL_FLOAT, ComponentType::Float, false}, // RGB32F | 136 | {GL_RGB32F, GL_RGB, GL_FLOAT, ComponentType::Float, false}, // RGB32F |
| 136 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // SRGBA8 | 137 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // SRGBA8 |
| 138 | {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // RG8U | ||
| 137 | {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S | 139 | {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S |
| 138 | 140 | ||
| 139 | // DepthStencil formats | 141 | // DepthStencil formats |
| @@ -234,32 +236,57 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, std::vector<u8>& gl_bu | |||
| 234 | static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr), | 236 | static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr), |
| 235 | SurfaceParams::MaxPixelFormat> | 237 | SurfaceParams::MaxPixelFormat> |
| 236 | morton_to_gl_fns = { | 238 | morton_to_gl_fns = { |
| 237 | MortonCopy<true, PixelFormat::ABGR8U>, MortonCopy<true, PixelFormat::ABGR8S>, | 239 | // clang-format off |
| 238 | MortonCopy<true, PixelFormat::B5G6R5>, MortonCopy<true, PixelFormat::A2B10G10R10>, | 240 | MortonCopy<true, PixelFormat::ABGR8U>, |
| 239 | MortonCopy<true, PixelFormat::A1B5G5R5>, MortonCopy<true, PixelFormat::R8>, | 241 | MortonCopy<true, PixelFormat::ABGR8S>, |
| 240 | MortonCopy<true, PixelFormat::R8UI>, MortonCopy<true, PixelFormat::RGBA16F>, | 242 | MortonCopy<true, PixelFormat::B5G6R5>, |
| 241 | MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::RGBA32UI>, | 243 | MortonCopy<true, PixelFormat::A2B10G10R10>, |
| 242 | MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>, | 244 | MortonCopy<true, PixelFormat::A1B5G5R5>, |
| 243 | MortonCopy<true, PixelFormat::DXT45>, MortonCopy<true, PixelFormat::DXN1>, | 245 | MortonCopy<true, PixelFormat::R8>, |
| 244 | MortonCopy<true, PixelFormat::DXN2UNORM>, MortonCopy<true, PixelFormat::DXN2SNORM>, | 246 | MortonCopy<true, PixelFormat::R8UI>, |
| 245 | MortonCopy<true, PixelFormat::BC7U>, MortonCopy<true, PixelFormat::ASTC_2D_4X4>, | 247 | MortonCopy<true, PixelFormat::RGBA16F>, |
| 246 | MortonCopy<true, PixelFormat::G8R8>, MortonCopy<true, PixelFormat::BGRA8>, | 248 | MortonCopy<true, PixelFormat::RGBA16UI>, |
| 247 | MortonCopy<true, PixelFormat::RGBA32F>, MortonCopy<true, PixelFormat::RG32F>, | 249 | MortonCopy<true, PixelFormat::R11FG11FB10F>, |
| 248 | MortonCopy<true, PixelFormat::R32F>, MortonCopy<true, PixelFormat::R16F>, | 250 | MortonCopy<true, PixelFormat::RGBA32UI>, |
| 249 | MortonCopy<true, PixelFormat::R16UNORM>, MortonCopy<true, PixelFormat::R16S>, | 251 | MortonCopy<true, PixelFormat::DXT1>, |
| 250 | MortonCopy<true, PixelFormat::R16UI>, MortonCopy<true, PixelFormat::R16I>, | 252 | MortonCopy<true, PixelFormat::DXT23>, |
| 251 | MortonCopy<true, PixelFormat::RG16>, MortonCopy<true, PixelFormat::RG16F>, | 253 | MortonCopy<true, PixelFormat::DXT45>, |
| 252 | MortonCopy<true, PixelFormat::RG16UI>, MortonCopy<true, PixelFormat::RG16I>, | 254 | MortonCopy<true, PixelFormat::DXN1>, |
| 253 | MortonCopy<true, PixelFormat::RG16S>, MortonCopy<true, PixelFormat::RGB32F>, | 255 | MortonCopy<true, PixelFormat::DXN2UNORM>, |
| 254 | MortonCopy<true, PixelFormat::SRGBA8>, MortonCopy<true, PixelFormat::RG8S>, | 256 | MortonCopy<true, PixelFormat::DXN2SNORM>, |
| 255 | MortonCopy<true, PixelFormat::Z24S8>, MortonCopy<true, PixelFormat::S8Z24>, | 257 | MortonCopy<true, PixelFormat::BC7U>, |
| 256 | MortonCopy<true, PixelFormat::Z32F>, MortonCopy<true, PixelFormat::Z16>, | 258 | MortonCopy<true, PixelFormat::ASTC_2D_4X4>, |
| 259 | MortonCopy<true, PixelFormat::G8R8>, | ||
| 260 | MortonCopy<true, PixelFormat::BGRA8>, | ||
| 261 | MortonCopy<true, PixelFormat::RGBA32F>, | ||
| 262 | MortonCopy<true, PixelFormat::RG32F>, | ||
| 263 | MortonCopy<true, PixelFormat::R32F>, | ||
| 264 | MortonCopy<true, PixelFormat::R16F>, | ||
| 265 | MortonCopy<true, PixelFormat::R16UNORM>, | ||
| 266 | MortonCopy<true, PixelFormat::R16S>, | ||
| 267 | MortonCopy<true, PixelFormat::R16UI>, | ||
| 268 | MortonCopy<true, PixelFormat::R16I>, | ||
| 269 | MortonCopy<true, PixelFormat::RG16>, | ||
| 270 | MortonCopy<true, PixelFormat::RG16F>, | ||
| 271 | MortonCopy<true, PixelFormat::RG16UI>, | ||
| 272 | MortonCopy<true, PixelFormat::RG16I>, | ||
| 273 | MortonCopy<true, PixelFormat::RG16S>, | ||
| 274 | MortonCopy<true, PixelFormat::RGB32F>, | ||
| 275 | MortonCopy<true, PixelFormat::SRGBA8>, | ||
| 276 | MortonCopy<true, PixelFormat::RG8U>, | ||
| 277 | MortonCopy<true, PixelFormat::RG8S>, | ||
| 278 | MortonCopy<true, PixelFormat::Z24S8>, | ||
| 279 | MortonCopy<true, PixelFormat::S8Z24>, | ||
| 280 | MortonCopy<true, PixelFormat::Z32F>, | ||
| 281 | MortonCopy<true, PixelFormat::Z16>, | ||
| 257 | MortonCopy<true, PixelFormat::Z32FS8>, | 282 | MortonCopy<true, PixelFormat::Z32FS8>, |
| 283 | // clang-format on | ||
| 258 | }; | 284 | }; |
| 259 | 285 | ||
| 260 | static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr), | 286 | static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr), |
| 261 | SurfaceParams::MaxPixelFormat> | 287 | SurfaceParams::MaxPixelFormat> |
| 262 | gl_to_morton_fns = { | 288 | gl_to_morton_fns = { |
| 289 | // clang-format off | ||
| 263 | MortonCopy<false, PixelFormat::ABGR8U>, | 290 | MortonCopy<false, PixelFormat::ABGR8U>, |
| 264 | MortonCopy<false, PixelFormat::ABGR8S>, | 291 | MortonCopy<false, PixelFormat::ABGR8S>, |
| 265 | MortonCopy<false, PixelFormat::B5G6R5>, | 292 | MortonCopy<false, PixelFormat::B5G6R5>, |
| @@ -268,6 +295,7 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU | |||
| 268 | MortonCopy<false, PixelFormat::R8>, | 295 | MortonCopy<false, PixelFormat::R8>, |
| 269 | MortonCopy<false, PixelFormat::R8UI>, | 296 | MortonCopy<false, PixelFormat::R8UI>, |
| 270 | MortonCopy<false, PixelFormat::RGBA16F>, | 297 | MortonCopy<false, PixelFormat::RGBA16F>, |
| 298 | MortonCopy<false, PixelFormat::RGBA16UI>, | ||
| 271 | MortonCopy<false, PixelFormat::R11FG11FB10F>, | 299 | MortonCopy<false, PixelFormat::R11FG11FB10F>, |
| 272 | MortonCopy<false, PixelFormat::RGBA32UI>, | 300 | MortonCopy<false, PixelFormat::RGBA32UI>, |
| 273 | // TODO(Subv): Swizzling DXT1/DXT23/DXT45/DXN1/DXN2/BC7U/ASTC_2D_4X4 formats is not | 301 | // TODO(Subv): Swizzling DXT1/DXT23/DXT45/DXN1/DXN2/BC7U/ASTC_2D_4X4 formats is not |
| @@ -297,12 +325,14 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU | |||
| 297 | MortonCopy<false, PixelFormat::RG16S>, | 325 | MortonCopy<false, PixelFormat::RG16S>, |
| 298 | MortonCopy<false, PixelFormat::RGB32F>, | 326 | MortonCopy<false, PixelFormat::RGB32F>, |
| 299 | MortonCopy<false, PixelFormat::SRGBA8>, | 327 | MortonCopy<false, PixelFormat::SRGBA8>, |
| 328 | MortonCopy<false, PixelFormat::RG8U>, | ||
| 300 | MortonCopy<false, PixelFormat::RG8S>, | 329 | MortonCopy<false, PixelFormat::RG8S>, |
| 301 | MortonCopy<false, PixelFormat::Z24S8>, | 330 | MortonCopy<false, PixelFormat::Z24S8>, |
| 302 | MortonCopy<false, PixelFormat::S8Z24>, | 331 | MortonCopy<false, PixelFormat::S8Z24>, |
| 303 | MortonCopy<false, PixelFormat::Z32F>, | 332 | MortonCopy<false, PixelFormat::Z32F>, |
| 304 | MortonCopy<false, PixelFormat::Z16>, | 333 | MortonCopy<false, PixelFormat::Z16>, |
| 305 | MortonCopy<false, PixelFormat::Z32FS8>, | 334 | MortonCopy<false, PixelFormat::Z32FS8>, |
| 335 | // clang-format on | ||
| 306 | }; | 336 | }; |
| 307 | 337 | ||
| 308 | // Allocate an uninitialized texture of appropriate size and format for the surface | 338 | // Allocate an uninitialized texture of appropriate size and format for the surface |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 202257b58..4ab74342e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h | |||
| @@ -31,43 +31,45 @@ struct SurfaceParams { | |||
| 31 | R8 = 5, | 31 | R8 = 5, |
| 32 | R8UI = 6, | 32 | R8UI = 6, |
| 33 | RGBA16F = 7, | 33 | RGBA16F = 7, |
| 34 | R11FG11FB10F = 8, | 34 | RGBA16UI = 8, |
| 35 | RGBA32UI = 9, | 35 | R11FG11FB10F = 9, |
| 36 | DXT1 = 10, | 36 | RGBA32UI = 10, |
| 37 | DXT23 = 11, | 37 | DXT1 = 11, |
| 38 | DXT45 = 12, | 38 | DXT23 = 12, |
| 39 | DXN1 = 13, // This is also known as BC4 | 39 | DXT45 = 13, |
| 40 | DXN2UNORM = 14, | 40 | DXN1 = 14, // This is also known as BC4 |
| 41 | DXN2SNORM = 15, | 41 | DXN2UNORM = 15, |
| 42 | BC7U = 16, | 42 | DXN2SNORM = 16, |
| 43 | ASTC_2D_4X4 = 17, | 43 | BC7U = 17, |
| 44 | G8R8 = 18, | 44 | ASTC_2D_4X4 = 18, |
| 45 | BGRA8 = 19, | 45 | G8R8 = 19, |
| 46 | RGBA32F = 20, | 46 | BGRA8 = 20, |
| 47 | RG32F = 21, | 47 | RGBA32F = 21, |
| 48 | R32F = 22, | 48 | RG32F = 22, |
| 49 | R16F = 23, | 49 | R32F = 23, |
| 50 | R16UNORM = 24, | 50 | R16F = 24, |
| 51 | R16S = 25, | 51 | R16UNORM = 25, |
| 52 | R16UI = 26, | 52 | R16S = 26, |
| 53 | R16I = 27, | 53 | R16UI = 27, |
| 54 | RG16 = 28, | 54 | R16I = 28, |
| 55 | RG16F = 29, | 55 | RG16 = 29, |
| 56 | RG16UI = 30, | 56 | RG16F = 30, |
| 57 | RG16I = 31, | 57 | RG16UI = 31, |
| 58 | RG16S = 32, | 58 | RG16I = 32, |
| 59 | RGB32F = 33, | 59 | RG16S = 33, |
| 60 | SRGBA8 = 34, | 60 | RGB32F = 34, |
| 61 | RG8S = 35, | 61 | SRGBA8 = 35, |
| 62 | RG8U = 36, | ||
| 63 | RG8S = 37, | ||
| 62 | 64 | ||
| 63 | MaxColorFormat, | 65 | MaxColorFormat, |
| 64 | 66 | ||
| 65 | // DepthStencil formats | 67 | // DepthStencil formats |
| 66 | Z24S8 = 36, | 68 | Z24S8 = 38, |
| 67 | S8Z24 = 37, | 69 | S8Z24 = 39, |
| 68 | Z32F = 38, | 70 | Z32F = 40, |
| 69 | Z16 = 39, | 71 | Z16 = 41, |
| 70 | Z32FS8 = 40, | 72 | Z32FS8 = 42, |
| 71 | 73 | ||
| 72 | MaxDepthStencilFormat, | 74 | MaxDepthStencilFormat, |
| 73 | 75 | ||
| @@ -113,6 +115,7 @@ struct SurfaceParams { | |||
| 113 | 1, // R8 | 115 | 1, // R8 |
| 114 | 1, // R8UI | 116 | 1, // R8UI |
| 115 | 1, // RGBA16F | 117 | 1, // RGBA16F |
| 118 | 1, // RGBA16UI | ||
| 116 | 1, // R11FG11FB10F | 119 | 1, // R11FG11FB10F |
| 117 | 1, // RGBA32UI | 120 | 1, // RGBA32UI |
| 118 | 4, // DXT1 | 121 | 4, // DXT1 |
| @@ -140,6 +143,7 @@ struct SurfaceParams { | |||
| 140 | 1, // RG16S | 143 | 1, // RG16S |
| 141 | 1, // RGB32F | 144 | 1, // RGB32F |
| 142 | 1, // SRGBA8 | 145 | 1, // SRGBA8 |
| 146 | 1, // RG8U | ||
| 143 | 1, // RG8S | 147 | 1, // RG8S |
| 144 | 1, // Z24S8 | 148 | 1, // Z24S8 |
| 145 | 1, // S8Z24 | 149 | 1, // S8Z24 |
| @@ -165,6 +169,7 @@ struct SurfaceParams { | |||
| 165 | 8, // R8 | 169 | 8, // R8 |
| 166 | 8, // R8UI | 170 | 8, // R8UI |
| 167 | 64, // RGBA16F | 171 | 64, // RGBA16F |
| 172 | 64, // RGBA16UI | ||
| 168 | 32, // R11FG11FB10F | 173 | 32, // R11FG11FB10F |
| 169 | 128, // RGBA32UI | 174 | 128, // RGBA32UI |
| 170 | 64, // DXT1 | 175 | 64, // DXT1 |
| @@ -192,6 +197,7 @@ struct SurfaceParams { | |||
| 192 | 32, // RG16S | 197 | 32, // RG16S |
| 193 | 96, // RGB32F | 198 | 96, // RGB32F |
| 194 | 32, // SRGBA8 | 199 | 32, // SRGBA8 |
| 200 | 16, // RG8U | ||
| 195 | 16, // RG8S | 201 | 16, // RG8S |
| 196 | 32, // Z24S8 | 202 | 32, // Z24S8 |
| 197 | 32, // S8Z24 | 203 | 32, // S8Z24 |
| @@ -241,6 +247,8 @@ struct SurfaceParams { | |||
| 241 | return PixelFormat::A2B10G10R10; | 247 | return PixelFormat::A2B10G10R10; |
| 242 | case Tegra::RenderTargetFormat::RGBA16_FLOAT: | 248 | case Tegra::RenderTargetFormat::RGBA16_FLOAT: |
| 243 | return PixelFormat::RGBA16F; | 249 | return PixelFormat::RGBA16F; |
| 250 | case Tegra::RenderTargetFormat::RGBA16_UINT: | ||
| 251 | return PixelFormat::RGBA16UI; | ||
| 244 | case Tegra::RenderTargetFormat::RGBA32_FLOAT: | 252 | case Tegra::RenderTargetFormat::RGBA32_FLOAT: |
| 245 | return PixelFormat::RGBA32F; | 253 | return PixelFormat::RGBA32F; |
| 246 | case Tegra::RenderTargetFormat::RG32_FLOAT: | 254 | case Tegra::RenderTargetFormat::RG32_FLOAT: |
| @@ -265,6 +273,8 @@ struct SurfaceParams { | |||
| 265 | return PixelFormat::RG16; | 273 | return PixelFormat::RG16; |
| 266 | case Tegra::RenderTargetFormat::RG16_SNORM: | 274 | case Tegra::RenderTargetFormat::RG16_SNORM: |
| 267 | return PixelFormat::RG16S; | 275 | return PixelFormat::RG16S; |
| 276 | case Tegra::RenderTargetFormat::RG8_UNORM: | ||
| 277 | return PixelFormat::RG8U; | ||
| 268 | case Tegra::RenderTargetFormat::RG8_SNORM: | 278 | case Tegra::RenderTargetFormat::RG8_SNORM: |
| 269 | return PixelFormat::RG8S; | 279 | return PixelFormat::RG8S; |
| 270 | case Tegra::RenderTargetFormat::R16_FLOAT: | 280 | case Tegra::RenderTargetFormat::R16_FLOAT: |
| @@ -432,6 +442,7 @@ struct SurfaceParams { | |||
| 432 | case Tegra::RenderTargetFormat::RG16_UNORM: | 442 | case Tegra::RenderTargetFormat::RG16_UNORM: |
| 433 | case Tegra::RenderTargetFormat::R16_UNORM: | 443 | case Tegra::RenderTargetFormat::R16_UNORM: |
| 434 | case Tegra::RenderTargetFormat::B5G6R5_UNORM: | 444 | case Tegra::RenderTargetFormat::B5G6R5_UNORM: |
| 445 | case Tegra::RenderTargetFormat::RG8_UNORM: | ||
| 435 | return ComponentType::UNorm; | 446 | return ComponentType::UNorm; |
| 436 | case Tegra::RenderTargetFormat::RGBA8_SNORM: | 447 | case Tegra::RenderTargetFormat::RGBA8_SNORM: |
| 437 | case Tegra::RenderTargetFormat::RG16_SNORM: | 448 | case Tegra::RenderTargetFormat::RG16_SNORM: |
| @@ -447,6 +458,7 @@ struct SurfaceParams { | |||
| 447 | case Tegra::RenderTargetFormat::R32_FLOAT: | 458 | case Tegra::RenderTargetFormat::R32_FLOAT: |
| 448 | return ComponentType::Float; | 459 | return ComponentType::Float; |
| 449 | case Tegra::RenderTargetFormat::RGBA32_UINT: | 460 | case Tegra::RenderTargetFormat::RGBA32_UINT: |
| 461 | case Tegra::RenderTargetFormat::RGBA16_UINT: | ||
| 450 | case Tegra::RenderTargetFormat::RG16_UINT: | 462 | case Tegra::RenderTargetFormat::RG16_UINT: |
| 451 | case Tegra::RenderTargetFormat::R8_UINT: | 463 | case Tegra::RenderTargetFormat::R8_UINT: |
| 452 | case Tegra::RenderTargetFormat::R16_UINT: | 464 | case Tegra::RenderTargetFormat::R16_UINT: |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index d21daf28a..6834d7085 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -356,13 +356,13 @@ public: | |||
| 356 | * @param reg The register to use as the source value. | 356 | * @param reg The register to use as the source value. |
| 357 | */ | 357 | */ |
| 358 | void SetOutputAttributeToRegister(Attribute::Index attribute, u64 elem, const Register& reg) { | 358 | void SetOutputAttributeToRegister(Attribute::Index attribute, u64 elem, const Register& reg) { |
| 359 | std::string dest = GetOutputAttribute(attribute) + GetSwizzle(elem); | 359 | std::string dest = GetOutputAttribute(attribute); |
| 360 | std::string src = GetRegisterAsFloat(reg); | 360 | std::string src = GetRegisterAsFloat(reg); |
| 361 | 361 | ||
| 362 | if (!dest.empty()) { | 362 | if (!dest.empty()) { |
| 363 | // Can happen with unknown/unimplemented output attributes, in which case we ignore the | 363 | // Can happen with unknown/unimplemented output attributes, in which case we ignore the |
| 364 | // instruction for now. | 364 | // instruction for now. |
| 365 | shader.AddLine(dest + " = " + src + ';'); | 365 | shader.AddLine(dest + GetSwizzle(elem) + " = " + src + ';'); |
| 366 | } | 366 | } |
| 367 | } | 367 | } |
| 368 | 368 | ||
| @@ -376,6 +376,8 @@ public: | |||
| 376 | return value; | 376 | return value; |
| 377 | } else if (type == GLSLRegister::Type::Integer) { | 377 | } else if (type == GLSLRegister::Type::Integer) { |
| 378 | return "floatBitsToInt(" + value + ')'; | 378 | return "floatBitsToInt(" + value + ')'; |
| 379 | } else if (type == GLSLRegister::Type::UnsignedInteger) { | ||
| 380 | return "floatBitsToUint(" + value + ')'; | ||
| 379 | } else { | 381 | } else { |
| 380 | UNREACHABLE(); | 382 | UNREACHABLE(); |
| 381 | } | 383 | } |
| @@ -1630,6 +1632,99 @@ private: | |||
| 1630 | } | 1632 | } |
| 1631 | break; | 1633 | break; |
| 1632 | } | 1634 | } |
| 1635 | case OpCode::Type::Xmad: { | ||
| 1636 | ASSERT_MSG(!instr.xmad.sign_a, "Unimplemented"); | ||
| 1637 | ASSERT_MSG(!instr.xmad.sign_b, "Unimplemented"); | ||
| 1638 | |||
| 1639 | std::string op_a{regs.GetRegisterAsInteger(instr.gpr8, 0, instr.xmad.sign_a)}; | ||
| 1640 | std::string op_b; | ||
| 1641 | std::string op_c; | ||
| 1642 | |||
| 1643 | // TODO(bunnei): Needs to be fixed once op_a or op_b is signed | ||
| 1644 | ASSERT_MSG(instr.xmad.sign_a == instr.xmad.sign_b, "Unimplemented"); | ||
| 1645 | const bool is_signed{instr.xmad.sign_a == 1}; | ||
| 1646 | |||
| 1647 | bool is_merge{}; | ||
| 1648 | switch (opcode->GetId()) { | ||
| 1649 | case OpCode::Id::XMAD_CR: { | ||
| 1650 | is_merge = instr.xmad.merge_56; | ||
| 1651 | op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 1652 | instr.xmad.sign_b ? GLSLRegister::Type::Integer | ||
| 1653 | : GLSLRegister::Type::UnsignedInteger); | ||
| 1654 | op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed); | ||
| 1655 | break; | ||
| 1656 | } | ||
| 1657 | case OpCode::Id::XMAD_RR: { | ||
| 1658 | is_merge = instr.xmad.merge_37; | ||
| 1659 | op_b += regs.GetRegisterAsInteger(instr.gpr20, 0, instr.xmad.sign_b); | ||
| 1660 | op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed); | ||
| 1661 | break; | ||
| 1662 | } | ||
| 1663 | case OpCode::Id::XMAD_RC: { | ||
| 1664 | op_b += regs.GetRegisterAsInteger(instr.gpr39, 0, instr.xmad.sign_b); | ||
| 1665 | op_c += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 1666 | is_signed ? GLSLRegister::Type::Integer | ||
| 1667 | : GLSLRegister::Type::UnsignedInteger); | ||
| 1668 | break; | ||
| 1669 | } | ||
| 1670 | case OpCode::Id::XMAD_IMM: { | ||
| 1671 | is_merge = instr.xmad.merge_37; | ||
| 1672 | op_b += std::to_string(instr.xmad.imm20_16); | ||
| 1673 | op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed); | ||
| 1674 | break; | ||
| 1675 | } | ||
| 1676 | default: { | ||
| 1677 | LOG_CRITICAL(HW_GPU, "Unhandled XMAD instruction: {}", opcode->GetName()); | ||
| 1678 | UNREACHABLE(); | ||
| 1679 | } | ||
| 1680 | } | ||
| 1681 | |||
| 1682 | // TODO(bunnei): Ensure this is right with signed operands | ||
| 1683 | if (instr.xmad.high_a) { | ||
| 1684 | op_a = "((" + op_a + ") >> 16)"; | ||
| 1685 | } else { | ||
| 1686 | op_a = "((" + op_a + ") & 0xFFFF)"; | ||
| 1687 | } | ||
| 1688 | |||
| 1689 | std::string src2 = '(' + op_b + ')'; // Preserve original source 2 | ||
| 1690 | if (instr.xmad.high_b) { | ||
| 1691 | op_b = '(' + src2 + " >> 16)"; | ||
| 1692 | } else { | ||
| 1693 | op_b = '(' + src2 + " & 0xFFFF)"; | ||
| 1694 | } | ||
| 1695 | |||
| 1696 | std::string product = '(' + op_a + " * " + op_b + ')'; | ||
| 1697 | if (instr.xmad.product_shift_left) { | ||
| 1698 | product = '(' + product + " << 16)"; | ||
| 1699 | } | ||
| 1700 | |||
| 1701 | switch (instr.xmad.mode) { | ||
| 1702 | case Tegra::Shader::XmadMode::None: | ||
| 1703 | break; | ||
| 1704 | case Tegra::Shader::XmadMode::CLo: | ||
| 1705 | op_c = "((" + op_c + ") & 0xFFFF)"; | ||
| 1706 | break; | ||
| 1707 | case Tegra::Shader::XmadMode::CHi: | ||
| 1708 | op_c = "((" + op_c + ") >> 16)"; | ||
| 1709 | break; | ||
| 1710 | case Tegra::Shader::XmadMode::CBcc: | ||
| 1711 | op_c = "((" + op_c + ") + (" + src2 + "<< 16))"; | ||
| 1712 | break; | ||
| 1713 | default: { | ||
| 1714 | LOG_CRITICAL(HW_GPU, "Unhandled XMAD mode: {}", | ||
| 1715 | static_cast<u32>(instr.xmad.mode.Value())); | ||
| 1716 | UNREACHABLE(); | ||
| 1717 | } | ||
| 1718 | } | ||
| 1719 | |||
| 1720 | std::string sum{'(' + product + " + " + op_c + ')'}; | ||
| 1721 | if (is_merge) { | ||
| 1722 | sum = "((" + sum + " & 0xFFFF) | (" + src2 + "<< 16))"; | ||
| 1723 | } | ||
| 1724 | |||
| 1725 | regs.SetRegisterToInteger(instr.gpr0, is_signed, 0, sum, 1, 1); | ||
| 1726 | break; | ||
| 1727 | } | ||
| 1633 | default: { | 1728 | default: { |
| 1634 | switch (opcode->GetId()) { | 1729 | switch (opcode->GetId()) { |
| 1635 | case OpCode::Id::EXIT: { | 1730 | case OpCode::Id::EXIT: { |
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 68bacd4c5..1d1975179 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp | |||
| @@ -203,21 +203,6 @@ void OpenGLState::Apply() const { | |||
| 203 | } | 203 | } |
| 204 | } | 204 | } |
| 205 | 205 | ||
| 206 | // Constbuffers | ||
| 207 | for (std::size_t stage = 0; stage < draw.const_buffers.size(); ++stage) { | ||
| 208 | for (std::size_t buffer_id = 0; buffer_id < draw.const_buffers[stage].size(); ++buffer_id) { | ||
| 209 | const auto& current = cur_state.draw.const_buffers[stage][buffer_id]; | ||
| 210 | const auto& new_state = draw.const_buffers[stage][buffer_id]; | ||
| 211 | |||
| 212 | if (current.enabled != new_state.enabled || current.bindpoint != new_state.bindpoint || | ||
| 213 | current.ssbo != new_state.ssbo) { | ||
| 214 | if (new_state.enabled) { | ||
| 215 | glBindBufferBase(GL_UNIFORM_BUFFER, new_state.bindpoint, new_state.ssbo); | ||
| 216 | } | ||
| 217 | } | ||
| 218 | } | ||
| 219 | } | ||
| 220 | |||
| 221 | // Framebuffer | 206 | // Framebuffer |
| 222 | if (draw.read_framebuffer != cur_state.draw.read_framebuffer) { | 207 | if (draw.read_framebuffer != cur_state.draw.read_framebuffer) { |
| 223 | glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer); | 208 | glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer); |
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 5c7b636e4..bdb02ba25 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h | |||
| @@ -119,12 +119,6 @@ public: | |||
| 119 | GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING | 119 | GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING |
| 120 | GLuint shader_program; // GL_CURRENT_PROGRAM | 120 | GLuint shader_program; // GL_CURRENT_PROGRAM |
| 121 | GLuint program_pipeline; // GL_PROGRAM_PIPELINE_BINDING | 121 | GLuint program_pipeline; // GL_PROGRAM_PIPELINE_BINDING |
| 122 | struct ConstBufferConfig { | ||
| 123 | bool enabled = false; | ||
| 124 | GLuint bindpoint; | ||
| 125 | GLuint ssbo; | ||
| 126 | }; | ||
| 127 | std::array<std::array<ConstBufferConfig, Regs::MaxConstBuffers>, 5> const_buffers; | ||
| 128 | } draw; | 122 | } draw; |
| 129 | 123 | ||
| 130 | struct { | 124 | struct { |
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index a2713e9f0..03a8ed8b7 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp | |||
| @@ -9,174 +9,91 @@ | |||
| 9 | #include "video_core/renderer_opengl/gl_state.h" | 9 | #include "video_core/renderer_opengl/gl_state.h" |
| 10 | #include "video_core/renderer_opengl/gl_stream_buffer.h" | 10 | #include "video_core/renderer_opengl/gl_stream_buffer.h" |
| 11 | 11 | ||
| 12 | class OrphanBuffer : public OGLStreamBuffer { | 12 | OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coherent) |
| 13 | public: | 13 | : gl_target(target), buffer_size(size) { |
| 14 | explicit OrphanBuffer(GLenum target) : OGLStreamBuffer(target) {} | 14 | gl_buffer.Create(); |
| 15 | ~OrphanBuffer() override; | 15 | glBindBuffer(gl_target, gl_buffer.handle); |
| 16 | |||
| 17 | private: | ||
| 18 | void Create(size_t size, size_t sync_subdivide) override; | ||
| 19 | void Release() override; | ||
| 20 | |||
| 21 | std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) override; | ||
| 22 | void Unmap() override; | ||
| 23 | |||
| 24 | std::vector<u8> data; | ||
| 25 | }; | ||
| 26 | |||
| 27 | class StorageBuffer : public OGLStreamBuffer { | ||
| 28 | public: | ||
| 29 | explicit StorageBuffer(GLenum target) : OGLStreamBuffer(target) {} | ||
| 30 | ~StorageBuffer() override; | ||
| 31 | |||
| 32 | private: | ||
| 33 | void Create(size_t size, size_t sync_subdivide) override; | ||
| 34 | void Release() override; | ||
| 35 | |||
| 36 | std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) override; | ||
| 37 | void Unmap() override; | ||
| 38 | |||
| 39 | struct Fence { | ||
| 40 | OGLSync sync; | ||
| 41 | size_t offset; | ||
| 42 | }; | ||
| 43 | std::deque<Fence> head; | ||
| 44 | std::deque<Fence> tail; | ||
| 45 | |||
| 46 | u8* mapped_ptr; | ||
| 47 | }; | ||
| 48 | |||
| 49 | OGLStreamBuffer::OGLStreamBuffer(GLenum target) { | ||
| 50 | gl_target = target; | ||
| 51 | } | ||
| 52 | |||
| 53 | GLuint OGLStreamBuffer::GetHandle() const { | ||
| 54 | return gl_buffer.handle; | ||
| 55 | } | ||
| 56 | 16 | ||
| 57 | std::unique_ptr<OGLStreamBuffer> OGLStreamBuffer::MakeBuffer(bool storage_buffer, GLenum target) { | 17 | GLsizeiptr allocate_size = size; |
| 58 | if (storage_buffer) { | 18 | if (target == GL_ARRAY_BUFFER) { |
| 59 | return std::make_unique<StorageBuffer>(target); | 19 | // On AMD GPU there is a strange crash in indexed drawing. The crash happens when the buffer |
| 20 | // read position is near the end and is an out-of-bound access to the vertex buffer. This is | ||
| 21 | // probably a bug in the driver and is related to the usage of vec3<byte> attributes in the | ||
| 22 | // vertex array. Doubling the allocation size for the vertex buffer seems to avoid the | ||
| 23 | // crash. | ||
| 24 | allocate_size *= 2; | ||
| 60 | } | 25 | } |
| 61 | return std::make_unique<OrphanBuffer>(target); | ||
| 62 | } | ||
| 63 | 26 | ||
| 64 | OrphanBuffer::~OrphanBuffer() { | 27 | if (GLAD_GL_ARB_buffer_storage) { |
| 65 | Release(); | 28 | persistent = true; |
| 29 | coherent = prefer_coherent; | ||
| 30 | GLbitfield flags = | ||
| 31 | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0); | ||
| 32 | glBufferStorage(gl_target, allocate_size, nullptr, flags); | ||
| 33 | mapped_ptr = static_cast<u8*>(glMapBufferRange( | ||
| 34 | gl_target, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT))); | ||
| 35 | } else { | ||
| 36 | glBufferData(gl_target, allocate_size, nullptr, GL_STREAM_DRAW); | ||
| 37 | } | ||
| 66 | } | 38 | } |
| 67 | 39 | ||
| 68 | void OrphanBuffer::Create(size_t size, size_t /*sync_subdivide*/) { | 40 | OGLStreamBuffer::~OGLStreamBuffer() { |
| 69 | buffer_pos = 0; | 41 | if (persistent) { |
| 70 | buffer_size = size; | ||
| 71 | data.resize(buffer_size); | ||
| 72 | |||
| 73 | if (gl_buffer.handle == 0) { | ||
| 74 | gl_buffer.Create(); | ||
| 75 | glBindBuffer(gl_target, gl_buffer.handle); | 42 | glBindBuffer(gl_target, gl_buffer.handle); |
| 43 | glUnmapBuffer(gl_target); | ||
| 76 | } | 44 | } |
| 77 | |||
| 78 | glBufferData(gl_target, static_cast<GLsizeiptr>(buffer_size), nullptr, GL_STREAM_DRAW); | ||
| 79 | } | ||
| 80 | |||
| 81 | void OrphanBuffer::Release() { | ||
| 82 | gl_buffer.Release(); | 45 | gl_buffer.Release(); |
| 83 | } | 46 | } |
| 84 | 47 | ||
| 85 | std::pair<u8*, GLintptr> OrphanBuffer::Map(size_t size, size_t alignment) { | 48 | GLuint OGLStreamBuffer::GetHandle() const { |
| 86 | buffer_pos = Common::AlignUp(buffer_pos, alignment); | 49 | return gl_buffer.handle; |
| 87 | |||
| 88 | if (buffer_pos + size > buffer_size) { | ||
| 89 | Create(std::max(buffer_size, size), 0); | ||
| 90 | } | ||
| 91 | |||
| 92 | mapped_size = size; | ||
| 93 | return std::make_pair(&data[buffer_pos], static_cast<GLintptr>(buffer_pos)); | ||
| 94 | } | ||
| 95 | |||
| 96 | void OrphanBuffer::Unmap() { | ||
| 97 | glBufferSubData(gl_target, static_cast<GLintptr>(buffer_pos), | ||
| 98 | static_cast<GLsizeiptr>(mapped_size), &data[buffer_pos]); | ||
| 99 | buffer_pos += mapped_size; | ||
| 100 | } | ||
| 101 | |||
| 102 | StorageBuffer::~StorageBuffer() { | ||
| 103 | Release(); | ||
| 104 | } | 50 | } |
| 105 | 51 | ||
| 106 | void StorageBuffer::Create(size_t size, size_t sync_subdivide) { | 52 | GLsizeiptr OGLStreamBuffer::GetSize() const { |
| 107 | if (gl_buffer.handle != 0) | 53 | return buffer_size; |
| 108 | return; | ||
| 109 | |||
| 110 | buffer_pos = 0; | ||
| 111 | buffer_size = size; | ||
| 112 | buffer_sync_subdivide = std::max<size_t>(sync_subdivide, 1); | ||
| 113 | |||
| 114 | gl_buffer.Create(); | ||
| 115 | glBindBuffer(gl_target, gl_buffer.handle); | ||
| 116 | |||
| 117 | glBufferStorage(gl_target, static_cast<GLsizeiptr>(buffer_size), nullptr, | ||
| 118 | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT); | ||
| 119 | mapped_ptr = reinterpret_cast<u8*>( | ||
| 120 | glMapBufferRange(gl_target, 0, static_cast<GLsizeiptr>(buffer_size), | ||
| 121 | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_FLUSH_EXPLICIT_BIT)); | ||
| 122 | } | 54 | } |
| 123 | 55 | ||
| 124 | void StorageBuffer::Release() { | 56 | std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) { |
| 125 | if (gl_buffer.handle == 0) | ||
| 126 | return; | ||
| 127 | |||
| 128 | glUnmapBuffer(gl_target); | ||
| 129 | |||
| 130 | gl_buffer.Release(); | ||
| 131 | head.clear(); | ||
| 132 | tail.clear(); | ||
| 133 | } | ||
| 134 | |||
| 135 | std::pair<u8*, GLintptr> StorageBuffer::Map(size_t size, size_t alignment) { | ||
| 136 | ASSERT(size <= buffer_size); | 57 | ASSERT(size <= buffer_size); |
| 58 | ASSERT(alignment <= buffer_size); | ||
| 59 | mapped_size = size; | ||
| 137 | 60 | ||
| 138 | OGLSync sync; | 61 | if (alignment > 0) { |
| 139 | 62 | buffer_pos = Common::AlignUp<size_t>(buffer_pos, alignment); | |
| 140 | buffer_pos = Common::AlignUp(buffer_pos, alignment); | ||
| 141 | size_t effective_offset = Common::AlignDown(buffer_pos, buffer_sync_subdivide); | ||
| 142 | |||
| 143 | if (!head.empty() && | ||
| 144 | (effective_offset > head.back().offset || buffer_pos + size > buffer_size)) { | ||
| 145 | ASSERT(head.back().sync.handle == 0); | ||
| 146 | head.back().sync.Create(); | ||
| 147 | } | 63 | } |
| 148 | 64 | ||
| 65 | bool invalidate = false; | ||
| 149 | if (buffer_pos + size > buffer_size) { | 66 | if (buffer_pos + size > buffer_size) { |
| 150 | if (!tail.empty()) { | ||
| 151 | std::swap(sync, tail.back().sync); | ||
| 152 | tail.clear(); | ||
| 153 | } | ||
| 154 | std::swap(tail, head); | ||
| 155 | buffer_pos = 0; | 67 | buffer_pos = 0; |
| 156 | effective_offset = 0; | 68 | invalidate = true; |
| 157 | } | ||
| 158 | 69 | ||
| 159 | while (!tail.empty() && buffer_pos + size > tail.front().offset) { | 70 | if (persistent) { |
| 160 | std::swap(sync, tail.front().sync); | 71 | glUnmapBuffer(gl_target); |
| 161 | tail.pop_front(); | 72 | } |
| 162 | } | 73 | } |
| 163 | 74 | ||
| 164 | if (sync.handle != 0) { | 75 | if (invalidate | !persistent) { |
| 165 | glClientWaitSync(sync.handle, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); | 76 | GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) | |
| 166 | sync.Release(); | 77 | (coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) | |
| 78 | (invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT); | ||
| 79 | mapped_ptr = static_cast<u8*>( | ||
| 80 | glMapBufferRange(gl_target, buffer_pos, buffer_size - buffer_pos, flags)); | ||
| 81 | mapped_offset = buffer_pos; | ||
| 167 | } | 82 | } |
| 168 | 83 | ||
| 169 | if (head.empty() || effective_offset > head.back().offset) { | 84 | return std::make_tuple(mapped_ptr + buffer_pos - mapped_offset, buffer_pos, invalidate); |
| 170 | head.emplace_back(); | 85 | } |
| 171 | head.back().offset = effective_offset; | 86 | |
| 87 | void OGLStreamBuffer::Unmap(GLsizeiptr size) { | ||
| 88 | ASSERT(size <= mapped_size); | ||
| 89 | |||
| 90 | if (!coherent && size > 0) { | ||
| 91 | glFlushMappedBufferRange(gl_target, buffer_pos - mapped_offset, size); | ||
| 172 | } | 92 | } |
| 173 | 93 | ||
| 174 | mapped_size = size; | 94 | if (!persistent) { |
| 175 | return std::make_pair(&mapped_ptr[buffer_pos], static_cast<GLintptr>(buffer_pos)); | 95 | glUnmapBuffer(gl_target); |
| 176 | } | 96 | } |
| 177 | 97 | ||
| 178 | void StorageBuffer::Unmap() { | 98 | buffer_pos += size; |
| 179 | glFlushMappedBufferRange(gl_target, static_cast<GLintptr>(buffer_pos), | ||
| 180 | static_cast<GLsizeiptr>(mapped_size)); | ||
| 181 | buffer_pos += mapped_size; | ||
| 182 | } | 99 | } |
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index e78dc5784..45592daaf 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h | |||
| @@ -2,35 +2,41 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #pragma once | 5 | #include <tuple> |
| 6 | |||
| 7 | #include <memory> | ||
| 8 | #include <glad/glad.h> | 6 | #include <glad/glad.h> |
| 9 | #include "common/common_types.h" | 7 | #include "common/common_types.h" |
| 10 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 8 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 11 | 9 | ||
| 12 | class OGLStreamBuffer : private NonCopyable { | 10 | class OGLStreamBuffer : private NonCopyable { |
| 13 | public: | 11 | public: |
| 14 | explicit OGLStreamBuffer(GLenum target); | 12 | explicit OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coherent = false); |
| 15 | virtual ~OGLStreamBuffer() = default; | 13 | ~OGLStreamBuffer(); |
| 16 | |||
| 17 | public: | ||
| 18 | static std::unique_ptr<OGLStreamBuffer> MakeBuffer(bool storage_buffer, GLenum target); | ||
| 19 | |||
| 20 | virtual void Create(size_t size, size_t sync_subdivide) = 0; | ||
| 21 | virtual void Release() {} | ||
| 22 | 14 | ||
| 23 | GLuint GetHandle() const; | 15 | GLuint GetHandle() const; |
| 16 | GLsizeiptr GetSize() const; | ||
| 17 | |||
| 18 | /* | ||
| 19 | * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes | ||
| 20 | * and the optional alignment requirement. | ||
| 21 | * If the buffer is full, the whole buffer is reallocated which invalidates old chunks. | ||
| 22 | * The return values are the pointer to the new chunk, the offset within the buffer, | ||
| 23 | * and the invalidation flag for previous chunks. | ||
| 24 | * The actual used size must be specified on unmapping the chunk. | ||
| 25 | */ | ||
| 26 | std::tuple<u8*, GLintptr, bool> Map(GLsizeiptr size, GLintptr alignment = 0); | ||
| 24 | 27 | ||
| 25 | virtual std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) = 0; | 28 | void Unmap(GLsizeiptr size); |
| 26 | virtual void Unmap() = 0; | ||
| 27 | 29 | ||
| 28 | protected: | 30 | private: |
| 29 | OGLBuffer gl_buffer; | 31 | OGLBuffer gl_buffer; |
| 30 | GLenum gl_target; | 32 | GLenum gl_target; |
| 31 | 33 | ||
| 32 | size_t buffer_pos = 0; | 34 | bool coherent = false; |
| 33 | size_t buffer_size = 0; | 35 | bool persistent = false; |
| 34 | size_t buffer_sync_subdivide = 0; | 36 | |
| 35 | size_t mapped_size = 0; | 37 | GLintptr buffer_pos = 0; |
| 38 | GLsizeiptr buffer_size = 0; | ||
| 39 | GLintptr mapped_offset = 0; | ||
| 40 | GLsizeiptr mapped_size = 0; | ||
| 41 | u8* mapped_ptr = nullptr; | ||
| 36 | }; | 42 | }; |
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 5afd20dbe..679e5ceb2 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h | |||
| @@ -91,6 +91,8 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) { | |||
| 91 | switch (topology) { | 91 | switch (topology) { |
| 92 | case Maxwell::PrimitiveTopology::Points: | 92 | case Maxwell::PrimitiveTopology::Points: |
| 93 | return GL_POINTS; | 93 | return GL_POINTS; |
| 94 | case Maxwell::PrimitiveTopology::LineStrip: | ||
| 95 | return GL_LINE_STRIP; | ||
| 94 | case Maxwell::PrimitiveTopology::Triangles: | 96 | case Maxwell::PrimitiveTopology::Triangles: |
| 95 | return GL_TRIANGLES; | 97 | return GL_TRIANGLES; |
| 96 | case Maxwell::PrimitiveTopology::TriangleStrip: | 98 | case Maxwell::PrimitiveTopology::TriangleStrip: |