43 files changed, 775 insertions, 319 deletions
diff --git a/externals/sirit b/externals/sirit
-Subproject 414fc4dbd28d8fe48f735a0c389db8a234f733c
+Subproject a62c5bbc100a5e5a31ea0ccc4a78d8fa6a4167c
diff --git a/src/core/file_sys/system_archive/system_version.cpp b/src/core/file_sys/system_archive/system_version.cpp
index 6e22f97b0..aa313de66 100644
--- a/src/core/file_sys/system_archive/system_version.cpp
+++ b/src/core/file_sys/system_archive/system_version.cpp
| @@ -12,17 +12,17 @@ namespace SystemVersionData { | |||
| 12 | // This section should reflect the best system version to describe yuzu's HLE api. | 12 | // This section should reflect the best system version to describe yuzu's HLE api. |
| 13 | // TODO(DarkLordZach): Update when HLE gets better. | 13 | // TODO(DarkLordZach): Update when HLE gets better. |
| 14 | 14 | ||
| 15 | constexpr u8 VERSION_MAJOR = 5; | 15 | constexpr u8 VERSION_MAJOR = 10; |
| 16 | constexpr u8 VERSION_MINOR = 1; | 16 | constexpr u8 VERSION_MINOR = 0; |
| 17 | constexpr u8 VERSION_MICRO = 0; | 17 | constexpr u8 VERSION_MICRO = 2; |
| 18 | 18 | ||
| 19 | constexpr u8 REVISION_MAJOR = 3; | 19 | constexpr u8 REVISION_MAJOR = 1; |
| 20 | constexpr u8 REVISION_MINOR = 0; | 20 | constexpr u8 REVISION_MINOR = 0; |
| 21 | 21 | ||
| 22 | constexpr char PLATFORM_STRING[] = "NX"; | 22 | constexpr char PLATFORM_STRING[] = "NX"; |
| 23 | constexpr char VERSION_HASH[] = "23f9df53e25709d756e0c76effcb2473bd3447dd"; | 23 | constexpr char VERSION_HASH[] = "f90143fa8bbc061d4f68c35f95f04f8080c0ecdc"; |
| 24 | constexpr char DISPLAY_VERSION[] = "5.1.0"; | 24 | constexpr char DISPLAY_VERSION[] = "10.0.2"; |
| 25 | constexpr char DISPLAY_TITLE[] = "NintendoSDK Firmware for NX 5.1.0-3.0"; | 25 | constexpr char DISPLAY_TITLE[] = "NintendoSDK Firmware for NX 10.0.2-1.0"; |
| 26 | 26 | ||
| 27 | } // namespace SystemVersionData | 27 | } // namespace SystemVersionData |
| 28 | 28 | ||
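The numeric constants and the display strings describe the same firmware version, so they must stay consistent with each other: DISPLAY_VERSION is MAJOR.MINOR.MICRO and DISPLAY_TITLE appends the revision pair, which is how "10.0.2" and "NintendoSDK Firmware for NX 10.0.2-1.0" line up with the new numbers. A minimal sketch of that relationship; BuildDisplayTitle is a hypothetical helper, not code from this change:

#include <cstdio>
#include <string>

std::string BuildDisplayTitle(unsigned major, unsigned minor, unsigned micro,
                              unsigned revision_major, unsigned revision_minor) {
    char buffer[64];
    std::snprintf(buffer, sizeof(buffer), "NintendoSDK Firmware for NX %u.%u.%u-%u.%u",
                  major, minor, micro, revision_major, revision_minor);
    return buffer;
}

// BuildDisplayTitle(10, 0, 2, 1, 0) yields "NintendoSDK Firmware for NX 10.0.2-1.0",
// matching DISPLAY_TITLE above.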
diff --git a/src/core/hle/service/hid/controllers/keyboard.cpp b/src/core/hle/service/hid/controllers/keyboard.cpp
index 358cb9329..9a8d354ba 100644
--- a/src/core/hle/service/hid/controllers/keyboard.cpp
+++ b/src/core/hle/service/hid/controllers/keyboard.cpp
| @@ -38,10 +38,11 @@ void Controller_Keyboard::OnUpdate(const Core::Timing::CoreTiming& core_timing, | |||
| 38 | cur_entry.sampling_number = last_entry.sampling_number + 1; | 38 | cur_entry.sampling_number = last_entry.sampling_number + 1; |
| 39 | cur_entry.sampling_number2 = cur_entry.sampling_number; | 39 | cur_entry.sampling_number2 = cur_entry.sampling_number; |
| 40 | 40 | ||
| 41 | cur_entry.key.fill(0); | ||
| 42 | cur_entry.modifier = 0; | ||
| 43 | |||
| 41 | for (std::size_t i = 0; i < keyboard_keys.size(); ++i) { | 44 | for (std::size_t i = 0; i < keyboard_keys.size(); ++i) { |
| 42 | for (std::size_t k = 0; k < KEYS_PER_BYTE; ++k) { | 45 | cur_entry.key[i / KEYS_PER_BYTE] |= (keyboard_keys[i]->GetStatus() << (i % KEYS_PER_BYTE)); |
| 43 | cur_entry.key[i / KEYS_PER_BYTE] |= (keyboard_keys[i]->GetStatus() << k); | ||
| 44 | } | ||
| 45 | } | 46 | } |
| 46 | 47 | ||
| 47 | for (std::size_t i = 0; i < keyboard_mods.size(); ++i) { | 48 | for (std::size_t i = 0; i < keyboard_mods.size(); ++i) { |
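The rewritten loop packs one bit per key: key i lands in byte i / KEYS_PER_BYTE at bit i % KEYS_PER_BYTE, and the entry is now cleared at the start of every sample instead of accumulating. The old inner loop shifted by its own counter, so a single pressed key smeared across all eight bits of its byte. A minimal sketch of the intended packing, assuming KEYS_PER_BYTE is 8 and a 256-key bitmap; PackKeys is an illustrative helper, not code from the diff:

#include <array>
#include <cstddef>
#include <cstdint>

constexpr std::size_t KEYS_PER_BYTE = 8;
constexpr std::size_t NUM_KEYS = 256;

std::array<std::uint8_t, NUM_KEYS / KEYS_PER_BYTE> PackKeys(
    const std::array<bool, NUM_KEYS>& pressed) {
    std::array<std::uint8_t, NUM_KEYS / KEYS_PER_BYTE> packed{}; // zeroed, like key.fill(0)
    for (std::size_t i = 0; i < pressed.size(); ++i) {
        // Key i lives in byte i / 8, bit i % 8.
        packed[i / KEYS_PER_BYTE] |= static_cast<std::uint8_t>(pressed[i]) << (i % KEYS_PER_BYTE);
    }
    return packed;
}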
diff --git a/src/core/hle/service/nifm/nifm.cpp b/src/core/hle/service/nifm/nifm.cpp
index 767158444..01ddcdbd6 100644
--- a/src/core/hle/service/nifm/nifm.cpp
+++ b/src/core/hle/service/nifm/nifm.cpp
| @@ -177,7 +177,8 @@ private: | |||
| 177 | void CreateTemporaryNetworkProfile(Kernel::HLERequestContext& ctx) { | 177 | void CreateTemporaryNetworkProfile(Kernel::HLERequestContext& ctx) { |
| 178 | LOG_DEBUG(Service_NIFM, "called"); | 178 | LOG_DEBUG(Service_NIFM, "called"); |
| 179 | 179 | ||
| 180 | ASSERT_MSG(ctx.GetReadBufferSize() == 0x17c, "NetworkProfileData is not the correct size"); | 180 | ASSERT_MSG(ctx.GetReadBufferSize() == 0x17c, |
| 181 | "SfNetworkProfileData is not the correct size"); | ||
| 181 | u128 uuid{}; | 182 | u128 uuid{}; |
| 182 | auto buffer = ctx.ReadBuffer(); | 183 | auto buffer = ctx.ReadBuffer(); |
| 183 | std::memcpy(&uuid, buffer.data() + 8, sizeof(u128)); | 184 | std::memcpy(&uuid, buffer.data() + 8, sizeof(u128)); |
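The tightened assert message documents two layout assumptions: the incoming buffer must be exactly 0x17C bytes (the size of SfNetworkProfileData) and the UUID is a u128 starting at offset 8. A sketch of how those assumptions could be pinned down at compile time; everything here other than the total size and the UUID offset is hypothetical, since the real structure is not part of this diff:

#include <cstddef>
#include <cstdint>

struct SfNetworkProfileData {
    std::uint8_t unknown_header[8];         // hypothetical leading bytes
    std::uint8_t uuid[16];                  // the u128 copied out at offset 8
    std::uint8_t remainder[0x17C - 8 - 16]; // hypothetical trailing fields
};
static_assert(sizeof(SfNetworkProfileData) == 0x17C,
              "must match the asserted read-buffer size");
static_assert(offsetof(SfNetworkProfileData, uuid) == 8, "uuid must start at offset 8");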
diff --git a/src/core/settings.cpp b/src/core/settings.cpp
index da53cde05..4edff9cd8 100644
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
| @@ -112,6 +112,7 @@ void LogSettings() { | |||
| 112 | LogSetting("Renderer_UseAsynchronousGpuEmulation", | 112 | LogSetting("Renderer_UseAsynchronousGpuEmulation", |
| 113 | Settings::values.use_asynchronous_gpu_emulation); | 113 | Settings::values.use_asynchronous_gpu_emulation); |
| 114 | LogSetting("Renderer_UseVsync", Settings::values.use_vsync); | 114 | LogSetting("Renderer_UseVsync", Settings::values.use_vsync); |
| 115 | LogSetting("Renderer_UseAssemblyShaders", Settings::values.use_assembly_shaders); | ||
| 115 | LogSetting("Renderer_AnisotropicFilteringLevel", Settings::values.max_anisotropy); | 116 | LogSetting("Renderer_AnisotropicFilteringLevel", Settings::values.max_anisotropy); |
| 116 | LogSetting("Audio_OutputEngine", Settings::values.sink_id); | 117 | LogSetting("Audio_OutputEngine", Settings::values.sink_id); |
| 117 | LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching); | 118 | LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching); |
diff --git a/src/core/settings.h b/src/core/settings.h
index c1266b341..78eb33737 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
| @@ -446,6 +446,7 @@ struct Values { | |||
| 446 | GPUAccuracy gpu_accuracy; | 446 | GPUAccuracy gpu_accuracy; |
| 447 | bool use_asynchronous_gpu_emulation; | 447 | bool use_asynchronous_gpu_emulation; |
| 448 | bool use_vsync; | 448 | bool use_vsync; |
| 449 | bool use_assembly_shaders; | ||
| 449 | bool force_30fps_mode; | 450 | bool force_30fps_mode; |
| 450 | bool use_fast_gpu_time; | 451 | bool use_fast_gpu_time; |
| 451 | 452 | ||
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp
index 1c3b03a1c..c781b3cfc 100644
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
| @@ -201,6 +201,7 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) { | |||
| 201 | AddField(field_type, "Renderer_UseAsynchronousGpuEmulation", | 201 | AddField(field_type, "Renderer_UseAsynchronousGpuEmulation", |
| 202 | Settings::values.use_asynchronous_gpu_emulation); | 202 | Settings::values.use_asynchronous_gpu_emulation); |
| 203 | AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync); | 203 | AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync); |
| 204 | AddField(field_type, "Renderer_UseAssemblyShaders", Settings::values.use_assembly_shaders); | ||
| 204 | AddField(field_type, "System_UseDockedMode", Settings::values.use_docked_mode); | 205 | AddField(field_type, "System_UseDockedMode", Settings::values.use_docked_mode); |
| 205 | } | 206 | } |
| 206 | 207 | ||
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index d23c53843..f00c71dae 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
| @@ -1,6 +1,7 @@ | |||
| 1 | add_library(video_core STATIC | 1 | add_library(video_core STATIC |
| 2 | buffer_cache/buffer_block.h | 2 | buffer_cache/buffer_block.h |
| 3 | buffer_cache/buffer_cache.h | 3 | buffer_cache/buffer_cache.h |
| 4 | buffer_cache/map_interval.cpp | ||
| 4 | buffer_cache/map_interval.h | 5 | buffer_cache/map_interval.h |
| 5 | dirty_flags.cpp | 6 | dirty_flags.cpp |
| 6 | dirty_flags.h | 7 | dirty_flags.h |
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 56e570994..d9a4a1b4d 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
| @@ -12,11 +12,12 @@ | |||
| 12 | #include <utility> | 12 | #include <utility> |
| 13 | #include <vector> | 13 | #include <vector> |
| 14 | 14 | ||
| 15 | #include <boost/icl/interval_map.hpp> | 15 | #include <boost/container/small_vector.hpp> |
| 16 | #include <boost/icl/interval_set.hpp> | 16 | #include <boost/icl/interval_set.hpp> |
| 17 | #include <boost/range/iterator_range.hpp> | 17 | #include <boost/intrusive/set.hpp> |
| 18 | 18 | ||
| 19 | #include "common/alignment.h" | 19 | #include "common/alignment.h" |
| 20 | #include "common/assert.h" | ||
| 20 | #include "common/common_types.h" | 21 | #include "common/common_types.h" |
| 21 | #include "common/logging/log.h" | 22 | #include "common/logging/log.h" |
| 22 | #include "core/core.h" | 23 | #include "core/core.h" |
| @@ -29,10 +30,12 @@ | |||
| 29 | 30 | ||
| 30 | namespace VideoCommon { | 31 | namespace VideoCommon { |
| 31 | 32 | ||
| 32 | using MapInterval = std::shared_ptr<MapIntervalBase>; | ||
| 33 | |||
| 34 | template <typename OwnerBuffer, typename BufferType, typename StreamBuffer> | 33 | template <typename OwnerBuffer, typename BufferType, typename StreamBuffer> |
| 35 | class BufferCache { | 34 | class BufferCache { |
| 35 | using IntervalSet = boost::icl::interval_set<VAddr>; | ||
| 36 | using IntervalType = typename IntervalSet::interval_type; | ||
| 37 | using VectorMapInterval = boost::container::small_vector<MapInterval*, 1>; | ||
| 38 | |||
| 36 | public: | 39 | public: |
| 37 | using BufferInfo = std::pair<BufferType, u64>; | 40 | using BufferInfo = std::pair<BufferType, u64>; |
| 38 | 41 | ||
| @@ -40,14 +43,12 @@ public: | |||
| 40 | bool is_written = false, bool use_fast_cbuf = false) { | 43 | bool is_written = false, bool use_fast_cbuf = false) { |
| 41 | std::lock_guard lock{mutex}; | 44 | std::lock_guard lock{mutex}; |
| 42 | 45 | ||
| 43 | const std::optional<VAddr> cpu_addr_opt = | 46 | const auto& memory_manager = system.GPU().MemoryManager(); |
| 44 | system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); | 47 | const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr); |
| 45 | |||
| 46 | if (!cpu_addr_opt) { | 48 | if (!cpu_addr_opt) { |
| 47 | return {GetEmptyBuffer(size), 0}; | 49 | return {GetEmptyBuffer(size), 0}; |
| 48 | } | 50 | } |
| 49 | 51 | const VAddr cpu_addr = *cpu_addr_opt; | |
| 50 | VAddr cpu_addr = *cpu_addr_opt; | ||
| 51 | 52 | ||
| 52 | // Cache management is a big overhead, so only cache entries with a given size. | 53 | // Cache management is a big overhead, so only cache entries with a given size. |
| 53 | // TODO: Figure out which size is the best for given games. | 54 | // TODO: Figure out which size is the best for given games. |
| @@ -77,16 +78,19 @@ public: | |||
| 77 | } | 78 | } |
| 78 | } | 79 | } |
| 79 | 80 | ||
| 80 | auto block = GetBlock(cpu_addr, size); | 81 | OwnerBuffer block = GetBlock(cpu_addr, size); |
| 81 | auto map = MapAddress(block, gpu_addr, cpu_addr, size); | 82 | MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size); |
| 83 | if (!map) { | ||
| 84 | return {GetEmptyBuffer(size), 0}; | ||
| 85 | } | ||
| 82 | if (is_written) { | 86 | if (is_written) { |
| 83 | map->MarkAsModified(true, GetModifiedTicks()); | 87 | map->MarkAsModified(true, GetModifiedTicks()); |
| 84 | if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) { | 88 | if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) { |
| 85 | MarkForAsyncFlush(map); | 89 | MarkForAsyncFlush(map); |
| 86 | } | 90 | } |
| 87 | if (!map->IsWritten()) { | 91 | if (!map->is_written) { |
| 88 | map->MarkAsWritten(true); | 92 | map->is_written = true; |
| 89 | MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); | 93 | MarkRegionAsWritten(map->start, map->end - 1); |
| 90 | } | 94 | } |
| 91 | } | 95 | } |
| 92 | 96 | ||
| @@ -132,12 +136,11 @@ public: | |||
| 132 | void FlushRegion(VAddr addr, std::size_t size) { | 136 | void FlushRegion(VAddr addr, std::size_t size) { |
| 133 | std::lock_guard lock{mutex}; | 137 | std::lock_guard lock{mutex}; |
| 134 | 138 | ||
| 135 | std::vector<MapInterval> objects = GetMapsInRange(addr, size); | 139 | VectorMapInterval objects = GetMapsInRange(addr, size); |
| 136 | std::sort(objects.begin(), objects.end(), [](const MapInterval& a, const MapInterval& b) { | 140 | std::sort(objects.begin(), objects.end(), |
| 137 | return a->GetModificationTick() < b->GetModificationTick(); | 141 | [](MapInterval* lhs, MapInterval* rhs) { return lhs->ticks < rhs->ticks; }); |
| 138 | }); | 142 | for (MapInterval* object : objects) { |
| 139 | for (auto& object : objects) { | 143 | if (object->is_modified && object->is_registered) { |
| 140 | if (object->IsModified() && object->IsRegistered()) { | ||
| 141 | mutex.unlock(); | 144 | mutex.unlock(); |
| 142 | FlushMap(object); | 145 | FlushMap(object); |
| 143 | mutex.lock(); | 146 | mutex.lock(); |
| @@ -148,9 +151,9 @@ public: | |||
| 148 | bool MustFlushRegion(VAddr addr, std::size_t size) { | 151 | bool MustFlushRegion(VAddr addr, std::size_t size) { |
| 149 | std::lock_guard lock{mutex}; | 152 | std::lock_guard lock{mutex}; |
| 150 | 153 | ||
| 151 | const std::vector<MapInterval> objects = GetMapsInRange(addr, size); | 154 | const VectorMapInterval objects = GetMapsInRange(addr, size); |
| 152 | return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval& map) { | 155 | return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval* map) { |
| 153 | return map->IsModified() && map->IsRegistered(); | 156 | return map->is_modified && map->is_registered; |
| 154 | }); | 157 | }); |
| 155 | } | 158 | } |
| 156 | 159 | ||
| @@ -158,9 +161,8 @@ public: | |||
| 158 | void InvalidateRegion(VAddr addr, u64 size) { | 161 | void InvalidateRegion(VAddr addr, u64 size) { |
| 159 | std::lock_guard lock{mutex}; | 162 | std::lock_guard lock{mutex}; |
| 160 | 163 | ||
| 161 | std::vector<MapInterval> objects = GetMapsInRange(addr, size); | 164 | for (auto& object : GetMapsInRange(addr, size)) { |
| 162 | for (auto& object : objects) { | 165 | if (object->is_registered) { |
| 163 | if (object->IsRegistered()) { | ||
| 164 | Unregister(object); | 166 | Unregister(object); |
| 165 | } | 167 | } |
| 166 | } | 168 | } |
| @@ -169,10 +171,10 @@ public: | |||
| 169 | void OnCPUWrite(VAddr addr, std::size_t size) { | 171 | void OnCPUWrite(VAddr addr, std::size_t size) { |
| 170 | std::lock_guard lock{mutex}; | 172 | std::lock_guard lock{mutex}; |
| 171 | 173 | ||
| 172 | for (const auto& object : GetMapsInRange(addr, size)) { | 174 | for (MapInterval* object : GetMapsInRange(addr, size)) { |
| 173 | if (object->IsMemoryMarked() && object->IsRegistered()) { | 175 | if (object->is_memory_marked && object->is_registered) { |
| 174 | UnmarkMemory(object); | 176 | UnmarkMemory(object); |
| 175 | object->SetSyncPending(true); | 177 | object->is_sync_pending = true; |
| 176 | marked_for_unregister.emplace_back(object); | 178 | marked_for_unregister.emplace_back(object); |
| 177 | } | 179 | } |
| 178 | } | 180 | } |
| @@ -181,9 +183,9 @@ public: | |||
| 181 | void SyncGuestHost() { | 183 | void SyncGuestHost() { |
| 182 | std::lock_guard lock{mutex}; | 184 | std::lock_guard lock{mutex}; |
| 183 | 185 | ||
| 184 | for (const auto& object : marked_for_unregister) { | 186 | for (auto& object : marked_for_unregister) { |
| 185 | if (object->IsRegistered()) { | 187 | if (object->is_registered) { |
| 186 | object->SetSyncPending(false); | 188 | object->is_sync_pending = false; |
| 187 | Unregister(object); | 189 | Unregister(object); |
| 188 | } | 190 | } |
| 189 | } | 191 | } |
| @@ -192,9 +194,9 @@ public: | |||
| 192 | 194 | ||
| 193 | void CommitAsyncFlushes() { | 195 | void CommitAsyncFlushes() { |
| 194 | if (uncommitted_flushes) { | 196 | if (uncommitted_flushes) { |
| 195 | auto commit_list = std::make_shared<std::list<MapInterval>>(); | 197 | auto commit_list = std::make_shared<std::list<MapInterval*>>(); |
| 196 | for (auto& map : *uncommitted_flushes) { | 198 | for (MapInterval* map : *uncommitted_flushes) { |
| 197 | if (map->IsRegistered() && map->IsModified()) { | 199 | if (map->is_registered && map->is_modified) { |
| 198 | // TODO(Blinkhawk): Implement backend asynchronous flushing | 200 | // TODO(Blinkhawk): Implement backend asynchronous flushing |
| 199 | // AsyncFlushMap(map) | 201 | // AsyncFlushMap(map) |
| 200 | commit_list->push_back(map); | 202 | commit_list->push_back(map); |
| @@ -228,8 +230,8 @@ public: | |||
| 228 | committed_flushes.pop_front(); | 230 | committed_flushes.pop_front(); |
| 229 | return; | 231 | return; |
| 230 | } | 232 | } |
| 231 | for (MapInterval& map : *flush_list) { | 233 | for (MapInterval* map : *flush_list) { |
| 232 | if (map->IsRegistered()) { | 234 | if (map->is_registered) { |
| 233 | // TODO(Blinkhawk): Replace this for reading the asynchronous flush | 235 | // TODO(Blinkhawk): Replace this for reading the asynchronous flush |
| 234 | FlushMap(map); | 236 | FlushMap(map); |
| 235 | } | 237 | } |
| @@ -265,61 +267,60 @@ protected: | |||
| 265 | } | 267 | } |
| 266 | 268 | ||
| 267 | /// Register an object into the cache | 269 | /// Register an object into the cache |
| 268 | void Register(const MapInterval& new_map, bool inherit_written = false) { | 270 | MapInterval* Register(MapInterval new_map, bool inherit_written = false) { |
| 269 | const VAddr cpu_addr = new_map->GetStart(); | 271 | const VAddr cpu_addr = new_map.start; |
| 270 | if (!cpu_addr) { | 272 | if (!cpu_addr) { |
| 271 | LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}", | 273 | LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}", |
| 272 | new_map->GetGpuAddress()); | 274 | new_map.gpu_addr); |
| 273 | return; | 275 | return nullptr; |
| 274 | } | 276 | } |
| 275 | const std::size_t size = new_map->GetEnd() - new_map->GetStart(); | 277 | const std::size_t size = new_map.end - new_map.start; |
| 276 | new_map->MarkAsRegistered(true); | 278 | new_map.is_registered = true; |
| 277 | const IntervalType interval{new_map->GetStart(), new_map->GetEnd()}; | ||
| 278 | mapped_addresses.insert({interval, new_map}); | ||
| 279 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); | 279 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); |
| 280 | new_map->SetMemoryMarked(true); | 280 | new_map.is_memory_marked = true; |
| 281 | if (inherit_written) { | 281 | if (inherit_written) { |
| 282 | MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1); | 282 | MarkRegionAsWritten(new_map.start, new_map.end - 1); |
| 283 | new_map->MarkAsWritten(true); | 283 | new_map.is_written = true; |
| 284 | } | 284 | } |
| 285 | MapInterval* const storage = mapped_addresses_allocator.Allocate(); | ||
| 286 | *storage = new_map; | ||
| 287 | mapped_addresses.insert(*storage); | ||
| 288 | return storage; | ||
| 285 | } | 289 | } |
| 286 | 290 | ||
| 287 | void UnmarkMemory(const MapInterval& map) { | 291 | void UnmarkMemory(MapInterval* map) { |
| 288 | if (!map->IsMemoryMarked()) { | 292 | if (!map->is_memory_marked) { |
| 289 | return; | 293 | return; |
| 290 | } | 294 | } |
| 291 | const std::size_t size = map->GetEnd() - map->GetStart(); | 295 | const std::size_t size = map->end - map->start; |
| 292 | rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1); | 296 | rasterizer.UpdatePagesCachedCount(map->start, size, -1); |
| 293 | map->SetMemoryMarked(false); | 297 | map->is_memory_marked = false; |
| 294 | } | 298 | } |
| 295 | 299 | ||
| 296 | /// Unregisters an object from the cache | 300 | /// Unregisters an object from the cache |
| 297 | void Unregister(const MapInterval& map) { | 301 | void Unregister(MapInterval* map) { |
| 298 | UnmarkMemory(map); | 302 | UnmarkMemory(map); |
| 299 | map->MarkAsRegistered(false); | 303 | map->is_registered = false; |
| 300 | if (map->IsSyncPending()) { | 304 | if (map->is_sync_pending) { |
| 305 | map->is_sync_pending = false; | ||
| 301 | marked_for_unregister.remove(map); | 306 | marked_for_unregister.remove(map); |
| 302 | map->SetSyncPending(false); | ||
| 303 | } | 307 | } |
| 304 | if (map->IsWritten()) { | 308 | if (map->is_written) { |
| 305 | UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); | 309 | UnmarkRegionAsWritten(map->start, map->end - 1); |
| 306 | } | 310 | } |
| 307 | const IntervalType delete_interval{map->GetStart(), map->GetEnd()}; | 311 | const auto it = mapped_addresses.find(*map); |
| 308 | mapped_addresses.erase(delete_interval); | 312 | ASSERT(it != mapped_addresses.end()); |
| 313 | mapped_addresses.erase(it); | ||
| 314 | mapped_addresses_allocator.Release(map); | ||
| 309 | } | 315 | } |
| 310 | 316 | ||
| 311 | private: | 317 | private: |
| 312 | MapInterval CreateMap(const VAddr start, const VAddr end, const GPUVAddr gpu_addr) { | 318 | MapInterval* MapAddress(const OwnerBuffer& block, GPUVAddr gpu_addr, VAddr cpu_addr, |
| 313 | return std::make_shared<MapIntervalBase>(start, end, gpu_addr); | 319 | std::size_t size) { |
| 314 | } | 320 | const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size); |
| 315 | |||
| 316 | MapInterval MapAddress(const OwnerBuffer& block, const GPUVAddr gpu_addr, const VAddr cpu_addr, | ||
| 317 | const std::size_t size) { | ||
| 318 | std::vector<MapInterval> overlaps = GetMapsInRange(cpu_addr, size); | ||
| 319 | if (overlaps.empty()) { | 321 | if (overlaps.empty()) { |
| 320 | auto& memory_manager = system.GPU().MemoryManager(); | 322 | auto& memory_manager = system.GPU().MemoryManager(); |
| 321 | const VAddr cpu_addr_end = cpu_addr + size; | 323 | const VAddr cpu_addr_end = cpu_addr + size; |
| 322 | MapInterval new_map = CreateMap(cpu_addr, cpu_addr_end, gpu_addr); | ||
| 323 | if (memory_manager.IsGranularRange(gpu_addr, size)) { | 324 | if (memory_manager.IsGranularRange(gpu_addr, size)) { |
| 324 | u8* host_ptr = memory_manager.GetPointer(gpu_addr); | 325 | u8* host_ptr = memory_manager.GetPointer(gpu_addr); |
| 325 | UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr); | 326 | UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr); |
| @@ -328,13 +329,12 @@ private: | |||
| 328 | memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); | 329 | memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); |
| 329 | UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data()); | 330 | UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data()); |
| 330 | } | 331 | } |
| 331 | Register(new_map); | 332 | return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr)); |
| 332 | return new_map; | ||
| 333 | } | 333 | } |
| 334 | 334 | ||
| 335 | const VAddr cpu_addr_end = cpu_addr + size; | 335 | const VAddr cpu_addr_end = cpu_addr + size; |
| 336 | if (overlaps.size() == 1) { | 336 | if (overlaps.size() == 1) { |
| 337 | MapInterval& current_map = overlaps[0]; | 337 | MapInterval* const current_map = overlaps[0]; |
| 338 | if (current_map->IsInside(cpu_addr, cpu_addr_end)) { | 338 | if (current_map->IsInside(cpu_addr, cpu_addr_end)) { |
| 339 | return current_map; | 339 | return current_map; |
| 340 | } | 340 | } |
| @@ -344,35 +344,39 @@ private: | |||
| 344 | bool write_inheritance = false; | 344 | bool write_inheritance = false; |
| 345 | bool modified_inheritance = false; | 345 | bool modified_inheritance = false; |
| 346 | // Calculate new buffer parameters | 346 | // Calculate new buffer parameters |
| 347 | for (auto& overlap : overlaps) { | 347 | for (MapInterval* overlap : overlaps) { |
| 348 | new_start = std::min(overlap->GetStart(), new_start); | 348 | new_start = std::min(overlap->start, new_start); |
| 349 | new_end = std::max(overlap->GetEnd(), new_end); | 349 | new_end = std::max(overlap->end, new_end); |
| 350 | write_inheritance |= overlap->IsWritten(); | 350 | write_inheritance |= overlap->is_written; |
| 351 | modified_inheritance |= overlap->IsModified(); | 351 | modified_inheritance |= overlap->is_modified; |
| 352 | } | 352 | } |
| 353 | GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr; | 353 | GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr; |
| 354 | for (auto& overlap : overlaps) { | 354 | for (auto& overlap : overlaps) { |
| 355 | Unregister(overlap); | 355 | Unregister(overlap); |
| 356 | } | 356 | } |
| 357 | UpdateBlock(block, new_start, new_end, overlaps); | 357 | UpdateBlock(block, new_start, new_end, overlaps); |
| 358 | MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr); | 358 | |
| 359 | const MapInterval new_map{new_start, new_end, new_gpu_addr}; | ||
| 360 | MapInterval* const map = Register(new_map, write_inheritance); | ||
| 361 | if (!map) { | ||
| 362 | return nullptr; | ||
| 363 | } | ||
| 359 | if (modified_inheritance) { | 364 | if (modified_inheritance) { |
| 360 | new_map->MarkAsModified(true, GetModifiedTicks()); | 365 | map->MarkAsModified(true, GetModifiedTicks()); |
| 361 | if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) { | 366 | if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) { |
| 362 | MarkForAsyncFlush(new_map); | 367 | MarkForAsyncFlush(map); |
| 363 | } | 368 | } |
| 364 | } | 369 | } |
| 365 | Register(new_map, write_inheritance); | 370 | return map; |
| 366 | return new_map; | ||
| 367 | } | 371 | } |
| 368 | 372 | ||
| 369 | void UpdateBlock(const OwnerBuffer& block, VAddr start, VAddr end, | 373 | void UpdateBlock(const OwnerBuffer& block, VAddr start, VAddr end, |
| 370 | std::vector<MapInterval>& overlaps) { | 374 | const VectorMapInterval& overlaps) { |
| 371 | const IntervalType base_interval{start, end}; | 375 | const IntervalType base_interval{start, end}; |
| 372 | IntervalSet interval_set{}; | 376 | IntervalSet interval_set{}; |
| 373 | interval_set.add(base_interval); | 377 | interval_set.add(base_interval); |
| 374 | for (auto& overlap : overlaps) { | 378 | for (auto& overlap : overlaps) { |
| 375 | const IntervalType subtract{overlap->GetStart(), overlap->GetEnd()}; | 379 | const IntervalType subtract{overlap->start, overlap->end}; |
| 376 | interval_set.subtract(subtract); | 380 | interval_set.subtract(subtract); |
| 377 | } | 381 | } |
| 378 | for (auto& interval : interval_set) { | 382 | for (auto& interval : interval_set) { |
| @@ -386,18 +390,24 @@ private: | |||
| 386 | } | 390 | } |
| 387 | } | 391 | } |
| 388 | 392 | ||
| 389 | std::vector<MapInterval> GetMapsInRange(VAddr addr, std::size_t size) { | 393 | VectorMapInterval GetMapsInRange(VAddr addr, std::size_t size) { |
| 394 | VectorMapInterval result; | ||
| 390 | if (size == 0) { | 395 | if (size == 0) { |
| 391 | return {}; | 396 | return result; |
| 392 | } | 397 | } |
| 393 | 398 | ||
| 394 | std::vector<MapInterval> objects{}; | 399 | const VAddr addr_end = addr + size; |
| 395 | const IntervalType interval{addr, addr + size}; | 400 | auto it = mapped_addresses.lower_bound(addr); |
| 396 | for (auto& pair : boost::make_iterator_range(mapped_addresses.equal_range(interval))) { | 401 | if (it != mapped_addresses.begin()) { |
| 397 | objects.push_back(pair.second); | 402 | --it; |
| 398 | } | 403 | } |
| 399 | 404 | while (it != mapped_addresses.end() && it->start < addr_end) { | |
| 400 | return objects; | 405 | if (it->Overlaps(addr, addr_end)) { |
| 406 | result.push_back(&*it); | ||
| 407 | } | ||
| 408 | ++it; | ||
| 409 | } | ||
| 410 | return result; | ||
| 401 | } | 411 | } |
| 402 | 412 | ||
| 403 | /// Returns a ticks counter used for tracking when cached objects were last modified | 413 | /// Returns a ticks counter used for tracking when cached objects were last modified |
| @@ -405,12 +415,12 @@ private: | |||
| 405 | return ++modified_ticks; | 415 | return ++modified_ticks; |
| 406 | } | 416 | } |
| 407 | 417 | ||
| 408 | void FlushMap(MapInterval map) { | 418 | void FlushMap(MapInterval* map) { |
| 409 | std::size_t size = map->GetEnd() - map->GetStart(); | 419 | const std::size_t size = map->end - map->start; |
| 410 | OwnerBuffer block = blocks[map->GetStart() >> block_page_bits]; | 420 | OwnerBuffer block = blocks[map->start >> block_page_bits]; |
| 411 | staging_buffer.resize(size); | 421 | staging_buffer.resize(size); |
| 412 | DownloadBlockData(block, block->GetOffset(map->GetStart()), size, staging_buffer.data()); | 422 | DownloadBlockData(block, block->GetOffset(map->start), size, staging_buffer.data()); |
| 413 | system.Memory().WriteBlockUnsafe(map->GetStart(), staging_buffer.data(), size); | 423 | system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size); |
| 414 | map->MarkAsModified(false, 0); | 424 | map->MarkAsModified(false, 0); |
| 415 | } | 425 | } |
| 416 | 426 | ||
| @@ -515,7 +525,7 @@ private: | |||
| 515 | } else { | 525 | } else { |
| 516 | written_pages[page_start] = 1; | 526 | written_pages[page_start] = 1; |
| 517 | } | 527 | } |
| 518 | page_start++; | 528 | ++page_start; |
| 519 | } | 529 | } |
| 520 | } | 530 | } |
| 521 | 531 | ||
| @@ -531,7 +541,7 @@ private: | |||
| 531 | written_pages.erase(it); | 541 | written_pages.erase(it); |
| 532 | } | 542 | } |
| 533 | } | 543 | } |
| 534 | page_start++; | 544 | ++page_start; |
| 535 | } | 545 | } |
| 536 | } | 546 | } |
| 537 | 547 | ||
| @@ -542,14 +552,14 @@ private: | |||
| 542 | if (written_pages.count(page_start) > 0) { | 552 | if (written_pages.count(page_start) > 0) { |
| 543 | return true; | 553 | return true; |
| 544 | } | 554 | } |
| 545 | page_start++; | 555 | ++page_start; |
| 546 | } | 556 | } |
| 547 | return false; | 557 | return false; |
| 548 | } | 558 | } |
| 549 | 559 | ||
| 550 | void MarkForAsyncFlush(MapInterval& map) { | 560 | void MarkForAsyncFlush(MapInterval* map) { |
| 551 | if (!uncommitted_flushes) { | 561 | if (!uncommitted_flushes) { |
| 552 | uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval>>(); | 562 | uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval*>>(); |
| 553 | } | 563 | } |
| 554 | uncommitted_flushes->insert(map); | 564 | uncommitted_flushes->insert(map); |
| 555 | } | 565 | } |
| @@ -566,10 +576,9 @@ private: | |||
| 566 | u64 buffer_offset = 0; | 576 | u64 buffer_offset = 0; |
| 567 | u64 buffer_offset_base = 0; | 577 | u64 buffer_offset_base = 0; |
| 568 | 578 | ||
| 569 | using IntervalSet = boost::icl::interval_set<VAddr>; | 579 | MapIntervalAllocator mapped_addresses_allocator; |
| 570 | using IntervalCache = boost::icl::interval_map<VAddr, MapInterval>; | 580 | boost::intrusive::set<MapInterval, boost::intrusive::compare<MapIntervalCompare>> |
| 571 | using IntervalType = typename IntervalCache::interval_type; | 581 | mapped_addresses; |
| 572 | IntervalCache mapped_addresses; | ||
| 573 | 582 | ||
| 574 | static constexpr u64 write_page_bit = 11; | 583 | static constexpr u64 write_page_bit = 11; |
| 575 | std::unordered_map<u64, u32> written_pages; | 584 | std::unordered_map<u64, u32> written_pages; |
| @@ -583,10 +592,10 @@ private: | |||
| 583 | u64 modified_ticks = 0; | 592 | u64 modified_ticks = 0; |
| 584 | 593 | ||
| 585 | std::vector<u8> staging_buffer; | 594 | std::vector<u8> staging_buffer; |
| 586 | std::list<MapInterval> marked_for_unregister; | 595 | std::list<MapInterval*> marked_for_unregister; |
| 587 | 596 | ||
| 588 | std::shared_ptr<std::unordered_set<MapInterval>> uncommitted_flushes{}; | 597 | std::shared_ptr<std::unordered_set<MapInterval*>> uncommitted_flushes; |
| 589 | std::list<std::shared_ptr<std::list<MapInterval>>> committed_flushes; | 598 | std::list<std::shared_ptr<std::list<MapInterval*>>> committed_flushes; |
| 590 | 599 | ||
| 591 | std::recursive_mutex mutex; | 600 | std::recursive_mutex mutex; |
| 592 | }; | 601 | }; |
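The bookkeeping behind this file changes shape: MapInterval is no longer a std::shared_ptr stored in a boost::icl::interval_map, but a plain node owned by a pool allocator (mapped_addresses_allocator) and linked into a boost::intrusive::set ordered by start address, so GetMapsInRange becomes a walk over an ordered set. A self-contained sketch of that overlap query, using std::set in place of the intrusive set so it compiles on its own; Interval and FindOverlaps are illustrative names, not types from the diff:

#include <cstdint>
#include <set>
#include <vector>

struct Interval {
    std::uint64_t start;
    std::uint64_t end; // half-open: [start, end)
    bool operator<(const Interval& rhs) const noexcept { return start < rhs.start; }
};

std::vector<const Interval*> FindOverlaps(const std::set<Interval>& intervals,
                                          std::uint64_t addr, std::uint64_t size) {
    std::vector<const Interval*> result;
    if (size == 0) {
        return result;
    }
    const std::uint64_t addr_end = addr + size;
    auto it = intervals.lower_bound(Interval{addr, 0});
    if (it != intervals.begin()) {
        --it; // the previous interval may start before addr and still overlap
    }
    for (; it != intervals.end() && it->start < addr_end; ++it) {
        if (addr < it->end) { // together with the loop condition, this is the overlap test
            result.push_back(&*it);
        }
    }
    return result;
}

As in the real cache, lookups stay logarithmic while insert and erase no longer churn shared_ptr reference counts or interval_map nodes.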
diff --git a/src/video_core/buffer_cache/map_interval.cpp b/src/video_core/buffer_cache/map_interval.cpp
new file mode 100644
index 000000000..62587e18a
--- /dev/null
+++ b/src/video_core/buffer_cache/map_interval.cpp
| @@ -0,0 +1,33 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <memory> | ||
| 9 | |||
| 10 | #include "video_core/buffer_cache/map_interval.h" | ||
| 11 | |||
| 12 | namespace VideoCommon { | ||
| 13 | |||
| 14 | MapIntervalAllocator::MapIntervalAllocator() { | ||
| 15 | FillFreeList(first_chunk); | ||
| 16 | } | ||
| 17 | |||
| 18 | MapIntervalAllocator::~MapIntervalAllocator() = default; | ||
| 19 | |||
| 20 | void MapIntervalAllocator::AllocateNewChunk() { | ||
| 21 | *new_chunk = std::make_unique<Chunk>(); | ||
| 22 | FillFreeList(**new_chunk); | ||
| 23 | new_chunk = &(*new_chunk)->next; | ||
| 24 | } | ||
| 25 | |||
| 26 | void MapIntervalAllocator::FillFreeList(Chunk& chunk) { | ||
| 27 | const std::size_t old_size = free_list.size(); | ||
| 28 | free_list.resize(old_size + chunk.data.size()); | ||
| 29 | std::transform(chunk.data.rbegin(), chunk.data.rend(), free_list.begin() + old_size, | ||
| 30 | [](MapInterval& interval) { return &interval; }); | ||
| 31 | } | ||
| 32 | |||
| 33 | } // namespace VideoCommon | ||
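The new allocator is a grow-only pool: MapInterval nodes are carved out of fixed 0x8000-entry chunks that are never moved or freed while the cache lives, which is what keeps the raw MapInterval* pointers stored in the intrusive set and in marked_for_unregister stable. A simplified sketch of the same pattern; Pool is an illustrative stand-in for MapIntervalAllocator with a much smaller chunk size:

#include <array>
#include <memory>
#include <vector>

template <typename T>
class Pool {
public:
    T* Allocate() {
        if (free_list.empty()) {
            AllocateNewChunk();
        }
        T* const node = free_list.back();
        free_list.pop_back();
        return node;
    }

    void Release(T* node) {
        free_list.push_back(node); // the chunk itself is never returned to the heap
    }

private:
    struct Chunk {
        std::array<T, 64> data{};
    };

    void AllocateNewChunk() {
        chunks.push_back(std::make_unique<Chunk>());
        for (T& node : chunks.back()->data) {
            free_list.push_back(&node);
        }
    }

    std::vector<std::unique_ptr<Chunk>> chunks;
    std::vector<T*> free_list;
};

The real allocator keeps its chunks in an intrusive singly linked list (first_chunk plus a next pointer) instead of a vector, but the behaviour is the same: pointers handed out by Allocate() stay valid until the allocator itself is destroyed.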
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h
index 29d8b26f3..fe0bcd1d8 100644
--- a/src/video_core/buffer_cache/map_interval.h
+++ b/src/video_core/buffer_cache/map_interval.h
| @@ -4,104 +4,89 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | ||
| 8 | #include <cstddef> | ||
| 9 | #include <memory> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include <boost/intrusive/set_hook.hpp> | ||
| 13 | |||
| 7 | #include "common/common_types.h" | 14 | #include "common/common_types.h" |
| 8 | #include "video_core/gpu.h" | 15 | #include "video_core/gpu.h" |
| 9 | 16 | ||
| 10 | namespace VideoCommon { | 17 | namespace VideoCommon { |
| 11 | 18 | ||
| 12 | class MapIntervalBase { | 19 | struct MapInterval : public boost::intrusive::set_base_hook<boost::intrusive::optimize_size<true>> { |
| 13 | public: | 20 | MapInterval() = default; |
| 14 | MapIntervalBase(const VAddr start, const VAddr end, const GPUVAddr gpu_addr) | ||
| 15 | : start{start}, end{end}, gpu_addr{gpu_addr} {} | ||
| 16 | |||
| 17 | void SetCpuAddress(VAddr new_cpu_addr) { | ||
| 18 | cpu_addr = new_cpu_addr; | ||
| 19 | } | ||
| 20 | |||
| 21 | VAddr GetCpuAddress() const { | ||
| 22 | return cpu_addr; | ||
| 23 | } | ||
| 24 | |||
| 25 | GPUVAddr GetGpuAddress() const { | ||
| 26 | return gpu_addr; | ||
| 27 | } | ||
| 28 | |||
| 29 | bool IsInside(const VAddr other_start, const VAddr other_end) const { | ||
| 30 | return (start <= other_start && other_end <= end); | ||
| 31 | } | ||
| 32 | |||
| 33 | bool operator==(const MapIntervalBase& rhs) const { | ||
| 34 | return std::tie(start, end) == std::tie(rhs.start, rhs.end); | ||
| 35 | } | ||
| 36 | |||
| 37 | bool operator!=(const MapIntervalBase& rhs) const { | ||
| 38 | return !operator==(rhs); | ||
| 39 | } | ||
| 40 | 21 | ||
| 41 | void MarkAsRegistered(const bool registered) { | 22 | /*implicit*/ MapInterval(VAddr start_) noexcept : start{start_} {} |
| 42 | is_registered = registered; | ||
| 43 | } | ||
| 44 | 23 | ||
| 45 | bool IsRegistered() const { | 24 | explicit MapInterval(VAddr start_, VAddr end_, GPUVAddr gpu_addr_) noexcept |
| 46 | return is_registered; | 25 | : start{start_}, end{end_}, gpu_addr{gpu_addr_} {} |
| 47 | } | ||
| 48 | 26 | ||
| 49 | void SetMemoryMarked(bool is_memory_marked_) { | 27 | bool IsInside(VAddr other_start, VAddr other_end) const noexcept { |
| 50 | is_memory_marked = is_memory_marked_; | 28 | return start <= other_start && other_end <= end; |
| 51 | } | 29 | } |
| 52 | 30 | ||
| 53 | bool IsMemoryMarked() const { | 31 | bool Overlaps(VAddr other_start, VAddr other_end) const noexcept { |
| 54 | return is_memory_marked; | 32 | return start < other_end && other_start < end; |
| 55 | } | 33 | } |
| 56 | 34 | ||
| 57 | void SetSyncPending(bool is_sync_pending_) { | 35 | void MarkAsModified(bool is_modified_, u64 ticks_) noexcept { |
| 58 | is_sync_pending = is_sync_pending_; | 36 | is_modified = is_modified_; |
| 59 | } | 37 | ticks = ticks_; |
| 38 | } | ||
| 39 | |||
| 40 | boost::intrusive::set_member_hook<> member_hook_; | ||
| 41 | VAddr start = 0; | ||
| 42 | VAddr end = 0; | ||
| 43 | GPUVAddr gpu_addr = 0; | ||
| 44 | u64 ticks = 0; | ||
| 45 | bool is_written = false; | ||
| 46 | bool is_modified = false; | ||
| 47 | bool is_registered = false; | ||
| 48 | bool is_memory_marked = false; | ||
| 49 | bool is_sync_pending = false; | ||
| 50 | }; | ||
| 60 | 51 | ||
| 61 | bool IsSyncPending() const { | 52 | struct MapIntervalCompare { |
| 62 | return is_sync_pending; | 53 | constexpr bool operator()(const MapInterval& lhs, const MapInterval& rhs) const noexcept { |
| 54 | return lhs.start < rhs.start; | ||
| 63 | } | 55 | } |
| 56 | }; | ||
| 64 | 57 | ||
| 65 | VAddr GetStart() const { | 58 | class MapIntervalAllocator { |
| 66 | return start; | 59 | public: |
| 67 | } | 60 | MapIntervalAllocator(); |
| 61 | ~MapIntervalAllocator(); | ||
| 68 | 62 | ||
| 69 | VAddr GetEnd() const { | 63 | MapInterval* Allocate() { |
| 70 | return end; | 64 | if (free_list.empty()) { |
| 65 | AllocateNewChunk(); | ||
| 66 | } | ||
| 67 | MapInterval* const interval = free_list.back(); | ||
| 68 | free_list.pop_back(); | ||
| 69 | return interval; | ||
| 71 | } | 70 | } |
| 72 | 71 | ||
| 73 | void MarkAsModified(const bool is_modified_, const u64 tick) { | 72 | void Release(MapInterval* interval) { |
| 74 | is_modified = is_modified_; | 73 | free_list.push_back(interval); |
| 75 | ticks = tick; | ||
| 76 | } | 74 | } |
| 77 | 75 | ||
| 78 | bool IsModified() const { | 76 | private: |
| 79 | return is_modified; | 77 | struct Chunk { |
| 80 | } | 78 | std::unique_ptr<Chunk> next; |
| 79 | std::array<MapInterval, 0x8000> data; | ||
| 80 | }; | ||
| 81 | 81 | ||
| 82 | u64 GetModificationTick() const { | 82 | void AllocateNewChunk(); |
| 83 | return ticks; | ||
| 84 | } | ||
| 85 | 83 | ||
| 86 | void MarkAsWritten(const bool is_written_) { | 84 | void FillFreeList(Chunk& chunk); |
| 87 | is_written = is_written_; | ||
| 88 | } | ||
| 89 | 85 | ||
| 90 | bool IsWritten() const { | 86 | std::vector<MapInterval*> free_list; |
| 91 | return is_written; | 87 | std::unique_ptr<Chunk>* new_chunk = &first_chunk.next; |
| 92 | } | ||
| 93 | 88 | ||
| 94 | private: | 89 | Chunk first_chunk; |
| 95 | VAddr start; | ||
| 96 | VAddr end; | ||
| 97 | GPUVAddr gpu_addr; | ||
| 98 | VAddr cpu_addr{}; | ||
| 99 | bool is_written{}; | ||
| 100 | bool is_modified{}; | ||
| 101 | bool is_registered{}; | ||
| 102 | bool is_memory_marked{}; | ||
| 103 | bool is_sync_pending{}; | ||
| 104 | u64 ticks{}; | ||
| 105 | }; | 90 | }; |
| 106 | 91 | ||
| 107 | } // namespace VideoCommon | 92 | } // namespace VideoCommon |
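Because MapInterval now derives from set_base_hook, inserting it into the cache only links a node that already lives in allocator storage; nothing is copied or heap-allocated per insert, and ordering comes from MapIntervalCompare (start address). A small usage sketch under those assumptions; the local array stands in for MapIntervalAllocator storage, and with the default safe-link hooks the set must be cleared before that storage goes away:

#include <array>

#include <boost/intrusive/set.hpp>

#include "video_core/buffer_cache/map_interval.h"

using VideoCommon::MapInterval;
using VideoCommon::MapIntervalCompare;
using MapSet = boost::intrusive::set<MapInterval, boost::intrusive::compare<MapIntervalCompare>>;

void IntrusiveSetExample() {
    std::array<MapInterval, 2> storage{MapInterval{0x1000, 0x2000, 0},
                                       MapInterval{0x3000, 0x4000, 0}};
    MapSet set;
    set.insert(storage[0]); // links the existing node; no allocation, no copy
    set.insert(storage[1]);

    // Lookups are keyed on MapInterval::start, which is what GetMapsInRange relies on.
    [[maybe_unused]] const auto it = set.lower_bound(MapInterval{0x1800});

    set.clear(); // unlink before the storage is destroyed (safe-link hooks assert otherwise)
}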
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
index 22987751e..096ee337c 100644
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
| @@ -56,9 +56,27 @@ public: | |||
| 56 | last_modified_ticks = cache.GetModifiedTicks(); | 56 | last_modified_ticks = cache.GetModifiedTicks(); |
| 57 | } | 57 | } |
| 58 | 58 | ||
| 59 | void SetMemoryMarked(bool is_memory_marked_) { | ||
| 60 | is_memory_marked = is_memory_marked_; | ||
| 61 | } | ||
| 62 | |||
| 63 | bool IsMemoryMarked() const { | ||
| 64 | return is_memory_marked; | ||
| 65 | } | ||
| 66 | |||
| 67 | void SetSyncPending(bool is_sync_pending_) { | ||
| 68 | is_sync_pending = is_sync_pending_; | ||
| 69 | } | ||
| 70 | |||
| 71 | bool IsSyncPending() const { | ||
| 72 | return is_sync_pending; | ||
| 73 | } | ||
| 74 | |||
| 59 | private: | 75 | private: |
| 60 | bool is_registered{}; ///< Whether the object is currently registered with the cache | 76 | bool is_registered{}; ///< Whether the object is currently registered with the cache |
| 61 | bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory) | 77 | bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory) |
| 78 | bool is_memory_marked{}; ///< Whether the object is marking rasterizer memory. | ||
| 79 | bool is_sync_pending{}; ///< Whether the object is pending deletion. | ||
| 62 | u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing | 80 | u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing |
| 63 | VAddr cpu_addr{}; ///< Cpu address memory, unique from emulated virtual address space | 81 | VAddr cpu_addr{}; ///< Cpu address memory, unique from emulated virtual address space |
| 64 | }; | 82 | }; |
| @@ -94,6 +112,30 @@ public: | |||
| 94 | } | 112 | } |
| 95 | } | 113 | } |
| 96 | 114 | ||
| 115 | void OnCPUWrite(VAddr addr, std::size_t size) { | ||
| 116 | std::lock_guard lock{mutex}; | ||
| 117 | |||
| 118 | for (const auto& object : GetSortedObjectsFromRegion(addr, size)) { | ||
| 119 | if (object->IsRegistered()) { | ||
| 120 | UnmarkMemory(object); | ||
| 121 | object->SetSyncPending(true); | ||
| 122 | marked_for_unregister.emplace_back(object); | ||
| 123 | } | ||
| 124 | } | ||
| 125 | } | ||
| 126 | |||
| 127 | void SyncGuestHost() { | ||
| 128 | std::lock_guard lock{mutex}; | ||
| 129 | |||
| 130 | for (const auto& object : marked_for_unregister) { | ||
| 131 | if (object->IsRegistered()) { | ||
| 132 | object->SetSyncPending(false); | ||
| 133 | Unregister(object); | ||
| 134 | } | ||
| 135 | } | ||
| 136 | marked_for_unregister.clear(); | ||
| 137 | } | ||
| 138 | |||
| 97 | /// Invalidates everything in the cache | 139 | /// Invalidates everything in the cache |
| 98 | void InvalidateAll() { | 140 | void InvalidateAll() { |
| 99 | std::lock_guard lock{mutex}; | 141 | std::lock_guard lock{mutex}; |
| @@ -120,19 +162,32 @@ protected: | |||
| 120 | interval_cache.add({GetInterval(object), ObjectSet{object}}); | 162 | interval_cache.add({GetInterval(object), ObjectSet{object}}); |
| 121 | map_cache.insert({object->GetCpuAddr(), object}); | 163 | map_cache.insert({object->GetCpuAddr(), object}); |
| 122 | rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1); | 164 | rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1); |
| 165 | object->SetMemoryMarked(true); | ||
| 123 | } | 166 | } |
| 124 | 167 | ||
| 125 | /// Unregisters an object from the cache | 168 | /// Unregisters an object from the cache |
| 126 | virtual void Unregister(const T& object) { | 169 | virtual void Unregister(const T& object) { |
| 127 | std::lock_guard lock{mutex}; | 170 | std::lock_guard lock{mutex}; |
| 128 | 171 | ||
| 172 | UnmarkMemory(object); | ||
| 129 | object->SetIsRegistered(false); | 173 | object->SetIsRegistered(false); |
| 130 | rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1); | 174 | if (object->IsSyncPending()) { |
| 175 | marked_for_unregister.remove(object); | ||
| 176 | object->SetSyncPending(false); | ||
| 177 | } | ||
| 131 | const VAddr addr = object->GetCpuAddr(); | 178 | const VAddr addr = object->GetCpuAddr(); |
| 132 | interval_cache.subtract({GetInterval(object), ObjectSet{object}}); | 179 | interval_cache.subtract({GetInterval(object), ObjectSet{object}}); |
| 133 | map_cache.erase(addr); | 180 | map_cache.erase(addr); |
| 134 | } | 181 | } |
| 135 | 182 | ||
| 183 | void UnmarkMemory(const T& object) { | ||
| 184 | if (!object->IsMemoryMarked()) { | ||
| 185 | return; | ||
| 186 | } | ||
| 187 | rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1); | ||
| 188 | object->SetMemoryMarked(false); | ||
| 189 | } | ||
| 190 | |||
| 136 | /// Returns a ticks counter used for tracking when cached objects were last modified | 191 | /// Returns a ticks counter used for tracking when cached objects were last modified |
| 137 | u64 GetModifiedTicks() { | 192 | u64 GetModifiedTicks() { |
| 138 | std::lock_guard lock{mutex}; | 193 | std::lock_guard lock{mutex}; |
| @@ -194,4 +249,5 @@ private: | |||
| 194 | IntervalCache interval_cache; ///< Cache of objects | 249 | IntervalCache interval_cache; ///< Cache of objects |
| 195 | u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing | 250 | u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing |
| 196 | VideoCore::RasterizerInterface& rasterizer; | 251 | VideoCore::RasterizerInterface& rasterizer; |
| 252 | std::list<T> marked_for_unregister; | ||
| 197 | }; | 253 | }; |
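These additions mirror the buffer cache: a CPU write no longer unregisters objects immediately. OnCPUWrite only unmarks the pages and queues the object, and SyncGuestHost later performs the real unregister. A condensed sketch of that two-phase flow; CachedObject and DeferredCache are illustrative stand-ins for the templated classes above:

#include <list>
#include <memory>

struct CachedObject {
    bool is_registered = true;
    bool is_sync_pending = false;
};

class DeferredCache {
public:
    void OnCPUWrite(const std::shared_ptr<CachedObject>& object) {
        if (object->is_registered) {
            object->is_sync_pending = true;             // remember it has to go away
            marked_for_unregister.emplace_back(object); // but defer the actual removal
        }
    }

    void SyncGuestHost() {
        for (const auto& object : marked_for_unregister) {
            if (object->is_registered) {
                object->is_sync_pending = false;
                object->is_registered = false; // the real cache calls Unregister() here
            }
        }
        marked_for_unregister.clear();
    }

private:
    std::list<std::shared_ptr<CachedObject>> marked_for_unregister;
};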
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index d2cab50bd..9964ea894 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
| @@ -8,6 +8,7 @@ | |||
| 8 | 8 | ||
| 9 | #include "common/assert.h" | 9 | #include "common/assert.h" |
| 10 | #include "common/microprofile.h" | 10 | #include "common/microprofile.h" |
| 11 | #include "video_core/buffer_cache/buffer_cache.h" | ||
| 11 | #include "video_core/engines/maxwell_3d.h" | 12 | #include "video_core/engines/maxwell_3d.h" |
| 12 | #include "video_core/rasterizer_interface.h" | 13 | #include "video_core/rasterizer_interface.h" |
| 13 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | 14 | #include "video_core/renderer_opengl/gl_buffer_cache.h" |
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index d83dca25a..466a911db 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
| @@ -13,6 +13,7 @@ | |||
| 13 | 13 | ||
| 14 | #include "common/logging/log.h" | 14 | #include "common/logging/log.h" |
| 15 | #include "common/scope_exit.h" | 15 | #include "common/scope_exit.h" |
| 16 | #include "core/settings.h" | ||
| 16 | #include "video_core/renderer_opengl/gl_device.h" | 17 | #include "video_core/renderer_opengl/gl_device.h" |
| 17 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 18 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 18 | 19 | ||
| @@ -183,10 +184,16 @@ Device::Device() : base_bindings{BuildBaseBindings()} { | |||
| 183 | has_precise_bug = TestPreciseBug(); | 184 | has_precise_bug = TestPreciseBug(); |
| 184 | has_broken_compute = is_intel_proprietary; | 185 | has_broken_compute = is_intel_proprietary; |
| 185 | has_fast_buffer_sub_data = is_nvidia; | 186 | has_fast_buffer_sub_data = is_nvidia; |
| 187 | use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && | ||
| 188 | GLAD_GL_NV_compute_program5; | ||
| 186 | 189 | ||
| 187 | LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); | 190 | LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); |
| 188 | LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); | 191 | LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); |
| 189 | LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug); | 192 | LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug); |
| 193 | |||
| 194 | if (Settings::values.use_assembly_shaders && !use_assembly_shaders) { | ||
| 195 | LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported"); | ||
| 196 | } | ||
| 190 | } | 197 | } |
| 191 | 198 | ||
| 192 | Device::Device(std::nullptr_t) { | 199 | Device::Device(std::nullptr_t) { |
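The gate here is: honour the user setting only when the driver actually exposes the NV assembly-shader extensions, and log when the request cannot be met. A condensed sketch of the same check; ResolveUseAssemblyShaders is an illustrative helper, not code from the diff:

#include <glad/glad.h>

bool ResolveUseAssemblyShaders(bool requested_by_user) {
    // Both extensions are required: NV_gpu_program5 covers the graphics stages and
    // NV_compute_program5 covers compute.
    const bool supported = GLAD_GL_NV_gpu_program5 != 0 && GLAD_GL_NV_compute_program5 != 0;
    return requested_by_user && supported; // the diff also logs an error when requested but unsupported
}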
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index a55050cb5..e915dbd86 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
| @@ -88,6 +88,10 @@ public: | |||
| 88 | return has_fast_buffer_sub_data; | 88 | return has_fast_buffer_sub_data; |
| 89 | } | 89 | } |
| 90 | 90 | ||
| 91 | bool UseAssemblyShaders() const { | ||
| 92 | return use_assembly_shaders; | ||
| 93 | } | ||
| 94 | |||
| 91 | private: | 95 | private: |
| 92 | static bool TestVariableAoffi(); | 96 | static bool TestVariableAoffi(); |
| 93 | static bool TestPreciseBug(); | 97 | static bool TestPreciseBug(); |
| @@ -107,6 +111,7 @@ private: | |||
| 107 | bool has_precise_bug{}; | 111 | bool has_precise_bug{}; |
| 108 | bool has_broken_compute{}; | 112 | bool has_broken_compute{}; |
| 109 | bool has_fast_buffer_sub_data{}; | 113 | bool has_fast_buffer_sub_data{}; |
| 114 | bool use_assembly_shaders{}; | ||
| 110 | }; | 115 | }; |
| 111 | 116 | ||
| 112 | } // namespace OpenGL | 117 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
index 99ddcb3f8..ec5421afa 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #include "common/assert.h" | 5 | #include "common/assert.h" |
| 6 | 6 | ||
| 7 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | ||
| 7 | #include "video_core/renderer_opengl/gl_fence_manager.h" | 8 | #include "video_core/renderer_opengl/gl_fence_manager.h" |
| 8 | 9 | ||
| 9 | namespace OpenGL { | 10 | namespace OpenGL { |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 69dcf952f..8116a5daa 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
| @@ -94,17 +94,30 @@ void oglEnable(GLenum cap, bool state) { | |||
| 94 | } // Anonymous namespace | 94 | } // Anonymous namespace |
| 95 | 95 | ||
| 96 | RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, | 96 | RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, |
| 97 | ScreenInfo& info, GLShader::ProgramManager& program_manager, | 97 | const Device& device, ScreenInfo& info, |
| 98 | StateTracker& state_tracker) | 98 | ProgramManager& program_manager, StateTracker& state_tracker) |
| 99 | : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device, state_tracker}, | 99 | : RasterizerAccelerated{system.Memory()}, device{device}, texture_cache{system, *this, device, |
| 100 | state_tracker}, | ||
| 100 | shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, | 101 | shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, |
| 101 | buffer_cache{*this, system, device, STREAM_BUFFER_SIZE}, | 102 | buffer_cache{*this, system, device, STREAM_BUFFER_SIZE}, |
| 102 | fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system}, | 103 | fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system}, |
| 103 | screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} { | 104 | screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} { |
| 104 | CheckExtensions(); | 105 | CheckExtensions(); |
| 106 | |||
| 107 | if (device.UseAssemblyShaders()) { | ||
| 108 | glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data()); | ||
| 109 | for (const GLuint cbuf : staging_cbufs) { | ||
| 110 | glNamedBufferStorage(cbuf, static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize), | ||
| 111 | nullptr, 0); | ||
| 112 | } | ||
| 113 | } | ||
| 105 | } | 114 | } |
| 106 | 115 | ||
| 107 | RasterizerOpenGL::~RasterizerOpenGL() {} | 116 | RasterizerOpenGL::~RasterizerOpenGL() { |
| 117 | if (device.UseAssemblyShaders()) { | ||
| 118 | glDeleteBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data()); | ||
| 119 | } | ||
| 120 | } | ||
| 108 | 121 | ||
| 109 | void RasterizerOpenGL::CheckExtensions() { | 122 | void RasterizerOpenGL::CheckExtensions() { |
| 110 | if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) { | 123 | if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) { |
| @@ -230,6 +243,7 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() { | |||
| 230 | void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | 243 | void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { |
| 231 | MICROPROFILE_SCOPE(OpenGL_Shader); | 244 | MICROPROFILE_SCOPE(OpenGL_Shader); |
| 232 | auto& gpu = system.GPU().Maxwell3D(); | 245 | auto& gpu = system.GPU().Maxwell3D(); |
| 246 | std::size_t num_ssbos = 0; | ||
| 233 | u32 clip_distances = 0; | 247 | u32 clip_distances = 0; |
| 234 | 248 | ||
| 235 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | 249 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { |
| @@ -261,6 +275,14 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 261 | 275 | ||
| 262 | Shader shader{shader_cache.GetStageProgram(program)}; | 276 | Shader shader{shader_cache.GetStageProgram(program)}; |
| 263 | 277 | ||
| 278 | if (device.UseAssemblyShaders()) { | ||
| 279 | // Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this | ||
| 280 | // all stages share the same bindings. | ||
| 281 | const std::size_t num_stage_ssbos = shader->GetEntries().global_memory_entries.size(); | ||
| 282 | ASSERT_MSG(num_stage_ssbos == 0 || num_ssbos == 0, "SSBOs on more than one stage"); | ||
| 283 | num_ssbos += num_stage_ssbos; | ||
| 284 | } | ||
| 285 | |||
| 264 | // Stage indices are 0 - 5 | 286 | // Stage indices are 0 - 5 |
| 265 | const std::size_t stage = index == 0 ? 0 : index - 1; | 287 | const std::size_t stage = index == 0 ? 0 : index - 1; |
| 266 | SetupDrawConstBuffers(stage, shader); | 288 | SetupDrawConstBuffers(stage, shader); |
| @@ -526,6 +548,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 526 | SyncFramebufferSRGB(); | 548 | SyncFramebufferSRGB(); |
| 527 | 549 | ||
| 528 | buffer_cache.Acquire(); | 550 | buffer_cache.Acquire(); |
| 551 | current_cbuf = 0; | ||
| 529 | 552 | ||
| 530 | std::size_t buffer_size = CalculateVertexArraysSize(); | 553 | std::size_t buffer_size = CalculateVertexArraysSize(); |
| 531 | 554 | ||
| @@ -535,9 +558,9 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 535 | } | 558 | } |
| 536 | 559 | ||
| 537 | // Uniform space for the 5 shader stages | 560 | // Uniform space for the 5 shader stages |
| 538 | buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + | 561 | buffer_size = |
| 539 | (sizeof(GLShader::MaxwellUniformData) + device.GetUniformBufferAlignment()) * | 562 | Common::AlignUp<std::size_t>(buffer_size, 4) + |
| 540 | Maxwell::MaxShaderStage; | 563 | (sizeof(MaxwellUniformData) + device.GetUniformBufferAlignment()) * Maxwell::MaxShaderStage; |
| 541 | 564 | ||
| 542 | // Add space for at least 18 constant buffers | 565 | // Add space for at least 18 constant buffers |
| 543 | buffer_size += Maxwell::MaxConstBuffers * | 566 | buffer_size += Maxwell::MaxConstBuffers * |
| @@ -558,12 +581,14 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 558 | } | 581 | } |
| 559 | 582 | ||
| 560 | // Setup emulation uniform buffer. | 583 | // Setup emulation uniform buffer. |
| 561 | GLShader::MaxwellUniformData ubo; | 584 | if (!device.UseAssemblyShaders()) { |
| 562 | ubo.SetFromRegs(gpu); | 585 | MaxwellUniformData ubo; |
| 563 | const auto [buffer, offset] = | 586 | ubo.SetFromRegs(gpu); |
| 564 | buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); | 587 | const auto [buffer, offset] = |
| 565 | glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset, | 588 | buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); |
| 566 | static_cast<GLsizeiptr>(sizeof(ubo))); | 589 | glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset, |
| 590 | static_cast<GLsizeiptr>(sizeof(ubo))); | ||
| 591 | } | ||
| 567 | 592 | ||
| 568 | // Setup shaders and their used resources. | 593 | // Setup shaders and their used resources. |
| 569 | texture_cache.GuardSamplers(true); | 594 | texture_cache.GuardSamplers(true); |
| @@ -635,11 +660,11 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | |||
| 635 | } | 660 | } |
| 636 | 661 | ||
| 637 | buffer_cache.Acquire(); | 662 | buffer_cache.Acquire(); |
| 663 | current_cbuf = 0; | ||
| 638 | 664 | ||
| 639 | auto kernel = shader_cache.GetComputeKernel(code_addr); | 665 | auto kernel = shader_cache.GetComputeKernel(code_addr); |
| 640 | SetupComputeTextures(kernel); | 666 | SetupComputeTextures(kernel); |
| 641 | SetupComputeImages(kernel); | 667 | SetupComputeImages(kernel); |
| 642 | program_manager.BindComputeShader(kernel->GetHandle()); | ||
| 643 | 668 | ||
| 644 | const std::size_t buffer_size = | 669 | const std::size_t buffer_size = |
| 645 | Tegra::Engines::KeplerCompute::NumConstBuffers * | 670 | Tegra::Engines::KeplerCompute::NumConstBuffers * |
| @@ -652,6 +677,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | |||
| 652 | buffer_cache.Unmap(); | 677 | buffer_cache.Unmap(); |
| 653 | 678 | ||
| 654 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; | 679 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; |
| 680 | program_manager.BindCompute(kernel->GetHandle()); | ||
| 655 | glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); | 681 | glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); |
| 656 | ++num_queued_commands; | 682 | ++num_queued_commands; |
| 657 | } | 683 | } |
| @@ -701,15 +727,15 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { | |||
| 701 | return; | 727 | return; |
| 702 | } | 728 | } |
| 703 | texture_cache.OnCPUWrite(addr, size); | 729 | texture_cache.OnCPUWrite(addr, size); |
| 704 | shader_cache.InvalidateRegion(addr, size); | 730 | shader_cache.OnCPUWrite(addr, size); |
| 705 | buffer_cache.OnCPUWrite(addr, size); | 731 | buffer_cache.OnCPUWrite(addr, size); |
| 706 | query_cache.InvalidateRegion(addr, size); | ||
| 707 | } | 732 | } |
| 708 | 733 | ||
| 709 | void RasterizerOpenGL::SyncGuestHost() { | 734 | void RasterizerOpenGL::SyncGuestHost() { |
| 710 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 735 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 711 | texture_cache.SyncGuestHost(); | 736 | texture_cache.SyncGuestHost(); |
| 712 | buffer_cache.SyncGuestHost(); | 737 | buffer_cache.SyncGuestHost(); |
| 738 | shader_cache.SyncGuestHost(); | ||
| 713 | } | 739 | } |
| 714 | 740 | ||
| 715 | void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) { | 741 | void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) { |
| @@ -812,14 +838,20 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 812 | } | 838 | } |
| 813 | 839 | ||
| 814 | void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) { | 840 | void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) { |
| 841 | static constexpr std::array PARAMETER_LUT = { | ||
| 842 | GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, | ||
| 843 | GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV, | ||
| 844 | GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV}; | ||
| 845 | |||
| 815 | MICROPROFILE_SCOPE(OpenGL_UBO); | 846 | MICROPROFILE_SCOPE(OpenGL_UBO); |
| 816 | const auto& stages = system.GPU().Maxwell3D().state.shader_stages; | 847 | const auto& stages = system.GPU().Maxwell3D().state.shader_stages; |
| 817 | const auto& shader_stage = stages[stage_index]; | 848 | const auto& shader_stage = stages[stage_index]; |
| 818 | 849 | ||
| 819 | u32 binding = device.GetBaseBindings(stage_index).uniform_buffer; | 850 | u32 binding = |
| 851 | device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).uniform_buffer; | ||
| 820 | for (const auto& entry : shader->GetEntries().const_buffers) { | 852 | for (const auto& entry : shader->GetEntries().const_buffers) { |
| 821 | const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; | 853 | const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; |
| 822 | SetupConstBuffer(binding++, buffer, entry); | 854 | SetupConstBuffer(PARAMETER_LUT[stage_index], binding++, buffer, entry); |
| 823 | } | 855 | } |
| 824 | } | 856 | } |
| 825 | 857 | ||
| @@ -835,16 +867,21 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { | |||
| 835 | buffer.address = config.Address(); | 867 | buffer.address = config.Address(); |
| 836 | buffer.size = config.size; | 868 | buffer.size = config.size; |
| 837 | buffer.enabled = mask[entry.GetIndex()]; | 869 | buffer.enabled = mask[entry.GetIndex()]; |
| 838 | SetupConstBuffer(binding++, buffer, entry); | 870 | SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding++, buffer, entry); |
| 839 | } | 871 | } |
| 840 | } | 872 | } |
| 841 | 873 | ||
| 842 | void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, | 874 | void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding, |
| 875 | const Tegra::Engines::ConstBufferInfo& buffer, | ||
| 843 | const ConstBufferEntry& entry) { | 876 | const ConstBufferEntry& entry) { |
| 844 | if (!buffer.enabled) { | 877 | if (!buffer.enabled) { |
| 845 | // Set values to zero to unbind buffers | 878 | // Set values to zero to unbind buffers |
| 846 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0, | 879 | if (device.UseAssemblyShaders()) { |
| 847 | sizeof(float)); | 880 | glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0); |
| 881 | } else { | ||
| 882 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, | ||
| 883 | buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float)); | ||
| 884 | } | ||
| 848 | return; | 885 | return; |
| 849 | } | 886 | } |
| 850 | 887 | ||
| @@ -853,9 +890,19 @@ void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::Const | |||
| 853 | const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4)); | 890 | const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4)); |
| 854 | 891 | ||
| 855 | const auto alignment = device.GetUniformBufferAlignment(); | 892 | const auto alignment = device.GetUniformBufferAlignment(); |
| 856 | const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false, | 893 | auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false, |
| 857 | device.HasFastBufferSubData()); | 894 | device.HasFastBufferSubData()); |
| 858 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size); | 895 | if (!device.UseAssemblyShaders()) { |
| 896 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size); | ||
| 897 | return; | ||
| 898 | } | ||
| 899 | if (offset != 0) { | ||
| 900 | const GLuint staging_cbuf = staging_cbufs[current_cbuf++]; | ||
| 901 | glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size); | ||
| 902 | cbuf = staging_cbuf; | ||
| 903 | offset = 0; | ||
| 904 | } | ||
| 905 | glBindBufferRangeNV(stage, binding, cbuf, offset, size); | ||
| 859 | } | 906 | } |
| 860 | 907 | ||
| 861 | void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) { | 908 | void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) { |
| @@ -863,7 +910,8 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shad | |||
| 863 | auto& memory_manager{gpu.MemoryManager()}; | 910 | auto& memory_manager{gpu.MemoryManager()}; |
| 864 | const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; | 911 | const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; |
| 865 | 912 | ||
| 866 | u32 binding = device.GetBaseBindings(stage_index).shader_storage_buffer; | 913 | u32 binding = |
| 914 | device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer; | ||
| 867 | for (const auto& entry : shader->GetEntries().global_memory_entries) { | 915 | for (const auto& entry : shader->GetEntries().global_memory_entries) { |
| 868 | const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset}; | 916 | const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset}; |
| 869 | const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)}; | 917 | const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)}; |
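Note on the constant-buffer hunks above: SetupConstBuffer now takes the NV per-stage parameter-buffer target and splits into a GLSL path and an assembly path. A condensed sketch of that dispatch, assuming a loaded GL context (e.g. via glad); the staging copy exists because the assembly path, as written above, only ever binds at offset 0:

    #include <glad/glad.h>

    // Not yuzu code: a minimal view of the new SetupConstBuffer dispatch.
    // GLSL shaders keep using indexed GL_UNIFORM_BUFFER bindings; assembly
    // shaders bind against the NV per-stage parameter-buffer targets instead.
    void BindConstBufferSketch(bool use_assembly_shaders, GLenum stage, GLuint binding,
                               GLuint cbuf, GLintptr offset, GLsizeiptr size,
                               GLuint staging_cbuf) {
        if (!use_assembly_shaders) {
            glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size);
            return;
        }
        if (offset != 0) {
            // Copy into a pre-created staging buffer so the assembly bind starts at 0.
            glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size);
            cbuf = staging_cbuf;
            offset = 0;
        }
        glBindBufferRangeNV(stage, binding, cbuf, offset, size);
    }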
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index b94c65907..87f7fe159 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -56,8 +56,8 @@ struct DrawParameters; | |||
| 56 | class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { | 56 | class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { |
| 57 | public: | 57 | public: |
| 58 | explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, | 58 | explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, |
| 59 | ScreenInfo& info, GLShader::ProgramManager& program_manager, | 59 | const Device& device, ScreenInfo& info, |
| 60 | StateTracker& state_tracker); | 60 | ProgramManager& program_manager, StateTracker& state_tracker); |
| 61 | ~RasterizerOpenGL() override; | 61 | ~RasterizerOpenGL() override; |
| 62 | 62 | ||
| 63 | void Draw(bool is_indexed, bool is_instanced) override; | 63 | void Draw(bool is_indexed, bool is_instanced) override; |
| @@ -106,7 +106,7 @@ private: | |||
| 106 | void SetupComputeConstBuffers(const Shader& kernel); | 106 | void SetupComputeConstBuffers(const Shader& kernel); |
| 107 | 107 | ||
| 108 | /// Configures a constant buffer. | 108 | /// Configures a constant buffer. |
| 109 | void SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, | 109 | void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, |
| 110 | const ConstBufferEntry& entry); | 110 | const ConstBufferEntry& entry); |
| 111 | 111 | ||
| 112 | /// Configures the current global memory entries to use for the draw command. | 112 | /// Configures the current global memory entries to use for the draw command. |
| @@ -224,7 +224,7 @@ private: | |||
| 224 | 224 | ||
| 225 | void SetupShaders(GLenum primitive_mode); | 225 | void SetupShaders(GLenum primitive_mode); |
| 226 | 226 | ||
| 227 | const Device device; | 227 | const Device& device; |
| 228 | 228 | ||
| 229 | TextureCacheOpenGL texture_cache; | 229 | TextureCacheOpenGL texture_cache; |
| 230 | ShaderCacheOpenGL shader_cache; | 230 | ShaderCacheOpenGL shader_cache; |
| @@ -236,7 +236,7 @@ private: | |||
| 236 | 236 | ||
| 237 | Core::System& system; | 237 | Core::System& system; |
| 238 | ScreenInfo& screen_info; | 238 | ScreenInfo& screen_info; |
| 239 | GLShader::ProgramManager& program_manager; | 239 | ProgramManager& program_manager; |
| 240 | StateTracker& state_tracker; | 240 | StateTracker& state_tracker; |
| 241 | 241 | ||
| 242 | static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; | 242 | static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; |
| @@ -248,6 +248,12 @@ private: | |||
| 248 | std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> | 248 | std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> |
| 249 | enabled_transform_feedback_buffers; | 249 | enabled_transform_feedback_buffers; |
| 250 | 250 | ||
| 251 | static constexpr std::size_t NUM_CONSTANT_BUFFERS = | ||
| 252 | Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers * | ||
| 253 | Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; | ||
| 254 | std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{}; | ||
| 255 | std::size_t current_cbuf = 0; | ||
| 256 | |||
| 251 | /// Number of commands queued to the OpenGL driver. Reseted on flush. | 257 | /// Number of commands queued to the OpenGL driver. Reseted on flush. |
| 252 | std::size_t num_queued_commands = 0; | 258 | std::size_t num_queued_commands = 0; |
| 253 | 259 | ||
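The new staging_cbufs array is sized for the worst case of every stage binding every constant buffer. A small sketch of that arithmetic, with the limits assumed from Maxwell3D's register definitions (18 matches the "at least 18 constant buffers" comment in the rasterizer; 6 shader programs is an assumption here):

    #include <cstddef>

    // Assumed limits; the real values come from Tegra::Engines::Maxwell3D::Regs.
    constexpr std::size_t MaxConstBuffers = 18;  // const buffers per shader stage
    constexpr std::size_t MaxShaderProgram = 6;  // VertexA/VertexB/TessCtrl/TessEval/Geometry/Fragment
    constexpr std::size_t NumConstantBuffers = MaxConstBuffers * MaxShaderProgram;
    static_assert(NumConstantBuffers == 108);

current_cbuf indexes into this pool and, as the Draw and DispatchCompute hunks show, is rewound to 0 at the start of every draw and dispatch.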
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index 97803d480..a787e27d2 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp | |||
| @@ -125,6 +125,15 @@ void OGLProgram::Release() { | |||
| 125 | handle = 0; | 125 | handle = 0; |
| 126 | } | 126 | } |
| 127 | 127 | ||
| 128 | void OGLAssemblyProgram::Release() { | ||
| 129 | if (handle == 0) { | ||
| 130 | return; | ||
| 131 | } | ||
| 132 | MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); | ||
| 133 | glDeleteProgramsARB(1, &handle); | ||
| 134 | handle = 0; | ||
| 135 | } | ||
| 136 | |||
| 128 | void OGLPipeline::Create() { | 137 | void OGLPipeline::Create() { |
| 129 | if (handle != 0) | 138 | if (handle != 0) |
| 130 | return; | 139 | return; |
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index de93f4212..f8b322227 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h | |||
| @@ -167,6 +167,22 @@ public: | |||
| 167 | GLuint handle = 0; | 167 | GLuint handle = 0; |
| 168 | }; | 168 | }; |
| 169 | 169 | ||
| 170 | class OGLAssemblyProgram : private NonCopyable { | ||
| 171 | public: | ||
| 172 | OGLAssemblyProgram() = default; | ||
| 173 | |||
| 174 | OGLAssemblyProgram(OGLAssemblyProgram&& o) noexcept : handle(std::exchange(o.handle, 0)) {} | ||
| 175 | |||
| 176 | ~OGLAssemblyProgram() { | ||
| 177 | Release(); | ||
| 178 | } | ||
| 179 | |||
| 180 | /// Deletes the internal OpenGL resource | ||
| 181 | void Release(); | ||
| 182 | |||
| 183 | GLuint handle = 0; | ||
| 184 | }; | ||
| 185 | |||
| 170 | class OGLPipeline : private NonCopyable { | 186 | class OGLPipeline : private NonCopyable { |
| 171 | public: | 187 | public: |
| 172 | OGLPipeline() = default; | 188 | OGLPipeline() = default; |
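A hedged usage sketch for the new wrapper (the creation call below is the caller's responsibility and an assumption here; the class itself only owns deletion): the handle is generated with glGenProgramsARB and released exactly once, in the destructor, with a moved-from object left holding 0 so its Release() is a no-op.

    #include <glad/glad.h>

    // Assumes the OGLAssemblyProgram declaration above is visible.
    OGLAssemblyProgram MakeAssemblyProgram() {
        OGLAssemblyProgram program;
        glGenProgramsARB(1, &program.handle);
        return program;  // moved (or elided) out; a moved-from handle is 0
    }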
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 9759a7078..4cd0f36cf 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -97,6 +97,24 @@ constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) { | |||
| 97 | return {}; | 97 | return {}; |
| 98 | } | 98 | } |
| 99 | 99 | ||
| 100 | constexpr GLenum AssemblyEnum(ShaderType shader_type) { | ||
| 101 | switch (shader_type) { | ||
| 102 | case ShaderType::Vertex: | ||
| 103 | return GL_VERTEX_PROGRAM_NV; | ||
| 104 | case ShaderType::TesselationControl: | ||
| 105 | return GL_TESS_CONTROL_PROGRAM_NV; | ||
| 106 | case ShaderType::TesselationEval: | ||
| 107 | return GL_TESS_EVALUATION_PROGRAM_NV; | ||
| 108 | case ShaderType::Geometry: | ||
| 109 | return GL_GEOMETRY_PROGRAM_NV; | ||
| 110 | case ShaderType::Fragment: | ||
| 111 | return GL_FRAGMENT_PROGRAM_NV; | ||
| 112 | case ShaderType::Compute: | ||
| 113 | return GL_COMPUTE_PROGRAM_NV; | ||
| 114 | } | ||
| 115 | return {}; | ||
| 116 | } | ||
| 117 | |||
| 100 | std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) { | 118 | std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) { |
| 101 | return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier); | 119 | return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier); |
| 102 | } | 120 | } |
| @@ -120,18 +138,43 @@ std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) { | |||
| 120 | return registry; | 138 | return registry; |
| 121 | } | 139 | } |
| 122 | 140 | ||
| 123 | std::shared_ptr<OGLProgram> BuildShader(const Device& device, ShaderType shader_type, | 141 | ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier, |
| 124 | u64 unique_identifier, const ShaderIR& ir, | 142 | const ShaderIR& ir, const Registry& registry, |
| 125 | const Registry& registry, bool hint_retrievable = false) { | 143 | bool hint_retrievable = false) { |
| 126 | const std::string shader_id = MakeShaderID(unique_identifier, shader_type); | 144 | const std::string shader_id = MakeShaderID(unique_identifier, shader_type); |
| 127 | LOG_INFO(Render_OpenGL, "{}", shader_id); | 145 | LOG_INFO(Render_OpenGL, "{}", shader_id); |
| 128 | 146 | ||
| 129 | const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id); | 147 | auto program = std::make_shared<ProgramHandle>(); |
| 130 | OGLShader shader; | 148 | |
| 131 | shader.Create(glsl.c_str(), GetGLShaderType(shader_type)); | 149 | if (device.UseAssemblyShaders()) { |
| 150 | const std::string arb = "Not implemented"; | ||
| 151 | |||
| 152 | GLuint& arb_prog = program->assembly_program.handle; | ||
| 153 | |||
| 154 | // Commented out functions signal OpenGL errors but are compatible with apitrace. | ||
| 155 | // Use them only to capture and replay on apitrace. | ||
| 156 | #if 0 | ||
| 157 | glGenProgramsNV(1, &arb_prog); | ||
| 158 | glLoadProgramNV(AssemblyEnum(shader_type), arb_prog, static_cast<GLsizei>(arb.size()), | ||
| 159 | reinterpret_cast<const GLubyte*>(arb.data())); | ||
| 160 | #else | ||
| 161 | glGenProgramsARB(1, &arb_prog); | ||
| 162 | glNamedProgramStringEXT(arb_prog, AssemblyEnum(shader_type), GL_PROGRAM_FORMAT_ASCII_ARB, | ||
| 163 | static_cast<GLsizei>(arb.size()), arb.data()); | ||
| 164 | #endif | ||
| 165 | const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV)); | ||
| 166 | if (err && *err) { | ||
| 167 | LOG_CRITICAL(Render_OpenGL, "{}", err); | ||
| 168 | LOG_INFO(Render_OpenGL, "\n{}", arb); | ||
| 169 | } | ||
| 170 | } else { | ||
| 171 | const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id); | ||
| 172 | OGLShader shader; | ||
| 173 | shader.Create(glsl.c_str(), GetGLShaderType(shader_type)); | ||
| 174 | |||
| 175 | program->source_program.Create(true, hint_retrievable, shader.handle); | ||
| 176 | } | ||
| 132 | 177 | ||
| 133 | auto program = std::make_shared<OGLProgram>(); | ||
| 134 | program->Create(true, hint_retrievable, shader.handle); | ||
| 135 | return program; | 178 | return program; |
| 136 | } | 179 | } |
| 137 | 180 | ||
| @@ -153,15 +196,22 @@ std::unordered_set<GLenum> GetSupportedFormats() { | |||
| 153 | 196 | ||
| 154 | CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, | 197 | CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, |
| 155 | std::shared_ptr<VideoCommon::Shader::Registry> registry, | 198 | std::shared_ptr<VideoCommon::Shader::Registry> registry, |
| 156 | ShaderEntries entries, std::shared_ptr<OGLProgram> program) | 199 | ShaderEntries entries, ProgramSharedPtr program_) |
| 157 | : RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)}, | 200 | : RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)}, |
| 158 | size_in_bytes{size_in_bytes}, program{std::move(program)} {} | 201 | size_in_bytes{size_in_bytes}, program{std::move(program_)} { |
| 202 | // Assign either the assembly program or source program. We can't have both. | ||
| 203 | handle = program->assembly_program.handle; | ||
| 204 | if (handle == 0) { | ||
| 205 | handle = program->source_program.handle; | ||
| 206 | } | ||
| 207 | ASSERT(handle != 0); | ||
| 208 | } | ||
| 159 | 209 | ||
| 160 | CachedShader::~CachedShader() = default; | 210 | CachedShader::~CachedShader() = default; |
| 161 | 211 | ||
| 162 | GLuint CachedShader::GetHandle() const { | 212 | GLuint CachedShader::GetHandle() const { |
| 163 | DEBUG_ASSERT(registry->IsConsistent()); | 213 | DEBUG_ASSERT(registry->IsConsistent()); |
| 164 | return program->handle; | 214 | return handle; |
| 165 | } | 215 | } |
| 166 | 216 | ||
| 167 | Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, | 217 | Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, |
| @@ -239,7 +289,11 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 239 | return; | 289 | return; |
| 240 | } | 290 | } |
| 241 | 291 | ||
| 242 | const std::vector gl_cache = disk_cache.LoadPrecompiled(); | 292 | std::vector<ShaderDiskCachePrecompiled> gl_cache; |
| 293 | if (!device.UseAssemblyShaders()) { | ||
| 294 | // Only load precompiled cache when we are not using assembly shaders | ||
| 295 | gl_cache = disk_cache.LoadPrecompiled(); | ||
| 296 | } | ||
| 243 | const auto supported_formats = GetSupportedFormats(); | 297 | const auto supported_formats = GetSupportedFormats(); |
| 244 | 298 | ||
| 245 | // Track if precompiled cache was altered during loading to know if we have to | 299 | // Track if precompiled cache was altered during loading to know if we have to |
| @@ -278,7 +332,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 278 | auto registry = MakeRegistry(entry); | 332 | auto registry = MakeRegistry(entry); |
| 279 | const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry); | 333 | const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry); |
| 280 | 334 | ||
| 281 | std::shared_ptr<OGLProgram> program; | 335 | ProgramSharedPtr program; |
| 282 | if (precompiled_entry) { | 336 | if (precompiled_entry) { |
| 283 | // If the shader is precompiled, attempt to load it with | 337 | // If the shader is precompiled, attempt to load it with |
| 284 | program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats); | 338 | program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats); |
| @@ -332,6 +386,11 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 332 | return; | 386 | return; |
| 333 | } | 387 | } |
| 334 | 388 | ||
| 389 | if (device.UseAssemblyShaders()) { | ||
| 390 | // Don't store precompiled binaries for assembly shaders. | ||
| 391 | return; | ||
| 392 | } | ||
| 393 | |||
| 335 | // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw | 394 | // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw |
| 336 | // before precompiling them | 395 | // before precompiling them |
| 337 | 396 | ||
| @@ -339,7 +398,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 339 | const u64 id = (*transferable)[i].unique_identifier; | 398 | const u64 id = (*transferable)[i].unique_identifier; |
| 340 | const auto it = find_precompiled(id); | 399 | const auto it = find_precompiled(id); |
| 341 | if (it == gl_cache.end()) { | 400 | if (it == gl_cache.end()) { |
| 342 | const GLuint program = runtime_cache.at(id).program->handle; | 401 | const GLuint program = runtime_cache.at(id).program->source_program.handle; |
| 343 | disk_cache.SavePrecompiled(id, program); | 402 | disk_cache.SavePrecompiled(id, program); |
| 344 | precompiled_cache_altered = true; | 403 | precompiled_cache_altered = true; |
| 345 | } | 404 | } |
| @@ -350,7 +409,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 350 | } | 409 | } |
| 351 | } | 410 | } |
| 352 | 411 | ||
| 353 | std::shared_ptr<OGLProgram> ShaderCacheOpenGL::GeneratePrecompiledProgram( | 412 | ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram( |
| 354 | const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, | 413 | const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, |
| 355 | const std::unordered_set<GLenum>& supported_formats) { | 414 | const std::unordered_set<GLenum>& supported_formats) { |
| 356 | if (supported_formats.find(precompiled_entry.binary_format) == supported_formats.end()) { | 415 | if (supported_formats.find(precompiled_entry.binary_format) == supported_formats.end()) { |
| @@ -358,15 +417,15 @@ std::shared_ptr<OGLProgram> ShaderCacheOpenGL::GeneratePrecompiledProgram( | |||
| 358 | return {}; | 417 | return {}; |
| 359 | } | 418 | } |
| 360 | 419 | ||
| 361 | auto program = std::make_shared<OGLProgram>(); | 420 | auto program = std::make_shared<ProgramHandle>(); |
| 362 | program->handle = glCreateProgram(); | 421 | GLuint& handle = program->source_program.handle; |
| 363 | glProgramParameteri(program->handle, GL_PROGRAM_SEPARABLE, GL_TRUE); | 422 | handle = glCreateProgram(); |
| 364 | glProgramBinary(program->handle, precompiled_entry.binary_format, | 423 | glProgramParameteri(handle, GL_PROGRAM_SEPARABLE, GL_TRUE); |
| 365 | precompiled_entry.binary.data(), | 424 | glProgramBinary(handle, precompiled_entry.binary_format, precompiled_entry.binary.data(), |
| 366 | static_cast<GLsizei>(precompiled_entry.binary.size())); | 425 | static_cast<GLsizei>(precompiled_entry.binary.size())); |
| 367 | 426 | ||
| 368 | GLint link_status; | 427 | GLint link_status; |
| 369 | glGetProgramiv(program->handle, GL_LINK_STATUS, &link_status); | 428 | glGetProgramiv(handle, GL_LINK_STATUS, &link_status); |
| 370 | if (link_status == GL_FALSE) { | 429 | if (link_status == GL_FALSE) { |
| 371 | LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing"); | 430 | LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing"); |
| 372 | return {}; | 431 | return {}; |
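The precompiled disk cache is now skipped entirely on the assembly path. The binaries it stores come from the standard program-binary API, which only applies to linked GLSL program objects; a short sketch of that retrieval (plain OpenGL calls, not yuzu's disk-cache code), which has no equivalent for glGenProgramsARB-style assembly programs:

    #include <cstdint>
    #include <vector>

    #include <glad/glad.h>

    // Fetches the driver-specific binary of a linked GLSL program. This is what
    // the precompiled cache relies on, and why only source_program handles are saved.
    std::vector<std::uint8_t> GetProgramBinary(GLuint program, GLenum& binary_format) {
        GLint length = 0;
        glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &length);
        std::vector<std::uint8_t> binary(static_cast<std::size_t>(length));
        GLsizei written = 0;
        glGetProgramBinary(program, length, &written, &binary_format, binary.data());
        binary.resize(static_cast<std::size_t>(written));
        return binary;
    }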
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 91690b470..b2ae8d7f9 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -43,8 +43,14 @@ struct UnspecializedShader; | |||
| 43 | using Shader = std::shared_ptr<CachedShader>; | 43 | using Shader = std::shared_ptr<CachedShader>; |
| 44 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 44 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 45 | 45 | ||
| 46 | struct ProgramHandle { | ||
| 47 | OGLProgram source_program; | ||
| 48 | OGLAssemblyProgram assembly_program; | ||
| 49 | }; | ||
| 50 | using ProgramSharedPtr = std::shared_ptr<ProgramHandle>; | ||
| 51 | |||
| 46 | struct PrecompiledShader { | 52 | struct PrecompiledShader { |
| 47 | std::shared_ptr<OGLProgram> program; | 53 | ProgramSharedPtr program; |
| 48 | std::shared_ptr<VideoCommon::Shader::Registry> registry; | 54 | std::shared_ptr<VideoCommon::Shader::Registry> registry; |
| 49 | ShaderEntries entries; | 55 | ShaderEntries entries; |
| 50 | }; | 56 | }; |
| @@ -87,12 +93,13 @@ public: | |||
| 87 | private: | 93 | private: |
| 88 | explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, | 94 | explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, |
| 89 | std::shared_ptr<VideoCommon::Shader::Registry> registry, | 95 | std::shared_ptr<VideoCommon::Shader::Registry> registry, |
| 90 | ShaderEntries entries, std::shared_ptr<OGLProgram> program); | 96 | ShaderEntries entries, ProgramSharedPtr program); |
| 91 | 97 | ||
| 92 | std::shared_ptr<VideoCommon::Shader::Registry> registry; | 98 | std::shared_ptr<VideoCommon::Shader::Registry> registry; |
| 93 | ShaderEntries entries; | 99 | ShaderEntries entries; |
| 94 | std::size_t size_in_bytes = 0; | 100 | std::size_t size_in_bytes = 0; |
| 95 | std::shared_ptr<OGLProgram> program; | 101 | ProgramSharedPtr program; |
| 102 | GLuint handle = 0; | ||
| 96 | }; | 103 | }; |
| 97 | 104 | ||
| 98 | class ShaderCacheOpenGL final : public RasterizerCache<Shader> { | 105 | class ShaderCacheOpenGL final : public RasterizerCache<Shader> { |
| @@ -115,7 +122,7 @@ protected: | |||
| 115 | void FlushObjectInner(const Shader& object) override {} | 122 | void FlushObjectInner(const Shader& object) override {} |
| 116 | 123 | ||
| 117 | private: | 124 | private: |
| 118 | std::shared_ptr<OGLProgram> GeneratePrecompiledProgram( | 125 | ProgramSharedPtr GeneratePrecompiledProgram( |
| 119 | const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, | 126 | const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, |
| 120 | const std::unordered_set<GLenum>& supported_formats); | 127 | const std::unordered_set<GLenum>& supported_formats); |
| 121 | 128 | ||
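ProgramHandle is effectively a tagged pair: a build produces either a GLSL source_program or an NV assembly_program, never both. The CachedShader constructor change in gl_shader_cache.cpp picks the cached handle accordingly; as a standalone sketch of that invariant:

    // Mirrors CachedShader's constructor: exactly one member is expected to be populated.
    GLuint SelectHandle(const ProgramHandle& program) {
        const GLuint assembly = program.assembly_program.handle;
        return assembly != 0 ? assembly : program.source_program.handle;
    }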
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 960ebf1a1..253484968 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -1538,7 +1538,9 @@ private: | |||
| 1538 | Expression target; | 1538 | Expression target; |
| 1539 | if (const auto gpr = std::get_if<GprNode>(&*dest)) { | 1539 | if (const auto gpr = std::get_if<GprNode>(&*dest)) { |
| 1540 | if (gpr->GetIndex() == Register::ZeroIndex) { | 1540 | if (gpr->GetIndex() == Register::ZeroIndex) { |
| 1541 | // Writing to Register::ZeroIndex is a no op | 1541 | // Writing to Register::ZeroIndex is a no op but we still have to visit the source |
| 1542 | // as it might have side effects. | ||
| 1543 | code.AddLine("{};", Visit(src).GetCode()); | ||
| 1542 | return {}; | 1544 | return {}; |
| 1543 | } | 1545 | } |
| 1544 | target = {GetRegister(gpr->GetIndex()), Type::Float}; | 1546 | target = {GetRegister(gpr->GetIndex()), Type::Float}; |
| @@ -2309,6 +2311,18 @@ private: | |||
| 2309 | return {"gl_SubGroupInvocationARB", Type::Uint}; | 2311 | return {"gl_SubGroupInvocationARB", Type::Uint}; |
| 2310 | } | 2312 | } |
| 2311 | 2313 | ||
| 2314 | template <const std::string_view& comparison> | ||
| 2315 | Expression ThreadMask(Operation) { | ||
| 2316 | if (device.HasWarpIntrinsics()) { | ||
| 2317 | return {fmt::format("gl_Thread{}MaskNV", comparison), Type::Uint}; | ||
| 2318 | } | ||
| 2319 | if (device.HasShaderBallot()) { | ||
| 2320 | return {fmt::format("uint(gl_SubGroup{}MaskARB)", comparison), Type::Uint}; | ||
| 2321 | } | ||
| 2322 | LOG_ERROR(Render_OpenGL, "Thread mask intrinsics are required by the shader"); | ||
| 2323 | return {"0U", Type::Uint}; | ||
| 2324 | } | ||
| 2325 | |||
| 2312 | Expression ShuffleIndexed(Operation operation) { | 2326 | Expression ShuffleIndexed(Operation operation) { |
| 2313 | std::string value = VisitOperand(operation, 0).AsFloat(); | 2327 | std::string value = VisitOperand(operation, 0).AsFloat(); |
| 2314 | 2328 | ||
| @@ -2321,6 +2335,15 @@ private: | |||
| 2321 | return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float}; | 2335 | return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float}; |
| 2322 | } | 2336 | } |
| 2323 | 2337 | ||
| 2338 | Expression Barrier(Operation) { | ||
| 2339 | if (!ir.IsDecompiled()) { | ||
| 2340 | LOG_ERROR(Render_OpenGL, "barrier() used but shader is not decompiled"); | ||
| 2341 | return {}; | ||
| 2342 | } | ||
| 2343 | code.AddLine("barrier();"); | ||
| 2344 | return {}; | ||
| 2345 | } | ||
| 2346 | |||
| 2324 | Expression MemoryBarrierGL(Operation) { | 2347 | Expression MemoryBarrierGL(Operation) { |
| 2325 | code.AddLine("memoryBarrier();"); | 2348 | code.AddLine("memoryBarrier();"); |
| 2326 | return {}; | 2349 | return {}; |
| @@ -2337,6 +2360,12 @@ private: | |||
| 2337 | static constexpr std::string_view NotEqual = "!="; | 2360 | static constexpr std::string_view NotEqual = "!="; |
| 2338 | static constexpr std::string_view GreaterEqual = ">="; | 2361 | static constexpr std::string_view GreaterEqual = ">="; |
| 2339 | 2362 | ||
| 2363 | static constexpr std::string_view Eq = "Eq"; | ||
| 2364 | static constexpr std::string_view Ge = "Ge"; | ||
| 2365 | static constexpr std::string_view Gt = "Gt"; | ||
| 2366 | static constexpr std::string_view Le = "Le"; | ||
| 2367 | static constexpr std::string_view Lt = "Lt"; | ||
| 2368 | |||
| 2340 | static constexpr std::string_view Add = "Add"; | 2369 | static constexpr std::string_view Add = "Add"; |
| 2341 | static constexpr std::string_view Min = "Min"; | 2370 | static constexpr std::string_view Min = "Min"; |
| 2342 | static constexpr std::string_view Max = "Max"; | 2371 | static constexpr std::string_view Max = "Max"; |
| @@ -2554,8 +2583,14 @@ private: | |||
| 2554 | &GLSLDecompiler::VoteEqual, | 2583 | &GLSLDecompiler::VoteEqual, |
| 2555 | 2584 | ||
| 2556 | &GLSLDecompiler::ThreadId, | 2585 | &GLSLDecompiler::ThreadId, |
| 2586 | &GLSLDecompiler::ThreadMask<Func::Eq>, | ||
| 2587 | &GLSLDecompiler::ThreadMask<Func::Ge>, | ||
| 2588 | &GLSLDecompiler::ThreadMask<Func::Gt>, | ||
| 2589 | &GLSLDecompiler::ThreadMask<Func::Le>, | ||
| 2590 | &GLSLDecompiler::ThreadMask<Func::Lt>, | ||
| 2557 | &GLSLDecompiler::ShuffleIndexed, | 2591 | &GLSLDecompiler::ShuffleIndexed, |
| 2558 | 2592 | ||
| 2593 | &GLSLDecompiler::Barrier, | ||
| 2559 | &GLSLDecompiler::MemoryBarrierGL, | 2594 | &GLSLDecompiler::MemoryBarrierGL, |
| 2560 | }; | 2595 | }; |
| 2561 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); | 2596 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); |
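The new ThreadMask helper resolves each mask query to whatever the device exposes. A standalone sketch of that mapping (the free function below is illustrative only, not the decompiler's API):

    #include <string>
    #include <string_view>

    #include <fmt/format.h>

    // comparison is one of "Eq", "Ge", "Gt", "Le", "Lt", as added to Func above.
    std::string ThreadMaskExpr(bool has_warp_intrinsics, bool has_shader_ballot,
                               std::string_view comparison) {
        if (has_warp_intrinsics) {
            return fmt::format("gl_Thread{}MaskNV", comparison);          // NV_shader_thread_group
        }
        if (has_shader_ballot) {
            return fmt::format("uint(gl_SubGroup{}MaskARB)", comparison); // ARB_shader_ballot
        }
        return "0U";  // neither extension: emit a constant and log an error, as above
    }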
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 9c7b0adbd..96605db84 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp | |||
| @@ -6,45 +6,105 @@ | |||
| 6 | 6 | ||
| 7 | #include "common/common_types.h" | 7 | #include "common/common_types.h" |
| 8 | #include "video_core/engines/maxwell_3d.h" | 8 | #include "video_core/engines/maxwell_3d.h" |
| 9 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 9 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 10 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 10 | 11 | ||
| 11 | namespace OpenGL::GLShader { | 12 | namespace OpenGL { |
| 12 | 13 | ||
| 13 | ProgramManager::ProgramManager() = default; | 14 | ProgramManager::ProgramManager(const Device& device) { |
| 15 | use_assembly_programs = device.UseAssemblyShaders(); | ||
| 16 | if (use_assembly_programs) { | ||
| 17 | glEnable(GL_COMPUTE_PROGRAM_NV); | ||
| 18 | } else { | ||
| 19 | graphics_pipeline.Create(); | ||
| 20 | glBindProgramPipeline(graphics_pipeline.handle); | ||
| 21 | } | ||
| 22 | } | ||
| 14 | 23 | ||
| 15 | ProgramManager::~ProgramManager() = default; | 24 | ProgramManager::~ProgramManager() = default; |
| 16 | 25 | ||
| 17 | void ProgramManager::Create() { | 26 | void ProgramManager::BindCompute(GLuint program) { |
| 18 | graphics_pipeline.Create(); | 27 | if (use_assembly_programs) { |
| 19 | glBindProgramPipeline(graphics_pipeline.handle); | 28 | glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program); |
| 29 | } else { | ||
| 30 | is_graphics_bound = false; | ||
| 31 | glUseProgram(program); | ||
| 32 | } | ||
| 20 | } | 33 | } |
| 21 | 34 | ||
| 22 | void ProgramManager::BindGraphicsPipeline() { | 35 | void ProgramManager::BindGraphicsPipeline() { |
| 23 | if (!is_graphics_bound) { | 36 | if (use_assembly_programs) { |
| 24 | is_graphics_bound = true; | 37 | UpdateAssemblyPrograms(); |
| 25 | glUseProgram(0); | 38 | } else { |
| 39 | UpdateSourcePrograms(); | ||
| 26 | } | 40 | } |
| 41 | } | ||
| 27 | 42 | ||
| 28 | // Avoid updating the pipeline when values have no changed | 43 | void ProgramManager::BindHostPipeline(GLuint pipeline) { |
| 29 | if (old_state == current_state) { | 44 | if (use_assembly_programs) { |
| 30 | return; | 45 | if (geometry_enabled) { |
| 46 | geometry_enabled = false; | ||
| 47 | old_state.geometry = 0; | ||
| 48 | glDisable(GL_GEOMETRY_PROGRAM_NV); | ||
| 49 | } | ||
| 31 | } | 50 | } |
| 51 | glBindProgramPipeline(pipeline); | ||
| 52 | } | ||
| 32 | 53 | ||
| 33 | // Workaround for AMD bug | 54 | void ProgramManager::RestoreGuestPipeline() { |
| 34 | static constexpr GLenum all_used_stages{GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | | 55 | if (use_assembly_programs) { |
| 35 | GL_FRAGMENT_SHADER_BIT}; | 56 | glBindProgramPipeline(0); |
| 36 | const GLuint handle = graphics_pipeline.handle; | 57 | } else { |
| 37 | glUseProgramStages(handle, all_used_stages, 0); | 58 | glBindProgramPipeline(graphics_pipeline.handle); |
| 38 | glUseProgramStages(handle, GL_VERTEX_SHADER_BIT, current_state.vertex_shader); | 59 | } |
| 39 | glUseProgramStages(handle, GL_GEOMETRY_SHADER_BIT, current_state.geometry_shader); | 60 | } |
| 40 | glUseProgramStages(handle, GL_FRAGMENT_SHADER_BIT, current_state.fragment_shader); | 61 | |
| 62 | void ProgramManager::UpdateAssemblyPrograms() { | ||
| 63 | const auto update_state = [](GLenum stage, bool& enabled, GLuint current, GLuint old) { | ||
| 64 | if (current == old) { | ||
| 65 | return; | ||
| 66 | } | ||
| 67 | if (current == 0) { | ||
| 68 | if (enabled) { | ||
| 69 | enabled = false; | ||
| 70 | glDisable(stage); | ||
| 71 | } | ||
| 72 | return; | ||
| 73 | } | ||
| 74 | if (!enabled) { | ||
| 75 | enabled = true; | ||
| 76 | glEnable(stage); | ||
| 77 | } | ||
| 78 | glBindProgramARB(stage, current); | ||
| 79 | }; | ||
| 80 | |||
| 81 | update_state(GL_VERTEX_PROGRAM_NV, vertex_enabled, current_state.vertex, old_state.vertex); | ||
| 82 | update_state(GL_GEOMETRY_PROGRAM_NV, geometry_enabled, current_state.geometry, | ||
| 83 | old_state.geometry); | ||
| 84 | update_state(GL_FRAGMENT_PROGRAM_NV, fragment_enabled, current_state.fragment, | ||
| 85 | old_state.fragment); | ||
| 41 | 86 | ||
| 42 | old_state = current_state; | 87 | old_state = current_state; |
| 43 | } | 88 | } |
| 44 | 89 | ||
| 45 | void ProgramManager::BindComputeShader(GLuint program) { | 90 | void ProgramManager::UpdateSourcePrograms() { |
| 46 | is_graphics_bound = false; | 91 | if (!is_graphics_bound) { |
| 47 | glUseProgram(program); | 92 | is_graphics_bound = true; |
| 93 | glUseProgram(0); | ||
| 94 | } | ||
| 95 | |||
| 96 | const GLuint handle = graphics_pipeline.handle; | ||
| 97 | const auto update_state = [handle](GLenum stage, GLuint current, GLuint old) { | ||
| 98 | if (current == old) { | ||
| 99 | return; | ||
| 100 | } | ||
| 101 | glUseProgramStages(handle, stage, current); | ||
| 102 | }; | ||
| 103 | update_state(GL_VERTEX_SHADER_BIT, current_state.vertex, old_state.vertex); | ||
| 104 | update_state(GL_GEOMETRY_SHADER_BIT, current_state.geometry, old_state.geometry); | ||
| 105 | update_state(GL_FRAGMENT_SHADER_BIT, current_state.fragment, old_state.fragment); | ||
| 106 | |||
| 107 | old_state = current_state; | ||
| 48 | } | 108 | } |
| 49 | 109 | ||
| 50 | void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) { | 110 | void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) { |
| @@ -54,4 +114,4 @@ void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) { | |||
| 54 | y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f; | 114 | y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f; |
| 55 | } | 115 | } |
| 56 | 116 | ||
| 57 | } // namespace OpenGL::GLShader | 117 | } // namespace OpenGL |
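With the state-diffing rewrite, callers latch the per-stage handles and commit them in one place; ProgramManager then drives either the NV assembly path or the GLSL pipeline path chosen at construction. A usage sketch of the assumed call pattern (the handles are placeholders obtained elsewhere, e.g. from the shader cache):

    // vertex_handle / fragment_handle are hypothetical stage handles; 0 disables a stage.
    void BindGuestStages(ProgramManager& program_manager,
                         GLuint vertex_handle, GLuint fragment_handle) {
        program_manager.UseVertexShader(vertex_handle);
        program_manager.UseGeometryShader(0);
        program_manager.UseFragmentShader(fragment_handle);
        program_manager.BindGraphicsPipeline();  // diffs against old_state before touching GL
    }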
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index d2e47f2a9..0f03b4f12 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h | |||
| @@ -11,7 +11,9 @@ | |||
| 11 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 11 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 12 | #include "video_core/renderer_opengl/maxwell_to_gl.h" | 12 | #include "video_core/renderer_opengl/maxwell_to_gl.h" |
| 13 | 13 | ||
| 14 | namespace OpenGL::GLShader { | 14 | namespace OpenGL { |
| 15 | |||
| 16 | class Device; | ||
| 15 | 17 | ||
| 16 | /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned | 18 | /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned |
| 17 | /// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at | 19 | /// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at |
| @@ -28,50 +30,58 @@ static_assert(sizeof(MaxwellUniformData) < 16384, | |||
| 28 | 30 | ||
| 29 | class ProgramManager { | 31 | class ProgramManager { |
| 30 | public: | 32 | public: |
| 31 | explicit ProgramManager(); | 33 | explicit ProgramManager(const Device& device); |
| 32 | ~ProgramManager(); | 34 | ~ProgramManager(); |
| 33 | 35 | ||
| 34 | void Create(); | 36 | /// Binds a compute program |
| 37 | void BindCompute(GLuint program); | ||
| 35 | 38 | ||
| 36 | /// Updates the graphics pipeline and binds it. | 39 | /// Updates bound programs. |
| 37 | void BindGraphicsPipeline(); | 40 | void BindGraphicsPipeline(); |
| 38 | 41 | ||
| 39 | /// Binds a compute shader. | 42 | /// Binds an OpenGL pipeline object unsynchronized with the guest state. |
| 40 | void BindComputeShader(GLuint program); | 43 | void BindHostPipeline(GLuint pipeline); |
| 44 | |||
| 45 | /// Rewinds BindHostPipeline state changes. | ||
| 46 | void RestoreGuestPipeline(); | ||
| 41 | 47 | ||
| 42 | void UseVertexShader(GLuint program) { | 48 | void UseVertexShader(GLuint program) { |
| 43 | current_state.vertex_shader = program; | 49 | current_state.vertex = program; |
| 44 | } | 50 | } |
| 45 | 51 | ||
| 46 | void UseGeometryShader(GLuint program) { | 52 | void UseGeometryShader(GLuint program) { |
| 47 | current_state.geometry_shader = program; | 53 | current_state.geometry = program; |
| 48 | } | 54 | } |
| 49 | 55 | ||
| 50 | void UseFragmentShader(GLuint program) { | 56 | void UseFragmentShader(GLuint program) { |
| 51 | current_state.fragment_shader = program; | 57 | current_state.fragment = program; |
| 52 | } | 58 | } |
| 53 | 59 | ||
| 54 | private: | 60 | private: |
| 55 | struct PipelineState { | 61 | struct PipelineState { |
| 56 | bool operator==(const PipelineState& rhs) const noexcept { | 62 | GLuint vertex = 0; |
| 57 | return vertex_shader == rhs.vertex_shader && fragment_shader == rhs.fragment_shader && | 63 | GLuint geometry = 0; |
| 58 | geometry_shader == rhs.geometry_shader; | 64 | GLuint fragment = 0; |
| 59 | } | ||
| 60 | |||
| 61 | bool operator!=(const PipelineState& rhs) const noexcept { | ||
| 62 | return !operator==(rhs); | ||
| 63 | } | ||
| 64 | |||
| 65 | GLuint vertex_shader = 0; | ||
| 66 | GLuint fragment_shader = 0; | ||
| 67 | GLuint geometry_shader = 0; | ||
| 68 | }; | 65 | }; |
| 69 | 66 | ||
| 67 | /// Update NV_gpu_program5 programs. | ||
| 68 | void UpdateAssemblyPrograms(); | ||
| 69 | |||
| 70 | /// Update GLSL programs. | ||
| 71 | void UpdateSourcePrograms(); | ||
| 72 | |||
| 70 | OGLPipeline graphics_pipeline; | 73 | OGLPipeline graphics_pipeline; |
| 71 | OGLPipeline compute_pipeline; | 74 | |
| 72 | PipelineState current_state; | 75 | PipelineState current_state; |
| 73 | PipelineState old_state; | 76 | PipelineState old_state; |
| 77 | |||
| 78 | bool use_assembly_programs = false; | ||
| 79 | |||
| 74 | bool is_graphics_bound = true; | 80 | bool is_graphics_bound = true; |
| 81 | |||
| 82 | bool vertex_enabled = false; | ||
| 83 | bool geometry_enabled = false; | ||
| 84 | bool fragment_enabled = false; | ||
| 75 | }; | 85 | }; |
| 76 | 86 | ||
| 77 | } // namespace OpenGL::GLShader | 87 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index b2a179746..6b489e6db 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -316,7 +316,7 @@ public: | |||
| 316 | RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system, | 316 | RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system, |
| 317 | Core::Frontend::GraphicsContext& context) | 317 | Core::Frontend::GraphicsContext& context) |
| 318 | : RendererBase{emu_window}, emu_window{emu_window}, system{system}, context{context}, | 318 | : RendererBase{emu_window}, emu_window{emu_window}, system{system}, context{context}, |
| 319 | has_debug_tool{HasDebugTool()} {} | 319 | program_manager{device}, has_debug_tool{HasDebugTool()} {} |
| 320 | 320 | ||
| 321 | RendererOpenGL::~RendererOpenGL() = default; | 321 | RendererOpenGL::~RendererOpenGL() = default; |
| 322 | 322 | ||
| @@ -468,8 +468,9 @@ void RendererOpenGL::InitOpenGLObjects() { | |||
| 468 | vertex_program.Create(true, false, vertex_shader.handle); | 468 | vertex_program.Create(true, false, vertex_shader.handle); |
| 469 | fragment_program.Create(true, false, fragment_shader.handle); | 469 | fragment_program.Create(true, false, fragment_shader.handle); |
| 470 | 470 | ||
| 471 | // Create program pipeline | 471 | pipeline.Create(); |
| 472 | program_manager.Create(); | 472 | glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle); |
| 473 | glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle); | ||
| 473 | 474 | ||
| 474 | // Generate VBO handle for drawing | 475 | // Generate VBO handle for drawing |
| 475 | vertex_buffer.Create(); | 476 | vertex_buffer.Create(); |
| @@ -508,7 +509,7 @@ void RendererOpenGL::CreateRasterizer() { | |||
| 508 | if (rasterizer) { | 509 | if (rasterizer) { |
| 509 | return; | 510 | return; |
| 510 | } | 511 | } |
| 511 | rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info, | 512 | rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, device, screen_info, |
| 512 | program_manager, state_tracker); | 513 | program_manager, state_tracker); |
| 513 | } | 514 | } |
| 514 | 515 | ||
| @@ -620,10 +621,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { | |||
| 620 | state_tracker.NotifyClipControl(); | 621 | state_tracker.NotifyClipControl(); |
| 621 | state_tracker.NotifyAlphaTest(); | 622 | state_tracker.NotifyAlphaTest(); |
| 622 | 623 | ||
| 623 | program_manager.UseVertexShader(vertex_program.handle); | 624 | program_manager.BindHostPipeline(pipeline.handle); |
| 624 | program_manager.UseGeometryShader(0); | ||
| 625 | program_manager.UseFragmentShader(fragment_program.handle); | ||
| 626 | program_manager.BindGraphicsPipeline(); | ||
| 627 | 625 | ||
| 628 | glEnable(GL_CULL_FACE); | 626 | glEnable(GL_CULL_FACE); |
| 629 | if (screen_info.display_srgb) { | 627 | if (screen_info.display_srgb) { |
| @@ -665,6 +663,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { | |||
| 665 | 663 | ||
| 666 | glClear(GL_COLOR_BUFFER_BIT); | 664 | glClear(GL_COLOR_BUFFER_BIT); |
| 667 | glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); | 665 | glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); |
| 666 | |||
| 667 | program_manager.RestoreGuestPipeline(); | ||
| 668 | } | 668 | } |
| 669 | 669 | ||
| 670 | bool RendererOpenGL::TryPresent(int timeout_ms) { | 670 | bool RendererOpenGL::TryPresent(int timeout_ms) { |
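DrawScreen now owns its blit pipeline and binds it outside of guest tracking. A condensed sketch of the new present pattern (the pipeline handle is the renderer's own OGLPipeline created in InitOpenGLObjects above; the function name here is illustrative):

    #include <glad/glad.h>

    // BindHostPipeline also disables GL_GEOMETRY_PROGRAM_NV on the assembly back
    // end so the host blit is unaffected by guest state; RestoreGuestPipeline
    // rewinds that afterwards.
    void PresentSketch(ProgramManager& program_manager, GLuint blit_pipeline) {
        program_manager.BindHostPipeline(blit_pipeline);
        glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);  // fullscreen strip for the screen quad
        program_manager.RestoreGuestPipeline();
    }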
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 50b647661..61bf507f4 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "common/math_util.h" | 10 | #include "common/math_util.h" |
| 11 | #include "video_core/renderer_base.h" | 11 | #include "video_core/renderer_base.h" |
| 12 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 12 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 13 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 13 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 14 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 14 | #include "video_core/renderer_opengl/gl_state_tracker.h" | 15 | #include "video_core/renderer_opengl/gl_state_tracker.h" |
| @@ -95,6 +96,7 @@ private: | |||
| 95 | Core::Frontend::EmuWindow& emu_window; | 96 | Core::Frontend::EmuWindow& emu_window; |
| 96 | Core::System& system; | 97 | Core::System& system; |
| 97 | Core::Frontend::GraphicsContext& context; | 98 | Core::Frontend::GraphicsContext& context; |
| 99 | const Device device; | ||
| 98 | 100 | ||
| 99 | StateTracker state_tracker{system}; | 101 | StateTracker state_tracker{system}; |
| 100 | 102 | ||
| @@ -102,13 +104,14 @@ private: | |||
| 102 | OGLBuffer vertex_buffer; | 104 | OGLBuffer vertex_buffer; |
| 103 | OGLProgram vertex_program; | 105 | OGLProgram vertex_program; |
| 104 | OGLProgram fragment_program; | 106 | OGLProgram fragment_program; |
| 107 | OGLPipeline pipeline; | ||
| 105 | OGLFramebuffer screenshot_framebuffer; | 108 | OGLFramebuffer screenshot_framebuffer; |
| 106 | 109 | ||
| 107 | /// Display information for Switch screen | 110 | /// Display information for Switch screen |
| 108 | ScreenInfo screen_info; | 111 | ScreenInfo screen_info; |
| 109 | 112 | ||
| 110 | /// Global dummy shader pipeline | 113 | /// Global dummy shader pipeline |
| 111 | GLShader::ProgramManager program_manager; | 114 | ProgramManager program_manager; |
| 112 | 115 | ||
| 113 | /// OpenGL framebuffer data | 116 | /// OpenGL framebuffer data |
| 114 | std::vector<u8> gl_framebuffer_data; | 117 | std::vector<u8> gl_framebuffer_data; |
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 12be691a5..2871035f5 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp | |||
| @@ -142,7 +142,7 @@ struct FormatTuple { | |||
| 142 | {VK_FORMAT_BC6H_UFLOAT_BLOCK}, // BC6H_UF16 | 142 | {VK_FORMAT_BC6H_UFLOAT_BLOCK}, // BC6H_UF16 |
| 143 | {VK_FORMAT_BC6H_SFLOAT_BLOCK}, // BC6H_SF16 | 143 | {VK_FORMAT_BC6H_SFLOAT_BLOCK}, // BC6H_SF16 |
| 144 | {VK_FORMAT_ASTC_4x4_UNORM_BLOCK}, // ASTC_2D_4X4 | 144 | {VK_FORMAT_ASTC_4x4_UNORM_BLOCK}, // ASTC_2D_4X4 |
| 145 | {VK_FORMAT_B8G8R8A8_UNORM}, // BGRA8 | 145 | {VK_FORMAT_B8G8R8A8_UNORM, Attachable}, // BGRA8 |
| 146 | {VK_FORMAT_R32G32B32A32_SFLOAT, Attachable | Storage}, // RGBA32F | 146 | {VK_FORMAT_R32G32B32A32_SFLOAT, Attachable | Storage}, // RGBA32F |
| 147 | {VK_FORMAT_R32G32_SFLOAT, Attachable | Storage}, // RG32F | 147 | {VK_FORMAT_R32G32_SFLOAT, Attachable | Storage}, // RG32F |
| 148 | {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32F | 148 | {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32F |
| @@ -168,7 +168,7 @@ struct FormatTuple { | |||
| 168 | {VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8 | 168 | {VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8 |
| 169 | {VK_FORMAT_UNDEFINED}, // ASTC_2D_8X5 | 169 | {VK_FORMAT_UNDEFINED}, // ASTC_2D_8X5 |
| 170 | {VK_FORMAT_UNDEFINED}, // ASTC_2D_5X4 | 170 | {VK_FORMAT_UNDEFINED}, // ASTC_2D_5X4 |
| 171 | {VK_FORMAT_UNDEFINED}, // BGRA8_SRGB | 171 | {VK_FORMAT_B8G8R8A8_SRGB, Attachable}, // BGRA8_SRGB |
| 172 | {VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // DXT1_SRGB | 172 | {VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // DXT1_SRGB |
| 173 | {VK_FORMAT_BC2_SRGB_BLOCK}, // DXT23_SRGB | 173 | {VK_FORMAT_BC2_SRGB_BLOCK}, // DXT23_SRGB |
| 174 | {VK_FORMAT_BC3_SRGB_BLOCK}, // DXT45_SRGB | 174 | {VK_FORMAT_BC3_SRGB_BLOCK}, // DXT45_SRGB |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 5b494da8c..5f33d9e40 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include <memory> | 7 | #include <memory> |
| 8 | 8 | ||
| 9 | #include "core/core.h" | 9 | #include "core/core.h" |
| 10 | #include "video_core/buffer_cache/buffer_cache.h" | ||
| 10 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 11 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 11 | #include "video_core/renderer_vulkan/vk_device.h" | 12 | #include "video_core/renderer_vulkan/vk_device.h" |
| 12 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 13 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index f0c491d00..750e5a0ca 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp | |||
| @@ -104,6 +104,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties( | |||
| 104 | VK_FORMAT_R16_SFLOAT, | 104 | VK_FORMAT_R16_SFLOAT, |
| 105 | VK_FORMAT_R16G16B16A16_SFLOAT, | 105 | VK_FORMAT_R16G16B16A16_SFLOAT, |
| 106 | VK_FORMAT_B8G8R8A8_UNORM, | 106 | VK_FORMAT_B8G8R8A8_UNORM, |
| 107 | VK_FORMAT_B8G8R8A8_SRGB, | ||
| 107 | VK_FORMAT_R4G4B4A4_UNORM_PACK16, | 108 | VK_FORMAT_R4G4B4A4_UNORM_PACK16, |
| 108 | VK_FORMAT_D32_SFLOAT, | 109 | VK_FORMAT_D32_SFLOAT, |
| 109 | VK_FORMAT_D16_UNORM, | 110 | VK_FORMAT_D16_UNORM, |
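BGRA8 and BGRA8_SRGB are now flagged Attachable, and VK_FORMAT_B8G8R8A8_SRGB joins the property-query list above. A hedged sketch of the kind of capability check this enables (plain Vulkan calls, not yuzu's VKDevice API):

    #include <vulkan/vulkan.h>

    // Returns whether the device can use B8G8R8A8_SRGB as a color attachment
    // with optimal tiling, which is what marking the format Attachable implies.
    bool SupportsBgra8SrgbAttachment(VkPhysicalDevice physical_device) {
        VkFormatProperties properties{};
        vkGetPhysicalDeviceFormatProperties(physical_device, VK_FORMAT_B8G8R8A8_SRGB, &properties);
        return (properties.optimalTilingFeatures & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT) != 0;
    }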
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h index 04d07fe6a..043fe7947 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.h +++ b/src/video_core/renderer_vulkan/vk_fence_manager.h | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include <memory> | 7 | #include <memory> |
| 8 | 8 | ||
| 9 | #include "video_core/fence_manager.h" | 9 | #include "video_core/fence_manager.h" |
| 10 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||
| 10 | #include "video_core/renderer_vulkan/wrapper.h" | 11 | #include "video_core/renderer_vulkan/wrapper.h" |
| 11 | 12 | ||
| 12 | namespace Core { | 13 | namespace Core { |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index fe45ed269..a5c7b7945 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -329,8 +329,7 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { | |||
| 329 | 329 | ||
| 330 | const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum); | 330 | const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum); |
| 331 | const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); | 331 | const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); |
| 332 | ASSERT(cpu_addr); | 332 | const auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader; |
| 333 | const auto shader = TryGet(*cpu_addr); | ||
| 334 | ASSERT(shader); | 333 | ASSERT(shader); |
| 335 | 334 | ||
| 336 | const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 | 335 | const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 17a2efe8e..be5b77fae 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -532,14 +532,14 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { | |||
| 532 | return; | 532 | return; |
| 533 | } | 533 | } |
| 534 | texture_cache.OnCPUWrite(addr, size); | 534 | texture_cache.OnCPUWrite(addr, size); |
| 535 | pipeline_cache.InvalidateRegion(addr, size); | 535 | pipeline_cache.OnCPUWrite(addr, size); |
| 536 | buffer_cache.OnCPUWrite(addr, size); | 536 | buffer_cache.OnCPUWrite(addr, size); |
| 537 | query_cache.InvalidateRegion(addr, size); | ||
| 538 | } | 537 | } |
| 539 | 538 | ||
| 540 | void RasterizerVulkan::SyncGuestHost() { | 539 | void RasterizerVulkan::SyncGuestHost() { |
| 541 | texture_cache.SyncGuestHost(); | 540 | texture_cache.SyncGuestHost(); |
| 542 | buffer_cache.SyncGuestHost(); | 541 | buffer_cache.SyncGuestHost(); |
| 542 | pipeline_cache.SyncGuestHost(); | ||
| 543 | } | 543 | } |
| 544 | 544 | ||
| 545 | void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) { | 545 | void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) { |
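Both rasterizers now route CPU writes through the shader/pipeline caches' OnCPUWrite and flush them in SyncGuestHost, matching what the buffer and texture caches already do. A hedged sketch of that deferred-invalidation shape (class name and internals are assumptions, not yuzu's implementation):

    #include <cstddef>
    #include <cstdint>
    #include <mutex>
    #include <vector>

    // OnCPUWrite only records the written range; SyncGuestHost applies the
    // pending invalidations later, on the thread that owns the cache.
    class DeferredInvalidationCache {
    public:
        void OnCPUWrite(std::uintptr_t addr, std::size_t size) {
            std::scoped_lock lock{mutex};
            pending.push_back({addr, size});
        }

        void SyncGuestHost() {
            std::scoped_lock lock{mutex};
            for (const auto& [addr, size] : pending) {
                InvalidateRegion(addr, size);
            }
            pending.clear();
        }

    private:
        struct Range {
            std::uintptr_t addr;
            std::size_t size;
        };

        void InvalidateRegion(std::uintptr_t addr, std::size_t size) {
            // Real eviction logic lives in ShaderCacheOpenGL / VKPipelineCache.
            static_cast<void>(addr);
            static_cast<void>(size);
        }

        std::mutex mutex;
        std::vector<Range> pending;
    };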
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 167e20e91..890f34a2c 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -515,6 +515,16 @@ private: | |||
| 515 | void DeclareCommon() { | 515 | void DeclareCommon() { |
| 516 | thread_id = | 516 | thread_id = |
| 517 | DeclareInputBuiltIn(spv::BuiltIn::SubgroupLocalInvocationId, t_in_uint, "thread_id"); | 517 | DeclareInputBuiltIn(spv::BuiltIn::SubgroupLocalInvocationId, t_in_uint, "thread_id"); |
| 518 | thread_masks[0] = | ||
| 519 | DeclareInputBuiltIn(spv::BuiltIn::SubgroupEqMask, t_in_uint4, "thread_eq_mask"); | ||
| 520 | thread_masks[1] = | ||
| 521 | DeclareInputBuiltIn(spv::BuiltIn::SubgroupGeMask, t_in_uint4, "thread_ge_mask"); | ||
| 522 | thread_masks[2] = | ||
| 523 | DeclareInputBuiltIn(spv::BuiltIn::SubgroupGtMask, t_in_uint4, "thread_gt_mask"); | ||
| 524 | thread_masks[3] = | ||
| 525 | DeclareInputBuiltIn(spv::BuiltIn::SubgroupLeMask, t_in_uint4, "thread_le_mask"); | ||
| 526 | thread_masks[4] = | ||
| 527 | DeclareInputBuiltIn(spv::BuiltIn::SubgroupLtMask, t_in_uint4, "thread_lt_mask"); | ||
| 518 | } | 528 | } |
| 519 | 529 | ||
| 520 | void DeclareVertex() { | 530 | void DeclareVertex() { |
| @@ -1071,8 +1081,7 @@ private: | |||
| 1071 | 1081 | ||
| 1072 | void VisitBasicBlock(const NodeBlock& bb) { | 1082 | void VisitBasicBlock(const NodeBlock& bb) { |
| 1073 | for (const auto& node : bb) { | 1083 | for (const auto& node : bb) { |
| 1074 | [[maybe_unused]] const Type type = Visit(node).type; | 1084 | Visit(node); |
| 1075 | ASSERT(type == Type::Void); | ||
| 1076 | } | 1085 | } |
| 1077 | } | 1086 | } |
| 1078 | 1087 | ||
| @@ -1362,7 +1371,9 @@ private: | |||
| 1362 | Expression target{}; | 1371 | Expression target{}; |
| 1363 | if (const auto gpr = std::get_if<GprNode>(&*dest)) { | 1372 | if (const auto gpr = std::get_if<GprNode>(&*dest)) { |
| 1364 | if (gpr->GetIndex() == Register::ZeroIndex) { | 1373 | if (gpr->GetIndex() == Register::ZeroIndex) { |
| 1365 | // Writing to Register::ZeroIndex is a no op | 1374 | // Writing to Register::ZeroIndex is a no op but we still have to visit its source |
| 1375 | // because it might have side effects. | ||
| 1376 | Visit(src); | ||
| 1366 | return {}; | 1377 | return {}; |
| 1367 | } | 1378 | } |
| 1368 | target = {registers.at(gpr->GetIndex()), Type::Float}; | 1379 | target = {registers.at(gpr->GetIndex()), Type::Float}; |
| @@ -2175,12 +2186,35 @@ private: | |||
| 2175 | return {OpLoad(t_uint, thread_id), Type::Uint}; | 2186 | return {OpLoad(t_uint, thread_id), Type::Uint}; |
| 2176 | } | 2187 | } |
| 2177 | 2188 | ||
| 2189 | template <std::size_t index> | ||
| 2190 | Expression ThreadMask(Operation) { | ||
| 2191 | // TODO(Rodrigo): Handle devices with different warp sizes | ||
| 2192 | const Id mask = thread_masks[index]; | ||
| 2193 | return {OpLoad(t_uint, AccessElement(t_in_uint, mask, 0)), Type::Uint}; | ||
| 2194 | } | ||
| 2195 | |||
| 2178 | Expression ShuffleIndexed(Operation operation) { | 2196 | Expression ShuffleIndexed(Operation operation) { |
| 2179 | const Id value = AsFloat(Visit(operation[0])); | 2197 | const Id value = AsFloat(Visit(operation[0])); |
| 2180 | const Id index = AsUint(Visit(operation[1])); | 2198 | const Id index = AsUint(Visit(operation[1])); |
| 2181 | return {OpSubgroupReadInvocationKHR(t_float, value, index), Type::Float}; | 2199 | return {OpSubgroupReadInvocationKHR(t_float, value, index), Type::Float}; |
| 2182 | } | 2200 | } |
| 2183 | 2201 | ||
| 2202 | Expression Barrier(Operation) { | ||
| 2203 | if (!ir.IsDecompiled()) { | ||
| 2204 | LOG_ERROR(Render_Vulkan, "OpBarrier used by shader is not decompiled"); | ||
| 2205 | return {}; | ||
| 2206 | } | ||
| 2207 | |||
| 2208 | const auto scope = spv::Scope::Workgroup; | ||
| 2209 | const auto memory = spv::Scope::Workgroup; | ||
| 2210 | const auto semantics = | ||
| 2211 | spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AcquireRelease; | ||
| 2212 | OpControlBarrier(Constant(t_uint, static_cast<u32>(scope)), | ||
| 2213 | Constant(t_uint, static_cast<u32>(memory)), | ||
| 2214 | Constant(t_uint, static_cast<u32>(semantics))); | ||
| 2215 | return {}; | ||
| 2216 | } | ||
| 2217 | |||
| 2184 | Expression MemoryBarrierGL(Operation) { | 2218 | Expression MemoryBarrierGL(Operation) { |
| 2185 | const auto scope = spv::Scope::Device; | 2219 | const auto scope = spv::Scope::Device; |
| 2186 | const auto semantics = | 2220 | const auto semantics = |
| @@ -2639,8 +2673,14 @@ private: | |||
| 2639 | &SPIRVDecompiler::Vote<&Module::OpSubgroupAllEqualKHR>, | 2673 | &SPIRVDecompiler::Vote<&Module::OpSubgroupAllEqualKHR>, |
| 2640 | 2674 | ||
| 2641 | &SPIRVDecompiler::ThreadId, | 2675 | &SPIRVDecompiler::ThreadId, |
| 2676 | &SPIRVDecompiler::ThreadMask<0>, // Eq | ||
| 2677 | &SPIRVDecompiler::ThreadMask<1>, // Ge | ||
| 2678 | &SPIRVDecompiler::ThreadMask<2>, // Gt | ||
| 2679 | &SPIRVDecompiler::ThreadMask<3>, // Le | ||
| 2680 | &SPIRVDecompiler::ThreadMask<4>, // Lt | ||
| 2642 | &SPIRVDecompiler::ShuffleIndexed, | 2681 | &SPIRVDecompiler::ShuffleIndexed, |
| 2643 | 2682 | ||
| 2683 | &SPIRVDecompiler::Barrier, | ||
| 2644 | &SPIRVDecompiler::MemoryBarrierGL, | 2684 | &SPIRVDecompiler::MemoryBarrierGL, |
| 2645 | }; | 2685 | }; |
| 2646 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); | 2686 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); |
| @@ -2763,6 +2803,7 @@ private: | |||
| 2763 | Id workgroup_id{}; | 2803 | Id workgroup_id{}; |
| 2764 | Id local_invocation_id{}; | 2804 | Id local_invocation_id{}; |
| 2765 | Id thread_id{}; | 2805 | Id thread_id{}; |
| 2806 | std::array<Id, 5> thread_masks{}; // eq, ge, gt, le, lt | ||
| 2766 | 2807 | ||
| 2767 | VertexIndices in_indices; | 2808 | VertexIndices in_indices; |
| 2768 | VertexIndices out_indices; | 2809 | VertexIndices out_indices; |
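
Note on the vk_shader_decompiler.cpp hunks: the decompiler now declares the five SubgroupEqMask/GeMask/GtMask/LeMask/LtMask built-ins (four-component uint vectors in SPIR-V, of which only component 0 is read, hence the warp-size TODO in ThreadMask), and lowers the new Barrier operation to OpControlBarrier with Workgroup scope and WorkgroupMemory | AcquireRelease semantics. As a reference for what those mask built-ins contain, here is a small standalone calculation of the five masks for one lane, assuming a 32-wide warp (the only case the current code handles):

    #include <cstdint>
    #include <cstdio>

    // Lane masks as seen from lane `id` of a 32-wide subgroup. This mirrors the
    // semantics of SubgroupEq/Ge/Gt/Le/LtMask component 0; wider subgroups would
    // need the remaining vector components (the TODO in ThreadMask above).
    struct LaneMasks {
        std::uint32_t eq, ge, gt, le, lt;
    };

    constexpr LaneMasks ComputeMasks(std::uint32_t id) {
        const std::uint32_t eq = 1u << id;
        const std::uint32_t ge = ~0u << id; // this lane and every higher lane
        const std::uint32_t gt = ge & ~eq;  // strictly higher lanes
        const std::uint32_t lt = ~ge;       // strictly lower lanes
        const std::uint32_t le = lt | eq;   // this lane and every lower lane
        return {eq, ge, gt, le, lt};
    }

    int main() {
        const LaneMasks m = ComputeMasks(5);
        std::printf("eq=%08x ge=%08x gt=%08x le=%08x lt=%08x\n",
                    m.eq, m.ge, m.gt, m.le, m.lt);
    }

For lane 5 this prints eq=00000020, ge=ffffffe0, gt=ffffffc0, le=0000003f, lt=0000001f, which is exactly what the corresponding SPIR-V built-ins report for a 32-invocation subgroup.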
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 9392f065b..63adbc4a3 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp | |||
| @@ -387,7 +387,6 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 387 | } | 387 | } |
| 388 | case OpCode::Id::RED: { | 388 | case OpCode::Id::RED: { |
| 389 | UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32); | 389 | UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32); |
| 390 | UNIMPLEMENTED_IF_MSG(instr.red.operation != AtomicOp::Add); | ||
| 391 | const auto [real_address, base_address, descriptor] = | 390 | const auto [real_address, base_address, descriptor] = |
| 392 | TrackGlobalMemory(bb, instr, true, true); | 391 | TrackGlobalMemory(bb, instr, true, true); |
| 393 | if (!real_address || !base_address) { | 392 | if (!real_address || !base_address) { |
| @@ -396,7 +395,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 396 | } | 395 | } |
| 397 | Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | 396 | Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); |
| 398 | Node value = GetRegister(instr.gpr0); | 397 | Node value = GetRegister(instr.gpr0); |
| 399 | bb.push_back(Operation(OperationCode::ReduceIAdd, move(gmem), move(value))); | 398 | bb.push_back(Operation(GetAtomOperation(instr.red.operation), move(gmem), move(value))); |
| 400 | break; | 399 | break; |
| 401 | } | 400 | } |
| 402 | case OpCode::Id::ATOM: { | 401 | case OpCode::Id::ATOM: { |
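
Note on the memory.cpp hunk: with the second UNIMPLEMENTED_IF_MSG removed, RED (global reduction) now forwards whatever operation the instruction encodes through GetAtomOperation instead of hard-coding ReduceIAdd. A hedged sketch of that style of dispatch, using local stand-in enums since the exact OperationCode names returned by GetAtomOperation are not visible in this hunk:

    #include <optional>

    // Stand-in enums for illustration only; the real AtomicOp/OperationCode
    // definitions live in yuzu's shader IR headers.
    enum class AtomicOp { Add, Min, Max, And, Or, Xor, Exch };
    enum class OperationCode { ReduceIAdd, ReduceIMin, ReduceIMax, ReduceIAnd, ReduceIOr, ReduceIXor };

    // Sketch of a GetAtomOperation-style mapping: unsupported operations return
    // nullopt so the caller can fall back to UNIMPLEMENTED handling.
    std::optional<OperationCode> MapReduceOperation(AtomicOp op) {
        switch (op) {
        case AtomicOp::Add:
            return OperationCode::ReduceIAdd;
        case AtomicOp::Min:
            return OperationCode::ReduceIMin;
        case AtomicOp::Max:
            return OperationCode::ReduceIMax;
        case AtomicOp::And:
            return OperationCode::ReduceIAnd;
        case AtomicOp::Or:
            return OperationCode::ReduceIOr;
        case AtomicOp::Xor:
            return OperationCode::ReduceIXor;
        default:
            return std::nullopt; // e.g. Exch has no meaningful reduction form
        }
    }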
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index d4f95b18c..694b325e1 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp | |||
| @@ -109,6 +109,27 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 109 | return Operation(OperationCode::WorkGroupIdY); | 109 | return Operation(OperationCode::WorkGroupIdY); |
| 110 | case SystemVariable::CtaIdZ: | 110 | case SystemVariable::CtaIdZ: |
| 111 | return Operation(OperationCode::WorkGroupIdZ); | 111 | return Operation(OperationCode::WorkGroupIdZ); |
| 112 | case SystemVariable::EqMask: | ||
| 113 | case SystemVariable::LtMask: | ||
| 114 | case SystemVariable::LeMask: | ||
| 115 | case SystemVariable::GtMask: | ||
| 116 | case SystemVariable::GeMask: | ||
| 117 | uses_warps = true; | ||
| 118 | switch (instr.sys20) { | ||
| 119 | case SystemVariable::EqMask: | ||
| 120 | return Operation(OperationCode::ThreadEqMask); | ||
| 121 | case SystemVariable::LtMask: | ||
| 122 | return Operation(OperationCode::ThreadLtMask); | ||
| 123 | case SystemVariable::LeMask: | ||
| 124 | return Operation(OperationCode::ThreadLeMask); | ||
| 125 | case SystemVariable::GtMask: | ||
| 126 | return Operation(OperationCode::ThreadGtMask); | ||
| 127 | case SystemVariable::GeMask: | ||
| 128 | return Operation(OperationCode::ThreadGeMask); | ||
| 129 | default: | ||
| 130 | UNREACHABLE(); | ||
| 131 | return Immediate(0u); | ||
| 132 | } | ||
| 112 | default: | 133 | default: |
| 113 | UNIMPLEMENTED_MSG("Unhandled system move: {}", | 134 | UNIMPLEMENTED_MSG("Unhandled system move: {}", |
| 114 | static_cast<u32>(instr.sys20.Value())); | 135 | static_cast<u32>(instr.sys20.Value())); |
| @@ -272,6 +293,11 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 272 | SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8)); | 293 | SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8)); |
| 273 | break; | 294 | break; |
| 274 | } | 295 | } |
| 296 | case OpCode::Id::BAR: { | ||
| 297 | UNIMPLEMENTED_IF_MSG(instr.value != 0xF0A81B8000070000ULL, "BAR is not BAR.SYNC 0x0"); | ||
| 298 | bb.push_back(Operation(OperationCode::Barrier)); | ||
| 299 | break; | ||
| 300 | } | ||
| 275 | case OpCode::Id::MEMBAR: { | 301 | case OpCode::Id::MEMBAR: { |
| 276 | UNIMPLEMENTED_IF(instr.membar.type != Tegra::Shader::MembarType::GL); | 302 | UNIMPLEMENTED_IF(instr.membar.type != Tegra::Shader::MembarType::GL); |
| 277 | UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default); | 303 | UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default); |
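
Note on the other.cpp hunk: the warp lane-mask system registers (S2R with EqMask through GeMask) now lower to the new Thread*Mask IR operations and set uses_warps, and BAR is accepted only when its raw 64-bit encoding matches a plain BAR.SYNC 0x0. A minimal sketch of that whole-word encoding guard, reusing the constant from the hunk and a hypothetical helper name:

    #include <cstdint>

    // Hedged sketch of the BAR guard above: only the one encoding that
    // corresponds to BAR.SYNC 0x0 is implemented, so anything else is rejected
    // up front. kBarSync0 is copied from the hunk; IsPlainBarSync is a
    // hypothetical helper, not a yuzu function.
    constexpr std::uint64_t kBarSync0 = 0xF0A81B8000070000ULL;

    constexpr bool IsPlainBarSync(std::uint64_t raw_instruction) {
        return raw_instruction == kBarSync0;
    }

    static_assert(IsPlainBarSync(kBarSync0));
    static_assert(!IsPlainBarSync(kBarSync0 | 1u)); // any other field breaks the match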
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index f75b62240..c06512413 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h | |||
| @@ -226,8 +226,14 @@ enum class OperationCode { | |||
| 226 | VoteEqual, /// (bool) -> bool | 226 | VoteEqual, /// (bool) -> bool |
| 227 | 227 | ||
| 228 | ThreadId, /// () -> uint | 228 | ThreadId, /// () -> uint |
| 229 | ThreadEqMask, /// () -> uint | ||
| 230 | ThreadGeMask, /// () -> uint | ||
| 231 | ThreadGtMask, /// () -> uint | ||
| 232 | ThreadLeMask, /// () -> uint | ||
| 233 | ThreadLtMask, /// () -> uint | ||
| 229 | ShuffleIndexed, /// (uint value, uint index) -> uint | 234 | ShuffleIndexed, /// (uint value, uint index) -> uint |
| 230 | 235 | ||
| 236 | Barrier, /// () -> void | ||
| 231 | MemoryBarrierGL, /// () -> void | 237 | MemoryBarrierGL, /// () -> void |
| 232 | 238 | ||
| 233 | Amount, | 239 | Amount, |
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 27775701d..b08b87426 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp | |||
| @@ -643,6 +643,8 @@ void Config::ReadRendererValues() { | |||
| 643 | Settings::values.use_asynchronous_gpu_emulation = | 643 | Settings::values.use_asynchronous_gpu_emulation = |
| 644 | ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool(); | 644 | ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool(); |
| 645 | Settings::values.use_vsync = ReadSetting(QStringLiteral("use_vsync"), true).toBool(); | 645 | Settings::values.use_vsync = ReadSetting(QStringLiteral("use_vsync"), true).toBool(); |
| 646 | Settings::values.use_assembly_shaders = | ||
| 647 | ReadSetting(QStringLiteral("use_assembly_shaders"), false).toBool(); | ||
| 646 | Settings::values.use_fast_gpu_time = | 648 | Settings::values.use_fast_gpu_time = |
| 647 | ReadSetting(QStringLiteral("use_fast_gpu_time"), true).toBool(); | 649 | ReadSetting(QStringLiteral("use_fast_gpu_time"), true).toBool(); |
| 648 | Settings::values.force_30fps_mode = | 650 | Settings::values.force_30fps_mode = |
| @@ -1090,6 +1092,8 @@ void Config::SaveRendererValues() { | |||
| 1090 | WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"), | 1092 | WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"), |
| 1091 | Settings::values.use_asynchronous_gpu_emulation, false); | 1093 | Settings::values.use_asynchronous_gpu_emulation, false); |
| 1092 | WriteSetting(QStringLiteral("use_vsync"), Settings::values.use_vsync, true); | 1094 | WriteSetting(QStringLiteral("use_vsync"), Settings::values.use_vsync, true); |
| 1095 | WriteSetting(QStringLiteral("use_assembly_shaders"), Settings::values.use_assembly_shaders, | ||
| 1096 | false); | ||
| 1093 | WriteSetting(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time, true); | 1097 | WriteSetting(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time, true); |
| 1094 | WriteSetting(QStringLiteral("force_30fps_mode"), Settings::values.force_30fps_mode, false); | 1098 | WriteSetting(QStringLiteral("force_30fps_mode"), Settings::values.force_30fps_mode, false); |
| 1095 | 1099 | ||
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp index 5bb2ae555..37aadf7f8 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.cpp +++ b/src/yuzu/configuration/configure_graphics_advanced.cpp | |||
| @@ -12,6 +12,9 @@ ConfigureGraphicsAdvanced::ConfigureGraphicsAdvanced(QWidget* parent) | |||
| 12 | 12 | ||
| 13 | ui->setupUi(this); | 13 | ui->setupUi(this); |
| 14 | 14 | ||
| 15 | // TODO: Remove this after assembly shaders are fully integrated | ||
| 16 | ui->use_assembly_shaders->setVisible(false); | ||
| 17 | |||
| 15 | SetConfiguration(); | 18 | SetConfiguration(); |
| 16 | } | 19 | } |
| 17 | 20 | ||
| @@ -22,6 +25,8 @@ void ConfigureGraphicsAdvanced::SetConfiguration() { | |||
| 22 | ui->gpu_accuracy->setCurrentIndex(static_cast<int>(Settings::values.gpu_accuracy)); | 25 | ui->gpu_accuracy->setCurrentIndex(static_cast<int>(Settings::values.gpu_accuracy)); |
| 23 | ui->use_vsync->setEnabled(runtime_lock); | 26 | ui->use_vsync->setEnabled(runtime_lock); |
| 24 | ui->use_vsync->setChecked(Settings::values.use_vsync); | 27 | ui->use_vsync->setChecked(Settings::values.use_vsync); |
| 28 | ui->use_assembly_shaders->setEnabled(runtime_lock); | ||
| 29 | ui->use_assembly_shaders->setChecked(Settings::values.use_assembly_shaders); | ||
| 25 | ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time); | 30 | ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time); |
| 26 | ui->force_30fps_mode->setEnabled(runtime_lock); | 31 | ui->force_30fps_mode->setEnabled(runtime_lock); |
| 27 | ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode); | 32 | ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode); |
| @@ -33,6 +38,7 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() { | |||
| 33 | auto gpu_accuracy = static_cast<Settings::GPUAccuracy>(ui->gpu_accuracy->currentIndex()); | 38 | auto gpu_accuracy = static_cast<Settings::GPUAccuracy>(ui->gpu_accuracy->currentIndex()); |
| 34 | Settings::values.gpu_accuracy = gpu_accuracy; | 39 | Settings::values.gpu_accuracy = gpu_accuracy; |
| 35 | Settings::values.use_vsync = ui->use_vsync->isChecked(); | 40 | Settings::values.use_vsync = ui->use_vsync->isChecked(); |
| 41 | Settings::values.use_assembly_shaders = ui->use_assembly_shaders->isChecked(); | ||
| 36 | Settings::values.use_fast_gpu_time = ui->use_fast_gpu_time->isChecked(); | 42 | Settings::values.use_fast_gpu_time = ui->use_fast_gpu_time->isChecked(); |
| 37 | Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked(); | 43 | Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked(); |
| 38 | Settings::values.max_anisotropy = ui->anisotropic_filtering_combobox->currentIndex(); | 44 | Settings::values.max_anisotropy = ui->anisotropic_filtering_combobox->currentIndex(); |
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index 770b80c50..0021607ac 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui | |||
| @@ -63,6 +63,16 @@ | |||
| 63 | </widget> | 63 | </widget> |
| 64 | </item> | 64 | </item> |
| 65 | <item> | 65 | <item> |
| 66 | <widget class="QCheckBox" name="use_assembly_shaders"> | ||
| 67 | <property name="toolTip"> | ||
| 68 | <string>Enabling this reduces shader stutter. Enables OpenGL assembly shaders on supported Nvidia devices (NV_gpu_program5 is required). This feature is experimental.</string> | ||
| 69 | </property> | ||
| 70 | <property name="text"> | ||
| 71 | <string>Use assembly shaders (experimental, Nvidia OpenGL only)</string> | ||
| 72 | </property> | ||
| 73 | </widget> | ||
| 74 | </item> | ||
| 75 | <item> | ||
| 66 | <widget class="QCheckBox" name="force_30fps_mode"> | 76 | <widget class="QCheckBox" name="force_30fps_mode"> |
| 67 | <property name="text"> | 77 | <property name="text"> |
| 68 | <string>Force 30 FPS mode</string> | 78 | <string>Force 30 FPS mode</string> |
diff --git a/src/yuzu/discord_impl.cpp b/src/yuzu/discord_impl.cpp index ea0079353..a93733b26 100644 --- a/src/yuzu/discord_impl.cpp +++ b/src/yuzu/discord_impl.cpp | |||
| @@ -18,7 +18,7 @@ DiscordImpl::DiscordImpl() { | |||
| 18 | 18 | ||
| 19 | // The number is the client ID for yuzu, it's used for images and the | 19 | // The number is the client ID for yuzu, it's used for images and the |
| 20 | // application name | 20 | // application name |
| 21 | Discord_Initialize("471872241299226636", &handlers, 1, nullptr); | 21 | Discord_Initialize("712465656758665259", &handlers, 1, nullptr); |
| 22 | } | 22 | } |
| 23 | 23 | ||
| 24 | DiscordImpl::~DiscordImpl() { | 24 | DiscordImpl::~DiscordImpl() { |
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 2348e6e0d..c20d48c42 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp | |||
| @@ -397,6 +397,8 @@ void Config::ReadValues() { | |||
| 397 | sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false); | 397 | sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false); |
| 398 | Settings::values.use_vsync = | 398 | Settings::values.use_vsync = |
| 399 | static_cast<u16>(sdl2_config->GetInteger("Renderer", "use_vsync", 1)); | 399 | static_cast<u16>(sdl2_config->GetInteger("Renderer", "use_vsync", 1)); |
| 400 | Settings::values.use_assembly_shaders = | ||
| 401 | sdl2_config->GetBoolean("Renderer", "use_assembly_shaders", false); | ||
| 400 | Settings::values.use_fast_gpu_time = | 402 | Settings::values.use_fast_gpu_time = |
| 401 | sdl2_config->GetBoolean("Renderer", "use_fast_gpu_time", true); | 403 | sdl2_config->GetBoolean("Renderer", "use_fast_gpu_time", true); |
| 402 | 404 | ||
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index ae94b51c4..abc6e6e65 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h | |||
| @@ -134,6 +134,10 @@ max_anisotropy = | |||
| 134 | # 0 (default): Off, 1: On | 134 | # 0 (default): Off, 1: On |
| 135 | use_vsync = | 135 | use_vsync = |
| 136 | 136 | ||
| 137 | # Whether to use OpenGL assembly shaders or not. NV_gpu_program5 is required. | ||
| 138 | # 0 (default): Off, 1: On | ||
| 139 | use_assembly_shaders = | ||
| 140 | |||
| 137 | # Turns on the frame limiter, which will limit frames output to the target game speed | 141 | # Turns on the frame limiter, which will limit frames output to the target game speed |
| 138 | # 0: Off, 1: On (default) | 142 | # 0: Off, 1: On (default) |
| 139 | use_frame_limit = | 143 | use_frame_limit = |
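
Note on the settings plumbing: together with the Qt config, the advanced-graphics tab, the SDL frontend config and the default INI entry above, use_assembly_shaders gets a full round trip from config file to Settings::values. A hedged sketch of how a backend might gate on the flag at startup; the Settings namespace is reduced to the one field and HasExtension is a made-up stand-in for the real GL_NV_gpu_program5 query, which is not part of this diff:

    #include <string_view>

    namespace Settings {
    struct Values {
        bool use_assembly_shaders = false; // mirrors the new config key
    };
    inline Values values;
    } // namespace Settings

    // Hypothetical extension probe; a real backend would consult the GL context
    // (e.g. by scanning glGetStringi results) instead of this placeholder.
    bool HasExtension(std::string_view name) {
        return name == "GL_NV_gpu_program5"; // pretend the driver advertises it
    }

    bool UseAssemblyShaders() {
        // Off by default (matching "0 (default): Off" in default_ini.h) and only
        // honored when the driver actually exposes NV_gpu_program5.
        return Settings::values.use_assembly_shaders && HasExtension("GL_NV_gpu_program5");
    }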