diff options
Diffstat (limited to 'src')
29 files changed, 514 insertions, 66 deletions
diff --git a/src/audio_core/stream.cpp b/src/audio_core/stream.cpp index ad6c587c2..5a30f55a7 100644 --- a/src/audio_core/stream.cpp +++ b/src/audio_core/stream.cpp | |||
| @@ -107,9 +107,12 @@ void Stream::PlayNextBuffer(std::chrono::nanoseconds ns_late) { | |||
| 107 | active_buffer = queued_buffers.front(); | 107 | active_buffer = queued_buffers.front(); |
| 108 | queued_buffers.pop(); | 108 | queued_buffers.pop(); |
| 109 | 109 | ||
| 110 | VolumeAdjustSamples(active_buffer->GetSamples(), game_volume); | 110 | auto& samples = active_buffer->GetSamples(); |
| 111 | 111 | ||
| 112 | sink_stream.EnqueueSamples(GetNumChannels(), active_buffer->GetSamples()); | 112 | VolumeAdjustSamples(samples, game_volume); |
| 113 | |||
| 114 | sink_stream.EnqueueSamples(GetNumChannels(), samples); | ||
| 115 | played_samples += samples.size(); | ||
| 113 | 116 | ||
| 114 | const auto buffer_release_ns = GetBufferReleaseNS(*active_buffer); | 117 | const auto buffer_release_ns = GetBufferReleaseNS(*active_buffer); |
| 115 | 118 | ||
diff --git a/src/audio_core/stream.h b/src/audio_core/stream.h index 559844b9b..dbd97ec9c 100644 --- a/src/audio_core/stream.h +++ b/src/audio_core/stream.h | |||
| @@ -89,6 +89,11 @@ public: | |||
| 89 | return sample_rate; | 89 | return sample_rate; |
| 90 | } | 90 | } |
| 91 | 91 | ||
| 92 | /// Gets the running total of samples this stream has enqueued to the sink so far | ||
| 93 | [[nodiscard]] u64 GetPlayedSampleCount() const { | ||
| 94 | return played_samples; | ||
| 95 | } | ||
| 96 | |||
| 92 | /// Gets the number of channels | 97 | /// Gets the number of channels |
| 93 | [[nodiscard]] u32 GetNumChannels() const; | 98 | [[nodiscard]] u32 GetNumChannels() const; |
| 94 | 99 | ||
| @@ -106,6 +111,7 @@ private: | |||
| 106 | [[nodiscard]] std::chrono::nanoseconds GetBufferReleaseNS(const Buffer& buffer) const; | 111 | [[nodiscard]] std::chrono::nanoseconds GetBufferReleaseNS(const Buffer& buffer) const; |
| 107 | 112 | ||
| 108 | u32 sample_rate; ///< Sample rate of the stream | 113 | u32 sample_rate; ///< Sample rate of the stream |
| 114 | u64 played_samples{}; ///< The current played sample count | ||
| 109 | Format format; ///< Format of the stream | 115 | Format format; ///< Format of the stream |
| 110 | float game_volume = 1.0f; ///< The volume the game currently has set | 116 | float game_volume = 1.0f; ///< The volume the game currently has set |
| 111 | ReleaseCallback release_callback; ///< Buffer release callback for the stream | 117 | ReleaseCallback release_callback; ///< Buffer release callback for the stream |
diff --git a/src/common/common_sizes.h b/src/common/common_sizes.h index 7e9fd968b..d07b7ee5a 100644 --- a/src/common/common_sizes.h +++ b/src/common/common_sizes.h | |||
| @@ -24,6 +24,7 @@ enum : u64 { | |||
| 24 | Size_128_MB = 128ULL * Size_1_MB, | 24 | Size_128_MB = 128ULL * Size_1_MB, |
| 25 | Size_448_MB = 448ULL * Size_1_MB, | 25 | Size_448_MB = 448ULL * Size_1_MB, |
| 26 | Size_507_MB = 507ULL * Size_1_MB, | 26 | Size_507_MB = 507ULL * Size_1_MB, |
| 27 | Size_512_MB = 512ULL * Size_1_MB, | ||
| 27 | Size_562_MB = 562ULL * Size_1_MB, | 28 | Size_562_MB = 562ULL * Size_1_MB, |
| 28 | Size_1554_MB = 1554ULL * Size_1_MB, | 29 | Size_1554_MB = 1554ULL * Size_1_MB, |
| 29 | Size_2048_MB = 2048ULL * Size_1_MB, | 30 | Size_2048_MB = 2048ULL * Size_1_MB, |
diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 6397308ec..e1bb4b7ff 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp | |||
| @@ -59,6 +59,7 @@ void LogSettings() { | |||
| 59 | log_setting("Renderer_UseVsync", values.use_vsync.GetValue()); | 59 | log_setting("Renderer_UseVsync", values.use_vsync.GetValue()); |
| 60 | log_setting("Renderer_UseAssemblyShaders", values.use_assembly_shaders.GetValue()); | 60 | log_setting("Renderer_UseAssemblyShaders", values.use_assembly_shaders.GetValue()); |
| 61 | log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue()); | 61 | log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue()); |
| 62 | log_setting("Renderer_UseGarbageCollection", values.use_caches_gc.GetValue()); | ||
| 62 | log_setting("Renderer_AnisotropicFilteringLevel", values.max_anisotropy.GetValue()); | 63 | log_setting("Renderer_AnisotropicFilteringLevel", values.max_anisotropy.GetValue()); |
| 63 | log_setting("Audio_OutputEngine", values.sink_id); | 64 | log_setting("Audio_OutputEngine", values.sink_id); |
| 64 | log_setting("Audio_EnableAudioStretching", values.enable_audio_stretching.GetValue()); | 65 | log_setting("Audio_EnableAudioStretching", values.enable_audio_stretching.GetValue()); |
| @@ -142,6 +143,7 @@ void RestoreGlobalState(bool is_powered_on) { | |||
| 142 | values.use_assembly_shaders.SetGlobal(true); | 143 | values.use_assembly_shaders.SetGlobal(true); |
| 143 | values.use_asynchronous_shaders.SetGlobal(true); | 144 | values.use_asynchronous_shaders.SetGlobal(true); |
| 144 | values.use_fast_gpu_time.SetGlobal(true); | 145 | values.use_fast_gpu_time.SetGlobal(true); |
| 146 | values.use_caches_gc.SetGlobal(true); | ||
| 145 | values.bg_red.SetGlobal(true); | 147 | values.bg_red.SetGlobal(true); |
| 146 | values.bg_green.SetGlobal(true); | 148 | values.bg_green.SetGlobal(true); |
| 147 | values.bg_blue.SetGlobal(true); | 149 | values.bg_blue.SetGlobal(true); |
diff --git a/src/common/settings.h b/src/common/settings.h index 85554eac4..82ec18e27 100644 --- a/src/common/settings.h +++ b/src/common/settings.h | |||
| @@ -154,6 +154,7 @@ struct Values { | |||
| 154 | Setting<bool> use_assembly_shaders; | 154 | Setting<bool> use_assembly_shaders; |
| 155 | Setting<bool> use_asynchronous_shaders; | 155 | Setting<bool> use_asynchronous_shaders; |
| 156 | Setting<bool> use_fast_gpu_time; | 156 | Setting<bool> use_fast_gpu_time; |
| 157 | Setting<bool> use_caches_gc; | ||
| 157 | 158 | ||
| 158 | Setting<float> bg_red; | 159 | Setting<float> bg_red; |
| 159 | Setting<float> bg_green; | 160 | Setting<float> bg_green; |
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp index 804c6b10c..92d4510b1 100644 --- a/src/core/hle/service/audio/audout_u.cpp +++ b/src/core/hle/service/audio/audout_u.cpp | |||
| @@ -58,7 +58,7 @@ public: | |||
| 58 | {7, &IAudioOut::AppendAudioOutBufferImpl, "AppendAudioOutBufferAuto"}, | 58 | {7, &IAudioOut::AppendAudioOutBufferImpl, "AppendAudioOutBufferAuto"}, |
| 59 | {8, &IAudioOut::GetReleasedAudioOutBufferImpl, "GetReleasedAudioOutBufferAuto"}, | 59 | {8, &IAudioOut::GetReleasedAudioOutBufferImpl, "GetReleasedAudioOutBufferAuto"}, |
| 60 | {9, &IAudioOut::GetAudioOutBufferCount, "GetAudioOutBufferCount"}, | 60 | {9, &IAudioOut::GetAudioOutBufferCount, "GetAudioOutBufferCount"}, |
| 61 | {10, nullptr, "GetAudioOutPlayedSampleCount"}, | 61 | {10, &IAudioOut::GetAudioOutPlayedSampleCount, "GetAudioOutPlayedSampleCount"}, |
| 62 | {11, &IAudioOut::FlushAudioOutBuffers, "FlushAudioOutBuffers"}, | 62 | {11, &IAudioOut::FlushAudioOutBuffers, "FlushAudioOutBuffers"}, |
| 63 | {12, &IAudioOut::SetAudioOutVolume, "SetAudioOutVolume"}, | 63 | {12, &IAudioOut::SetAudioOutVolume, "SetAudioOutVolume"}, |
| 64 | {13, &IAudioOut::GetAudioOutVolume, "GetAudioOutVolume"}, | 64 | {13, &IAudioOut::GetAudioOutVolume, "GetAudioOutVolume"}, |
| @@ -186,6 +186,14 @@ private: | |||
| 186 | rb.Push(static_cast<u32>(stream->GetQueueSize())); | 186 | rb.Push(static_cast<u32>(stream->GetQueueSize())); |
| 187 | } | 187 | } |
| 188 | 188 | ||
| | /// Replies with a success result followed by the stream's played sample count | ||
| 189 | void GetAudioOutPlayedSampleCount(Kernel::HLERequestContext& ctx) { | ||
| 190 | LOG_DEBUG(Service_Audio, "called"); | ||
| 191 | |||
| | const auto played_sample_count = stream->GetPlayedSampleCount(); | ||
| 192 | IPC::ResponseBuilder response{ctx, 4}; | ||
| 193 | response.Push(ResultSuccess); | ||
| 194 | response.Push(played_sample_count); | ||
| 195 | } | ||
| 196 | |||
| 189 | void FlushAudioOutBuffers(Kernel::HLERequestContext& ctx) { | 197 | void FlushAudioOutBuffers(Kernel::HLERequestContext& ctx) { |
| 190 | LOG_DEBUG(Service_Audio, "called"); | 198 | LOG_DEBUG(Service_Audio, "called"); |
| 191 | 199 | ||
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index a39505903..b121d36a3 100644 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h | |||
| @@ -256,6 +256,16 @@ public: | |||
| 256 | stream_score += score; | 256 | stream_score += score; |
| 257 | } | 257 | } |
| 258 | 258 | ||
| 259 | /// Stamps this buffer with the given frame tick (the cache does this whenever it touches it) | ||
| 260 | void SetFrameTick(u64 new_frame_tick) noexcept { | ||
| 261 | frame_tick = new_frame_tick; | ||
| 262 | } | ||
| 263 | |||
| 264 | /// Returns the frame tick most recently stored through SetFrameTick | ||
| 265 | [[nodiscard]] u64 FrameTick() const noexcept { | ||
| 266 | return frame_tick; | ||
| 267 | } | ||
| 268 | |||
| 259 | /// Returns the likeliness of this being a stream buffer | 269 | /// Returns the likeliness of this being a stream buffer |
| 260 | [[nodiscard]] int StreamScore() const noexcept { | 270 | [[nodiscard]] int StreamScore() const noexcept { |
| 261 | return stream_score; | 271 | return stream_score; |
| @@ -586,6 +596,7 @@ private: | |||
| 586 | RasterizerInterface* rasterizer = nullptr; | 596 | RasterizerInterface* rasterizer = nullptr; |
| 587 | VAddr cpu_addr = 0; | 597 | VAddr cpu_addr = 0; |
| 588 | Words words; | 598 | Words words; |
| 599 | u64 frame_tick = 0; | ||
| 589 | BufferFlagBits flags{}; | 600 | BufferFlagBits flags{}; |
| 590 | int stream_score = 0; | 601 | int stream_score = 0; |
| 591 | }; | 602 | }; |
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index d371b842f..6d04d00da 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | 16 | ||
| 17 | #include <boost/container/small_vector.hpp> | 17 | #include <boost/container/small_vector.hpp> |
| 18 | 18 | ||
| 19 | #include "common/common_sizes.h" | ||
| 19 | #include "common/common_types.h" | 20 | #include "common/common_types.h" |
| 20 | #include "common/div_ceil.h" | 21 | #include "common/div_ceil.h" |
| 21 | #include "common/microprofile.h" | 22 | #include "common/microprofile.h" |
| @@ -65,6 +66,9 @@ class BufferCache { | |||
| 65 | 66 | ||
| 66 | static constexpr BufferId NULL_BUFFER_ID{0}; | 67 | static constexpr BufferId NULL_BUFFER_ID{0}; |
| 67 | 68 | ||
| 69 | static constexpr u64 EXPECTED_MEMORY = Common::Size_512_MB; | ||
| 70 | static constexpr u64 CRITICAL_MEMORY = Common::Size_1_GB; | ||
| 71 | |||
| 68 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 72 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 69 | 73 | ||
| 70 | using Runtime = typename P::Runtime; | 74 | using Runtime = typename P::Runtime; |
| @@ -102,6 +106,8 @@ public: | |||
| 102 | 106 | ||
| 103 | void TickFrame(); | 107 | void TickFrame(); |
| 104 | 108 | ||
| 109 | void RunGarbageCollector(); | ||
| 110 | |||
| 105 | void WriteMemory(VAddr cpu_addr, u64 size); | 111 | void WriteMemory(VAddr cpu_addr, u64 size); |
| 106 | 112 | ||
| 107 | void CachedWriteMemory(VAddr cpu_addr, u64 size); | 113 | void CachedWriteMemory(VAddr cpu_addr, u64 size); |
| @@ -243,6 +249,8 @@ private: | |||
| 243 | template <bool insert> | 249 | template <bool insert> |
| 244 | void ChangeRegister(BufferId buffer_id); | 250 | void ChangeRegister(BufferId buffer_id); |
| 245 | 251 | ||
| 252 | void TouchBuffer(Buffer& buffer) const noexcept; | ||
| 253 | |||
| 246 | bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); | 254 | bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); |
| 247 | 255 | ||
| 248 | bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); | 256 | bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); |
| @@ -255,6 +263,10 @@ private: | |||
| 255 | 263 | ||
| 256 | void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies); | 264 | void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies); |
| 257 | 265 | ||
| 266 | void DownloadBufferMemory(Buffer& buffer_id); | ||
| 267 | |||
| 268 | void DownloadBufferMemory(Buffer& buffer_id, VAddr cpu_addr, u64 size); | ||
| 269 | |||
| 258 | void DeleteBuffer(BufferId buffer_id); | 270 | void DeleteBuffer(BufferId buffer_id); |
| 259 | 271 | ||
| 260 | void ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id); | 272 | void ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id); |
| @@ -319,6 +331,10 @@ private: | |||
| 319 | size_t immediate_buffer_capacity = 0; | 331 | size_t immediate_buffer_capacity = 0; |
| 320 | std::unique_ptr<u8[]> immediate_buffer_alloc; | 332 | std::unique_ptr<u8[]> immediate_buffer_alloc; |
| 321 | 333 | ||
| 334 | typename SlotVector<Buffer>::Iterator deletion_iterator; | ||
| 335 | u64 frame_tick = 0; | ||
| 336 | u64 total_used_memory = 0; | ||
| 337 | |||
| 322 | std::array<BufferId, ((1ULL << 39) >> PAGE_BITS)> page_table; | 338 | std::array<BufferId, ((1ULL << 39) >> PAGE_BITS)> page_table; |
| 323 | }; | 339 | }; |
| 324 | 340 | ||
| @@ -332,6 +348,28 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, | |||
| 332 | gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} { | 348 | gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} { |
| 333 | // Ensure the first slot is used for the null buffer | 349 | // Ensure the first slot is used for the null buffer |
| 334 | void(slot_buffers.insert(runtime, NullBufferParams{})); | 350 | void(slot_buffers.insert(runtime, NullBufferParams{})); |
| 351 | deletion_iterator = slot_buffers.end(); | ||
| 352 | } | ||
| 353 | |||
| | /// Visits a bounded number of buffer slots, resuming where the previous call stopped, | ||
| | /// and deletes every visited buffer whose frame tick is older than the idle threshold. | ||
| 354 | template <class P> | ||
| 355 | void BufferCache<P>::RunGarbageCollector() { | ||
| | // At or above CRITICAL_MEMORY: visit more slots per call and tolerate fewer idle ticks | ||
| 356 | const bool is_critical = total_used_memory >= CRITICAL_MEMORY; | ||
| 357 | const u64 max_idle_ticks = is_critical ? 60 : 120; | ||
| 358 | const int max_visits = is_critical ? 64 : 32; | ||
| 359 | for (int visit = 0; visit < max_visits; ++visit) { | ||
| 360 | if (deletion_iterator == slot_buffers.end()) { | ||
| | // The previous sweep reached the end; start over from the first slot | ||
| 361 | deletion_iterator = slot_buffers.begin(); | ||
| 362 | } | ||
| | // Step before inspecting, so the slot at begin() itself is never a candidate | ||
| 363 | ++deletion_iterator; | ||
| 364 | if (deletion_iterator == slot_buffers.end()) { | ||
| 365 | return; | ||
| 366 | } | ||
| 367 | const auto [buffer_id, buffer] = *deletion_iterator; | ||
| 368 | const bool is_stale = buffer->FrameTick() + max_idle_ticks < frame_tick; | ||
| | if (!is_stale) { | ||
| | continue; | ||
| | } | ||
| | // Flush the buffer's download ranges to cpu_memory before deleting it | ||
| 369 | DownloadBufferMemory(*buffer); | ||
| 370 | DeleteBuffer(buffer_id); | ||
| 372 | } | ||
| 335 | } | 373 | } |
| 336 | 374 | ||
| 337 | template <class P> | 375 | template <class P> |
| @@ -349,6 +387,10 @@ void BufferCache<P>::TickFrame() { | |||
| 349 | const bool skip_preferred = hits * 256 < shots * 251; | 387 | const bool skip_preferred = hits * 256 < shots * 251; |
| 350 | uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; | 388 | uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; |
| 351 | 389 | ||
| 390 | if (Settings::values.use_caches_gc.GetValue() && total_used_memory >= EXPECTED_MEMORY) { | ||
| 391 | RunGarbageCollector(); | ||
| 392 | } | ||
| 393 | ++frame_tick; | ||
| 352 | delayed_destruction_ring.Tick(); | 394 | delayed_destruction_ring.Tick(); |
| 353 | } | 395 | } |
| 354 | 396 | ||
| @@ -371,50 +413,8 @@ void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) { | |||
| 371 | 413 | ||
| | /// Downloads the requested [cpu_addr, cpu_addr + size) range from every buffer overlapping it. | ||
| | /// The range is forwarded so only that region is downloaded, matching the removed inline code. | ||
| 372 | template <class P> | 414 | template <class P> |
| 373 | void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) { | 415 | void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) { |
| 374 | ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) { | 416 | ForEachBufferInRange(cpu_addr, size, |
| 375 | boost::container::small_vector<BufferCopy, 1> copies; | 417 | [&](BufferId, Buffer& buffer) { DownloadBufferMemory(buffer, cpu_addr, size); }); |
| 376 | u64 total_size_bytes = 0; | ||
| 377 | u64 largest_copy = 0; | ||
| 378 | buffer.ForEachDownloadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) { | ||
| 379 | copies.push_back(BufferCopy{ | ||
| 380 | .src_offset = range_offset, | ||
| 381 | .dst_offset = total_size_bytes, | ||
| 382 | .size = range_size, | ||
| 383 | }); | ||
| 384 | total_size_bytes += range_size; | ||
| 385 | largest_copy = std::max(largest_copy, range_size); | ||
| 386 | }); | ||
| 387 | if (total_size_bytes == 0) { | ||
| 388 | return; | ||
| 389 | } | ||
| 390 | MICROPROFILE_SCOPE(GPU_DownloadMemory); | ||
| 391 | |||
| 392 | if constexpr (USE_MEMORY_MAPS) { | ||
| 393 | auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); | ||
| 394 | const u8* const mapped_memory = download_staging.mapped_span.data(); | ||
| 395 | const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size()); | ||
| 396 | for (BufferCopy& copy : copies) { | ||
| 397 | // Modify copies to have the staging offset in mind | ||
| 398 | copy.dst_offset += download_staging.offset; | ||
| 399 | } | ||
| 400 | runtime.CopyBuffer(download_staging.buffer, buffer, copies_span); | ||
| 401 | runtime.Finish(); | ||
| 402 | for (const BufferCopy& copy : copies) { | ||
| 403 | const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; | ||
| 404 | // Undo the modified offset | ||
| 405 | const u64 dst_offset = copy.dst_offset - download_staging.offset; | ||
| 406 | const u8* copy_mapped_memory = mapped_memory + dst_offset; | ||
| 407 | cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size); | ||
| 408 | } | ||
| 409 | } else { | ||
| 410 | const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); | ||
| 411 | for (const BufferCopy& copy : copies) { | ||
| 412 | buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); | ||
| 413 | const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; | ||
| 414 | cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size); | ||
| 415 | } | ||
| 416 | } | ||
| 417 | }); | ||
| 418 | } | 418 | } |
| 419 | 419 | ||
| 420 | template <class P> | 420 | template <class P> |
| @@ -640,6 +640,7 @@ bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { | |||
| 640 | template <class P> | 640 | template <class P> |
| 641 | void BufferCache<P>::BindHostIndexBuffer() { | 641 | void BufferCache<P>::BindHostIndexBuffer() { |
| 642 | Buffer& buffer = slot_buffers[index_buffer.buffer_id]; | 642 | Buffer& buffer = slot_buffers[index_buffer.buffer_id]; |
| 643 | TouchBuffer(buffer); | ||
| 643 | const u32 offset = buffer.Offset(index_buffer.cpu_addr); | 644 | const u32 offset = buffer.Offset(index_buffer.cpu_addr); |
| 644 | const u32 size = index_buffer.size; | 645 | const u32 size = index_buffer.size; |
| 645 | SynchronizeBuffer(buffer, index_buffer.cpu_addr, size); | 646 | SynchronizeBuffer(buffer, index_buffer.cpu_addr, size); |
| @@ -658,6 +659,7 @@ void BufferCache<P>::BindHostVertexBuffers() { | |||
| 658 | for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { | 659 | for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { |
| 659 | const Binding& binding = vertex_buffers[index]; | 660 | const Binding& binding = vertex_buffers[index]; |
| 660 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 661 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 662 | TouchBuffer(buffer); | ||
| 661 | SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); | 663 | SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); |
| 662 | if (!flags[Dirty::VertexBuffer0 + index]) { | 664 | if (!flags[Dirty::VertexBuffer0 + index]) { |
| 663 | continue; | 665 | continue; |
| @@ -693,6 +695,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 | |||
| 693 | const VAddr cpu_addr = binding.cpu_addr; | 695 | const VAddr cpu_addr = binding.cpu_addr; |
| 694 | const u32 size = binding.size; | 696 | const u32 size = binding.size; |
| 695 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 697 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 698 | TouchBuffer(buffer); | ||
| 696 | const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && | 699 | const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && |
| 697 | size <= uniform_buffer_skip_cache_size && | 700 | size <= uniform_buffer_skip_cache_size && |
| 698 | !buffer.IsRegionGpuModified(cpu_addr, size); | 701 | !buffer.IsRegionGpuModified(cpu_addr, size); |
| @@ -744,6 +747,7 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) { | |||
| 744 | ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) { | 747 | ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) { |
| 745 | const Binding& binding = storage_buffers[stage][index]; | 748 | const Binding& binding = storage_buffers[stage][index]; |
| 746 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 749 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 750 | TouchBuffer(buffer); | ||
| 747 | const u32 size = binding.size; | 751 | const u32 size = binding.size; |
| 748 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | 752 | SynchronizeBuffer(buffer, binding.cpu_addr, size); |
| 749 | 753 | ||
| @@ -766,6 +770,7 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() { | |||
| 766 | for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) { | 770 | for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) { |
| 767 | const Binding& binding = transform_feedback_buffers[index]; | 771 | const Binding& binding = transform_feedback_buffers[index]; |
| 768 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 772 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 773 | TouchBuffer(buffer); | ||
| 769 | const u32 size = binding.size; | 774 | const u32 size = binding.size; |
| 770 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | 775 | SynchronizeBuffer(buffer, binding.cpu_addr, size); |
| 771 | 776 | ||
| @@ -784,6 +789,7 @@ void BufferCache<P>::BindHostComputeUniformBuffers() { | |||
| 784 | ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) { | 789 | ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) { |
| 785 | const Binding& binding = compute_uniform_buffers[index]; | 790 | const Binding& binding = compute_uniform_buffers[index]; |
| 786 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 791 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 792 | TouchBuffer(buffer); | ||
| 787 | const u32 size = binding.size; | 793 | const u32 size = binding.size; |
| 788 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | 794 | SynchronizeBuffer(buffer, binding.cpu_addr, size); |
| 789 | 795 | ||
| @@ -803,6 +809,7 @@ void BufferCache<P>::BindHostComputeStorageBuffers() { | |||
| 803 | ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { | 809 | ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { |
| 804 | const Binding& binding = compute_storage_buffers[index]; | 810 | const Binding& binding = compute_storage_buffers[index]; |
| 805 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 811 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 812 | TouchBuffer(buffer); | ||
| 806 | const u32 size = binding.size; | 813 | const u32 size = binding.size; |
| 807 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | 814 | SynchronizeBuffer(buffer, binding.cpu_addr, size); |
| 808 | 815 | ||
| @@ -1101,6 +1108,7 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) { | |||
| 1101 | const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size); | 1108 | const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size); |
| 1102 | const u32 size = static_cast<u32>(overlap.end - overlap.begin); | 1109 | const u32 size = static_cast<u32>(overlap.end - overlap.begin); |
| 1103 | const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); | 1110 | const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); |
| 1111 | TouchBuffer(slot_buffers[new_buffer_id]); | ||
| 1104 | for (const BufferId overlap_id : overlap.ids) { | 1112 | for (const BufferId overlap_id : overlap.ids) { |
| 1105 | JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); | 1113 | JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); |
| 1106 | } | 1114 | } |
| @@ -1122,8 +1130,14 @@ template <class P> | |||
| 1122 | template <bool insert> | 1130 | template <bool insert> |
| 1123 | void BufferCache<P>::ChangeRegister(BufferId buffer_id) { | 1131 | void BufferCache<P>::ChangeRegister(BufferId buffer_id) { |
| 1124 | const Buffer& buffer = slot_buffers[buffer_id]; | 1132 | const Buffer& buffer = slot_buffers[buffer_id]; |
| 1133 | const auto size = buffer.SizeBytes(); | ||
| 1134 | if (insert) { | ||
| 1135 | total_used_memory += Common::AlignUp(size, 1024); | ||
| 1136 | } else { | ||
| 1137 | total_used_memory -= Common::AlignUp(size, 1024); | ||
| 1138 | } | ||
| 1125 | const VAddr cpu_addr_begin = buffer.CpuAddr(); | 1139 | const VAddr cpu_addr_begin = buffer.CpuAddr(); |
| 1126 | const VAddr cpu_addr_end = cpu_addr_begin + buffer.SizeBytes(); | 1140 | const VAddr cpu_addr_end = cpu_addr_begin + size; |
| 1127 | const u64 page_begin = cpu_addr_begin / PAGE_SIZE; | 1141 | const u64 page_begin = cpu_addr_begin / PAGE_SIZE; |
| 1128 | const u64 page_end = Common::DivCeil(cpu_addr_end, PAGE_SIZE); | 1142 | const u64 page_end = Common::DivCeil(cpu_addr_end, PAGE_SIZE); |
| 1129 | for (u64 page = page_begin; page != page_end; ++page) { | 1143 | for (u64 page = page_begin; page != page_end; ++page) { |
| @@ -1136,6 +1150,11 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) { | |||
| 1136 | } | 1150 | } |
| 1137 | 1151 | ||
| | /// Stamps the buffer with the cache's current frame tick | ||
| 1138 | template <class P> | 1152 | template <class P> |
| 1153 | void BufferCache<P>::TouchBuffer(Buffer& buffer) const noexcept { | ||
| | const u64 current_tick = frame_tick; | ||
| 1154 | buffer.SetFrameTick(current_tick); | ||
| 1155 | } | ||
| 1156 | |||
| 1157 | template <class P> | ||
| 1139 | bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { | 1158 | bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { |
| 1140 | if (buffer.CpuAddr() == 0) { | 1159 | if (buffer.CpuAddr() == 0) { |
| 1141 | return true; | 1160 | return true; |
| @@ -1212,6 +1231,57 @@ void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, | |||
| 1212 | } | 1231 | } |
| 1213 | 1232 | ||
| | /// Downloads the buffer over its whole range, from CpuAddr() for SizeBytes() bytes | ||
| 1214 | template <class P> | 1233 | template <class P> |
| 1234 | void BufferCache<P>::DownloadBufferMemory(Buffer& buffer) { | ||
| | const auto whole_begin = buffer.CpuAddr(); | ||
| | const auto whole_size = buffer.SizeBytes(); | ||
| 1235 | DownloadBufferMemory(buffer, whole_begin, whole_size); | ||
| 1236 | } | ||
| 1237 | |||
| 1238 | template <class P> | ||
| 1239 | void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 size) { | ||
| 1240 | boost::container::small_vector<BufferCopy, 1> copies; | ||
| 1241 | u64 total_size_bytes = 0; | ||
| 1242 | u64 largest_copy = 0; | ||
| 1243 | buffer.ForEachDownloadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) { | ||
| 1244 | copies.push_back(BufferCopy{ | ||
| 1245 | .src_offset = range_offset, | ||
| 1246 | .dst_offset = total_size_bytes, | ||
| 1247 | .size = range_size, | ||
| 1248 | }); | ||
| 1249 | total_size_bytes += range_size; | ||
| 1250 | largest_copy = std::max(largest_copy, range_size); | ||
| 1251 | }); | ||
| 1252 | if (total_size_bytes == 0) { | ||
| 1253 | return; | ||
| 1254 | } | ||
| 1255 | MICROPROFILE_SCOPE(GPU_DownloadMemory); | ||
| 1256 | |||
| 1257 | if constexpr (USE_MEMORY_MAPS) { | ||
| 1258 | auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); | ||
| 1259 | const u8* const mapped_memory = download_staging.mapped_span.data(); | ||
| 1260 | const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size()); | ||
| 1261 | for (BufferCopy& copy : copies) { | ||
| 1262 | // Rebase each destination offset by the staging buffer's offset before the copy is issued | ||
| 1263 | copy.dst_offset += download_staging.offset; | ||
| 1264 | } | ||
| 1265 | runtime.CopyBuffer(download_staging.buffer, buffer, copies_span); | ||
| 1266 | runtime.Finish(); | ||
| 1267 | for (const BufferCopy& copy : copies) { | ||
| 1268 | const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; | ||
| 1269 | // Subtract the staging offset again so the result indexes into mapped_memory | ||
| 1270 | const u64 dst_offset = copy.dst_offset - download_staging.offset; | ||
| 1271 | const u8* copy_mapped_memory = mapped_memory + dst_offset; | ||
| 1272 | cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size); | ||
| 1273 | } | ||
| 1274 | } else { | ||
| 1275 | const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); | ||
| 1276 | for (const BufferCopy& copy : copies) { | ||
| 1277 | buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); | ||
| 1278 | const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; | ||
| 1279 | cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size); | ||
| 1280 | } | ||
| 1281 | } | ||
| 1282 | } | ||
| 1283 | |||
| 1284 | template <class P> | ||
| 1215 | void BufferCache<P>::DeleteBuffer(BufferId buffer_id) { | 1285 | void BufferCache<P>::DeleteBuffer(BufferId buffer_id) { |
| 1216 | const auto scalar_replace = [buffer_id](Binding& binding) { | 1286 | const auto scalar_replace = [buffer_id](Binding& binding) { |
| 1217 | if (binding.buffer_id == buffer_id) { | 1287 | if (binding.buffer_id == buffer_id) { |
| @@ -1236,6 +1306,7 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id) { | |||
| 1236 | 1306 | ||
| 1237 | Unregister(buffer_id); | 1307 | Unregister(buffer_id); |
| 1238 | delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id])); | 1308 | delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id])); |
| 1309 | slot_buffers.erase(buffer_id); | ||
| 1239 | 1310 | ||
| 1240 | NotifyBufferDeletion(); | 1311 | NotifyBufferDeletion(); |
| 1241 | } | 1312 | } |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index ffed42a29..335383955 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -242,6 +242,7 @@ public: | |||
| 242 | return 4; | 242 | return 4; |
| 243 | default: | 243 | default: |
| 244 | UNREACHABLE(); | 244 | UNREACHABLE(); |
| 245 | return 1; | ||
| 245 | } | 246 | } |
| 246 | } | 247 | } |
| 247 | 248 | ||
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 9b4038615..23948feed 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -737,6 +737,8 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, | |||
| 737 | } | 737 | } |
| 738 | } | 738 | } |
| 739 | 739 | ||
| 740 | Image::~Image() = default; | ||
| 741 | |||
| 740 | void Image::UploadMemory(const ImageBufferMap& map, | 742 | void Image::UploadMemory(const ImageBufferMap& map, |
| 741 | std::span<const VideoCommon::BufferImageCopy> copies) { | 743 | std::span<const VideoCommon::BufferImageCopy> copies) { |
| 742 | glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer); | 744 | glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer); |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index df8be12ff..25fe61566 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h | |||
| @@ -143,6 +143,14 @@ public: | |||
| 143 | explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, | 143 | explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, |
| 144 | VAddr cpu_addr); | 144 | VAddr cpu_addr); |
| 145 | 145 | ||
| 146 | ~Image(); | ||
| 147 | |||
| 148 | Image(const Image&) = delete; | ||
| 149 | Image& operator=(const Image&) = delete; | ||
| 150 | |||
| 151 | Image(Image&&) = default; | ||
| 152 | Image& operator=(Image&&) = default; | ||
| 153 | |||
| 146 | void UploadMemory(const ImageBufferMap& map, | 154 | void UploadMemory(const ImageBufferMap& map, |
| 147 | std::span<const VideoCommon::BufferImageCopy> copies); | 155 | std::span<const VideoCommon::BufferImageCopy> copies); |
| 148 | 156 | ||
| @@ -235,6 +243,7 @@ struct TextureCacheParams { | |||
| 235 | static constexpr bool ENABLE_VALIDATION = true; | 243 | static constexpr bool ENABLE_VALIDATION = true; |
| 236 | static constexpr bool FRAMEBUFFER_BLITS = true; | 244 | static constexpr bool FRAMEBUFFER_BLITS = true; |
| 237 | static constexpr bool HAS_EMULATED_COPIES = true; | 245 | static constexpr bool HAS_EMULATED_COPIES = true; |
| 246 | static constexpr bool HAS_DEVICE_MEMORY_INFO = false; | ||
| 238 | 247 | ||
| 239 | using Runtime = OpenGL::TextureCacheRuntime; | 248 | using Runtime = OpenGL::TextureCacheRuntime; |
| 240 | using Image = OpenGL::Image; | 249 | using Image = OpenGL::Image; |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 52860b4cf..a2ab4d1ee 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp | |||
| @@ -818,6 +818,10 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src, | |||
| 818 | }); | 818 | }); |
| 819 | } | 819 | } |
| 820 | 820 | ||
| 821 | u64 TextureCacheRuntime::GetDeviceLocalMemory() const { | ||
| 822 | return device.GetDeviceLocalMemory(); | ||
| 823 | } | ||
| 824 | |||
| 821 | Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_, | 825 | Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_, |
| 822 | VAddr cpu_addr_) | 826 | VAddr cpu_addr_) |
| 823 | : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler}, | 827 | : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler}, |
| @@ -876,6 +880,8 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_ | |||
| 876 | } | 880 | } |
| 877 | } | 881 | } |
| 878 | 882 | ||
| 883 | Image::~Image() = default; | ||
| 884 | |||
| 879 | void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { | 885 | void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { |
| 880 | // TODO: Move this to another API | 886 | // TODO: Move this to another API |
| 881 | scheduler->RequestOutsideRenderPassOperationContext(); | 887 | scheduler->RequestOutsideRenderPassOperationContext(); |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 4a57d378b..172bcdf98 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h | |||
| @@ -97,6 +97,8 @@ struct TextureCacheRuntime { | |||
| 97 | // All known Vulkan drivers can natively handle BGR textures | 97 | // All known Vulkan drivers can natively handle BGR textures |
| 98 | return true; | 98 | return true; |
| 99 | } | 99 | } |
| 100 | |||
| 101 | u64 GetDeviceLocalMemory() const; | ||
| 100 | }; | 102 | }; |
| 101 | 103 | ||
| 102 | class Image : public VideoCommon::ImageBase { | 104 | class Image : public VideoCommon::ImageBase { |
| @@ -104,6 +106,14 @@ public: | |||
| 104 | explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, | 106 | explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, |
| 105 | VAddr cpu_addr); | 107 | VAddr cpu_addr); |
| 106 | 108 | ||
| 109 | ~Image(); | ||
| 110 | |||
| 111 | Image(const Image&) = delete; | ||
| 112 | Image& operator=(const Image&) = delete; | ||
| 113 | |||
| 114 | Image(Image&&) = default; | ||
| 115 | Image& operator=(Image&&) = default; | ||
| 116 | |||
| 107 | void UploadMemory(const StagingBufferRef& map, | 117 | void UploadMemory(const StagingBufferRef& map, |
| 108 | std::span<const VideoCommon::BufferImageCopy> copies); | 118 | std::span<const VideoCommon::BufferImageCopy> copies); |
| 109 | 119 | ||
| @@ -257,6 +267,7 @@ struct TextureCacheParams { | |||
| 257 | static constexpr bool ENABLE_VALIDATION = true; | 267 | static constexpr bool ENABLE_VALIDATION = true; |
| 258 | static constexpr bool FRAMEBUFFER_BLITS = false; | 268 | static constexpr bool FRAMEBUFFER_BLITS = false; |
| 259 | static constexpr bool HAS_EMULATED_COPIES = false; | 269 | static constexpr bool HAS_EMULATED_COPIES = false; |
| 270 | static constexpr bool HAS_DEVICE_MEMORY_INFO = true; | ||
| 260 | 271 | ||
| 261 | using Runtime = Vulkan::TextureCacheRuntime; | 272 | using Runtime = Vulkan::TextureCacheRuntime; |
| 262 | using Image = Vulkan::Image; | 273 | using Image = Vulkan::Image; |
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 6308aef94..eb1746265 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp | |||
| @@ -283,4 +283,11 @@ std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) { | |||
| 283 | return {DefaultBlockWidth(format), DefaultBlockHeight(format)}; | 283 | return {DefaultBlockWidth(format), DefaultBlockHeight(format)}; |
| 284 | } | 284 | } |
| 285 | 285 | ||
| 286 | u64 EstimatedDecompressedSize(u64 base_size, PixelFormat format) { | ||
| 287 | constexpr u64 RGBA8_PIXEL_SIZE = 4; | ||
| 288 | const u64 base_block_size = static_cast<u64>(DefaultBlockWidth(format)) * | ||
| 289 | static_cast<u64>(DefaultBlockHeight(format)) * RGBA8_PIXEL_SIZE; | ||
| 290 | return (base_size * base_block_size) / BytesPerBlock(format); | ||
| 291 | } | ||
| 292 | |||
| 286 | } // namespace VideoCore::Surface | 293 | } // namespace VideoCore::Surface |
diff --git a/src/video_core/surface.h b/src/video_core/surface.h index c40ab89d0..1503db81f 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h | |||
| @@ -462,4 +462,6 @@ bool IsPixelFormatSRGB(PixelFormat format); | |||
| 462 | 462 | ||
| 463 | std::pair<u32, u32> GetASTCBlockSize(PixelFormat format); | 463 | std::pair<u32, u32> GetASTCBlockSize(PixelFormat format); |
| 464 | 464 | ||
| 465 | u64 EstimatedDecompressedSize(u64 base_size, PixelFormat format); | ||
| 466 | |||
| 465 | } // namespace VideoCore::Surface | 467 | } // namespace VideoCore::Surface |
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp index 9914926b3..ad69d32d1 100644 --- a/src/video_core/texture_cache/image_base.cpp +++ b/src/video_core/texture_cache/image_base.cpp | |||
| @@ -113,6 +113,43 @@ void ImageBase::InsertView(const ImageViewInfo& view_info, ImageViewId image_vie | |||
| 113 | image_view_ids.push_back(image_view_id); | 113 | image_view_ids.push_back(image_view_id); |
| 114 | } | 114 | } |
| 115 | 115 | ||
| 116 | bool ImageBase::IsSafeDownload() const noexcept { | ||
| 117 | // Skip images that were not modified from the GPU | ||
| 118 | if (False(flags & ImageFlagBits::GpuModified)) { | ||
| 119 | return false; | ||
| 120 | } | ||
| 121 | // Skip images that .are. modified from the CPU | ||
| 122 | // We don't want to write sensitive data from the guest | ||
| 123 | if (True(flags & ImageFlagBits::CpuModified)) { | ||
| 124 | return false; | ||
| 125 | } | ||
| 126 | if (info.num_samples > 1) { | ||
| 127 | LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented"); | ||
| 128 | return false; | ||
| 129 | } | ||
| 130 | return true; | ||
| 131 | } | ||
| 132 | |||
| 133 | void ImageBase::CheckBadOverlapState() { | ||
| 134 | if (False(flags & ImageFlagBits::BadOverlap)) { | ||
| 135 | return; | ||
| 136 | } | ||
| 137 | if (!overlapping_images.empty()) { | ||
| 138 | return; | ||
| 139 | } | ||
| 140 | flags &= ~ImageFlagBits::BadOverlap; | ||
| 141 | } | ||
| 142 | |||
| 143 | void ImageBase::CheckAliasState() { | ||
| 144 | if (False(flags & ImageFlagBits::Alias)) { | ||
| 145 | return; | ||
| 146 | } | ||
| 147 | if (!aliased_images.empty()) { | ||
| 148 | return; | ||
| 149 | } | ||
| 150 | flags &= ~ImageFlagBits::Alias; | ||
| 151 | } | ||
| 152 | |||
| 116 | void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) { | 153 | void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) { |
| 117 | static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format; | 154 | static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format; |
| 118 | ASSERT(lhs.info.type == rhs.info.type); | 155 | ASSERT(lhs.info.type == rhs.info.type); |
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index b7f3b7e43..e326cab71 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h | |||
| @@ -25,6 +25,12 @@ enum class ImageFlagBits : u32 { | |||
| 25 | Strong = 1 << 5, ///< Exists in the image table, the dimensions are can be trusted | 25 | Strong = 1 << 5, ///< Exists in the image table, the dimensions are can be trusted |
| 26 | Registered = 1 << 6, ///< True when the image is registered | 26 | Registered = 1 << 6, ///< True when the image is registered |
| 27 | Picked = 1 << 7, ///< Temporary flag to mark the image as picked | 27 | Picked = 1 << 7, ///< Temporary flag to mark the image as picked |
| 28 | |||
| 29 | // Garbage Collection Flags | ||
| 30 | BadOverlap = 1 << 8, ///< This image overlaps other but doesn't fit, has higher | ||
| 31 | ///< garbage collection priority | ||
| 32 | Alias = 1 << 9, ///< This image has aliases and has priority on garbage | ||
| 33 | ///< collection | ||
| 28 | }; | 34 | }; |
| 29 | DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) | 35 | DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) |
| 30 | 36 | ||
| @@ -44,11 +50,16 @@ struct ImageBase { | |||
| 44 | 50 | ||
| 45 | void InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id); | 51 | void InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id); |
| 46 | 52 | ||
| 53 | [[nodiscard]] bool IsSafeDownload() const noexcept; | ||
| 54 | |||
| 47 | [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept { | 55 | [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept { |
| 48 | const VAddr overlap_end = overlap_cpu_addr + overlap_size; | 56 | const VAddr overlap_end = overlap_cpu_addr + overlap_size; |
| 49 | return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end; | 57 | return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end; |
| 50 | } | 58 | } |
| 51 | 59 | ||
| 60 | void CheckBadOverlapState(); | ||
| 61 | void CheckAliasState(); | ||
| 62 | |||
| 52 | ImageInfo info; | 63 | ImageInfo info; |
| 53 | 64 | ||
| 54 | u32 guest_size_bytes = 0; | 65 | u32 guest_size_bytes = 0; |
| @@ -72,6 +83,7 @@ struct ImageBase { | |||
| 72 | std::vector<SubresourceBase> slice_subresources; | 83 | std::vector<SubresourceBase> slice_subresources; |
| 73 | 84 | ||
| 74 | std::vector<AliasedImage> aliased_images; | 85 | std::vector<AliasedImage> aliased_images; |
| 86 | std::vector<ImageId> overlapping_images; | ||
| 75 | }; | 87 | }; |
| 76 | 88 | ||
| 77 | struct ImageAllocBase { | 89 | struct ImageAllocBase { |
diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h index eae3be6ea..6180b8c0e 100644 --- a/src/video_core/texture_cache/slot_vector.h +++ b/src/video_core/texture_cache/slot_vector.h | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <bit> | ||
| 8 | #include <concepts> | 9 | #include <concepts> |
| 9 | #include <numeric> | 10 | #include <numeric> |
| 10 | #include <type_traits> | 11 | #include <type_traits> |
| @@ -32,6 +33,60 @@ template <class T> | |||
| 32 | requires std::is_nothrow_move_assignable_v<T>&& | 33 | requires std::is_nothrow_move_assignable_v<T>&& |
| 33 | std::is_nothrow_move_constructible_v<T> class SlotVector { | 34 | std::is_nothrow_move_constructible_v<T> class SlotVector { |
| 34 | public: | 35 | public: |
| 36 | class Iterator { | ||
| 37 | friend SlotVector<T>; | ||
| 38 | |||
| 39 | public: | ||
| 40 | constexpr Iterator() = default; | ||
| 41 | |||
| 42 | Iterator& operator++() noexcept { | ||
| 43 | const u64* const bitset = slot_vector->stored_bitset.data(); | ||
| 44 | const u32 size = static_cast<u32>(slot_vector->stored_bitset.size()) * 64; | ||
| 45 | if (id.index < size) { | ||
| 46 | do { | ||
| 47 | ++id.index; | ||
| 48 | } while (id.index < size && !IsValid(bitset)); | ||
| 49 | if (id.index == size) { | ||
| 50 | id.index = SlotId::INVALID_INDEX; | ||
| 51 | } | ||
| 52 | } | ||
| 53 | return *this; | ||
| 54 | } | ||
| 55 | |||
| 56 | Iterator operator++(int) noexcept { | ||
| 57 | const Iterator copy{*this}; | ||
| 58 | ++*this; | ||
| 59 | return copy; | ||
| 60 | } | ||
| 61 | |||
| 62 | bool operator==(const Iterator& other) const noexcept { | ||
| 63 | return id.index == other.id.index; | ||
| 64 | } | ||
| 65 | |||
| 66 | bool operator!=(const Iterator& other) const noexcept { | ||
| 67 | return id.index != other.id.index; | ||
| 68 | } | ||
| 69 | |||
| 70 | std::pair<SlotId, T*> operator*() const noexcept { | ||
| 71 | return {id, std::addressof((*slot_vector)[id])}; | ||
| 72 | } | ||
| 73 | |||
| 74 | T* operator->() const noexcept { | ||
| 75 | return std::addressof((*slot_vector)[id]); | ||
| 76 | } | ||
| 77 | |||
| 78 | private: | ||
| 79 | Iterator(SlotVector<T>* slot_vector_, SlotId id_) noexcept | ||
| 80 | : slot_vector{slot_vector_}, id{id_} {} | ||
| 81 | |||
| 82 | bool IsValid(const u64* bitset) const noexcept { | ||
| 83 | return ((bitset[id.index / 64] >> (id.index % 64)) & 1) != 0; | ||
| 84 | } | ||
| 85 | |||
| 86 | SlotVector<T>* slot_vector; | ||
| 87 | SlotId id; | ||
| 88 | }; | ||
| 89 | |||
| 35 | ~SlotVector() noexcept { | 90 | ~SlotVector() noexcept { |
| 36 | size_t index = 0; | 91 | size_t index = 0; |
| 37 | for (u64 bits : stored_bitset) { | 92 | for (u64 bits : stored_bitset) { |
| @@ -70,6 +125,20 @@ public: | |||
| 70 | ResetStorageBit(id.index); | 125 | ResetStorageBit(id.index); |
| 71 | } | 126 | } |
| 72 | 127 | ||
| 128 | [[nodiscard]] Iterator begin() noexcept { | ||
| 129 | const auto it = std::ranges::find_if(stored_bitset, [](u64 value) { return value != 0; }); | ||
| 130 | if (it == stored_bitset.end()) { | ||
| 131 | return end(); | ||
| 132 | } | ||
| 133 | const u32 word_index = static_cast<u32>(std::distance(it, stored_bitset.begin())); | ||
| 134 | const SlotId first_id{word_index * 64 + static_cast<u32>(std::countr_zero(*it))}; | ||
| 135 | return Iterator(this, first_id); | ||
| 136 | } | ||
| 137 | |||
| 138 | [[nodiscard]] Iterator end() noexcept { | ||
| 139 | return Iterator(this, SlotId{SlotId::INVALID_INDEX}); | ||
| 140 | } | ||
| 141 | |||
| 73 | private: | 142 | private: |
| 74 | struct NonTrivialDummy { | 143 | struct NonTrivialDummy { |
| 75 | NonTrivialDummy() noexcept {} | 144 | NonTrivialDummy() noexcept {} |
| @@ -140,7 +209,6 @@ private: | |||
| 140 | 209 | ||
| 141 | Entry* values = nullptr; | 210 | Entry* values = nullptr; |
| 142 | size_t values_capacity = 0; | 211 | size_t values_capacity = 0; |
| 143 | size_t values_size = 0; | ||
| 144 | 212 | ||
| 145 | std::vector<u64> stored_bitset; | 213 | std::vector<u64> stored_bitset; |
| 146 | std::vector<u32> free_list; | 214 | std::vector<u32> free_list; |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 59b7c678b..e7f8478b4 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -20,8 +20,10 @@ | |||
| 20 | 20 | ||
| 21 | #include "common/alignment.h" | 21 | #include "common/alignment.h" |
| 22 | #include "common/common_funcs.h" | 22 | #include "common/common_funcs.h" |
| 23 | #include "common/common_sizes.h" | ||
| 23 | #include "common/common_types.h" | 24 | #include "common/common_types.h" |
| 24 | #include "common/logging/log.h" | 25 | #include "common/logging/log.h" |
| 26 | #include "common/settings.h" | ||
| 25 | #include "video_core/compatible_formats.h" | 27 | #include "video_core/compatible_formats.h" |
| 26 | #include "video_core/delayed_destruction_ring.h" | 28 | #include "video_core/delayed_destruction_ring.h" |
| 27 | #include "video_core/dirty_flags.h" | 29 | #include "video_core/dirty_flags.h" |
| @@ -69,12 +71,17 @@ class TextureCache { | |||
| 69 | static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS; | 71 | static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS; |
| 70 | /// True when some copies have to be emulated | 72 | /// True when some copies have to be emulated |
| 71 | static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES; | 73 | static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES; |
| 74 | /// True when the API can provide info about the memory of the device. | ||
| 75 | static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; | ||
| 72 | 76 | ||
| 73 | /// Image view ID for null descriptors | 77 | /// Image view ID for null descriptors |
| 74 | static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0}; | 78 | static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0}; |
| 75 | /// Sampler ID for bugged sampler ids | 79 | /// Sampler ID for bugged sampler ids |
| 76 | static constexpr SamplerId NULL_SAMPLER_ID{0}; | 80 | static constexpr SamplerId NULL_SAMPLER_ID{0}; |
| 77 | 81 | ||
| 82 | static constexpr u64 DEFAULT_EXPECTED_MEMORY = Common::Size_1_GB; | ||
| 83 | static constexpr u64 DEFAULT_CRITICAL_MEMORY = Common::Size_2_GB; | ||
| 84 | |||
| 78 | using Runtime = typename P::Runtime; | 85 | using Runtime = typename P::Runtime; |
| 79 | using Image = typename P::Image; | 86 | using Image = typename P::Image; |
| 80 | using ImageAlloc = typename P::ImageAlloc; | 87 | using ImageAlloc = typename P::ImageAlloc; |
| @@ -103,6 +110,9 @@ public: | |||
| 103 | /// Notify the cache that a new frame has been queued | 110 | /// Notify the cache that a new frame has been queued |
| 104 | void TickFrame(); | 111 | void TickFrame(); |
| 105 | 112 | ||
| 113 | /// Runs the Garbage Collector. | ||
| 114 | void RunGarbageCollector(); | ||
| 115 | |||
| 106 | /// Return a constant reference to the given image view id | 116 | /// Return a constant reference to the given image view id |
| 107 | [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; | 117 | [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; |
| 108 | 118 | ||
| @@ -333,6 +343,10 @@ private: | |||
| 333 | std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table; | 343 | std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table; |
| 334 | 344 | ||
| 335 | bool has_deleted_images = false; | 345 | bool has_deleted_images = false; |
| 346 | u64 total_used_memory = 0; | ||
| 347 | u64 minimum_memory; | ||
| 348 | u64 expected_memory; | ||
| 349 | u64 critical_memory; | ||
| 336 | 350 | ||
| 337 | SlotVector<Image> slot_images; | 351 | SlotVector<Image> slot_images; |
| 338 | SlotVector<ImageView> slot_image_views; | 352 | SlotVector<ImageView> slot_image_views; |
| @@ -353,6 +367,7 @@ private: | |||
| 353 | 367 | ||
| 354 | u64 modification_tick = 0; | 368 | u64 modification_tick = 0; |
| 355 | u64 frame_tick = 0; | 369 | u64 frame_tick = 0; |
| 370 | typename SlotVector<Image>::Iterator deletion_iterator; | ||
| 356 | }; | 371 | }; |
| 357 | 372 | ||
| 358 | template <class P> | 373 | template <class P> |
| @@ -373,11 +388,94 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& | |||
| 373 | // This way the null resource becomes a compile time constant | 388 | // This way the null resource becomes a compile time constant |
| 374 | void(slot_image_views.insert(runtime, NullImageParams{})); | 389 | void(slot_image_views.insert(runtime, NullImageParams{})); |
| 375 | void(slot_samplers.insert(runtime, sampler_descriptor)); | 390 | void(slot_samplers.insert(runtime, sampler_descriptor)); |
| 391 | |||
| 392 | deletion_iterator = slot_images.begin(); | ||
| 393 | |||
| 394 | if constexpr (HAS_DEVICE_MEMORY_INFO) { | ||
| 395 | const auto device_memory = runtime.GetDeviceLocalMemory(); | ||
| 396 | const u64 possible_expected_memory = (device_memory * 3) / 10; | ||
| 397 | const u64 possible_critical_memory = (device_memory * 6) / 10; | ||
| 398 | expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY); | ||
| 399 | critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY); | ||
| 400 | minimum_memory = 0; | ||
| 401 | } else { | ||
| 402 | // on OGL we can be more conservatives as the driver takes care. | ||
| 403 | expected_memory = DEFAULT_EXPECTED_MEMORY + Common::Size_512_MB; | ||
| 404 | critical_memory = DEFAULT_CRITICAL_MEMORY + Common::Size_1_GB; | ||
| 405 | minimum_memory = expected_memory; | ||
| 406 | } | ||
| 407 | } | ||
| 408 | |||
| 409 | template <class P> | ||
| 410 | void TextureCache<P>::RunGarbageCollector() { | ||
| 411 | const bool high_priority_mode = total_used_memory >= expected_memory; | ||
| 412 | const bool aggressive_mode = total_used_memory >= critical_memory; | ||
| 413 | const u64 ticks_to_destroy = high_priority_mode ? 60 : 100; | ||
| 414 | int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64); | ||
| 415 | for (; num_iterations > 0; --num_iterations) { | ||
| 416 | if (deletion_iterator == slot_images.end()) { | ||
| 417 | deletion_iterator = slot_images.begin(); | ||
| 418 | if (deletion_iterator == slot_images.end()) { | ||
| 419 | break; | ||
| 420 | } | ||
| 421 | } | ||
| 422 | auto [image_id, image_tmp] = *deletion_iterator; | ||
| 423 | Image* image = image_tmp; // fix clang error. | ||
| 424 | const bool is_alias = True(image->flags & ImageFlagBits::Alias); | ||
| 425 | const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap); | ||
| 426 | const bool must_download = image->IsSafeDownload(); | ||
| 427 | bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download); | ||
| 428 | const u64 ticks_needed = | ||
| 429 | is_bad_overlap | ||
| 430 | ? ticks_to_destroy >> 4 | ||
| 431 | : ((should_care && aggressive_mode) ? ticks_to_destroy >> 1 : ticks_to_destroy); | ||
| 432 | should_care |= aggressive_mode; | ||
| 433 | if (should_care && image->frame_tick + ticks_needed < frame_tick) { | ||
| 434 | if (is_bad_overlap) { | ||
| 435 | const bool overlap_check = std::ranges::all_of( | ||
| 436 | image->overlapping_images, [&, image](const ImageId& overlap_id) { | ||
| 437 | auto& overlap = slot_images[overlap_id]; | ||
| 438 | return overlap.frame_tick >= image->frame_tick; | ||
| 439 | }); | ||
| 440 | if (!overlap_check) { | ||
| 441 | ++deletion_iterator; | ||
| 442 | continue; | ||
| 443 | } | ||
| 444 | } | ||
| 445 | if (!is_bad_overlap && must_download) { | ||
| 446 | const bool alias_check = std::ranges::none_of( | ||
| 447 | image->aliased_images, [&, image](const AliasedImage& alias) { | ||
| 448 | auto& alias_image = slot_images[alias.id]; | ||
| 449 | return (alias_image.frame_tick < image->frame_tick) || | ||
| 450 | (alias_image.modification_tick < image->modification_tick); | ||
| 451 | }); | ||
| 452 | |||
| 453 | if (alias_check) { | ||
| 454 | auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes); | ||
| 455 | const auto copies = FullDownloadCopies(image->info); | ||
| 456 | image->DownloadMemory(map, copies); | ||
| 457 | runtime.Finish(); | ||
| 458 | SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span); | ||
| 459 | } | ||
| 460 | } | ||
| 461 | if (True(image->flags & ImageFlagBits::Tracked)) { | ||
| 462 | UntrackImage(*image); | ||
| 463 | } | ||
| 464 | UnregisterImage(image_id); | ||
| 465 | DeleteImage(image_id); | ||
| 466 | if (is_bad_overlap) { | ||
| 467 | ++num_iterations; | ||
| 468 | } | ||
| 469 | } | ||
| 470 | ++deletion_iterator; | ||
| 471 | } | ||
| 376 | } | 472 | } |
| 377 | 473 | ||
| 378 | template <class P> | 474 | template <class P> |
| 379 | void TextureCache<P>::TickFrame() { | 475 | void TextureCache<P>::TickFrame() { |
| 380 | // Tick sentenced resources in this order to ensure they are destroyed in the right order | 476 | if (Settings::values.use_caches_gc.GetValue() && total_used_memory > minimum_memory) { |
| 477 | RunGarbageCollector(); | ||
| 478 | } | ||
| 381 | sentenced_images.Tick(); | 479 | sentenced_images.Tick(); |
| 382 | sentenced_framebuffers.Tick(); | 480 | sentenced_framebuffers.Tick(); |
| 383 | sentenced_image_view.Tick(); | 481 | sentenced_image_view.Tick(); |
| @@ -568,17 +666,7 @@ template <class P> | |||
| 568 | void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { | 666 | void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { |
| 569 | std::vector<ImageId> images; | 667 | std::vector<ImageId> images; |
| 570 | ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) { | 668 | ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) { |
| 571 | // Skip images that were not modified from the GPU | 669 | if (!image.IsSafeDownload()) { |
| 572 | if (False(image.flags & ImageFlagBits::GpuModified)) { | ||
| 573 | return; | ||
| 574 | } | ||
| 575 | // Skip images that .are. modified from the CPU | ||
| 576 | // We don't want to write sensitive data from the guest | ||
| 577 | if (True(image.flags & ImageFlagBits::CpuModified)) { | ||
| 578 | return; | ||
| 579 | } | ||
| 580 | if (image.info.num_samples > 1) { | ||
| 581 | LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented"); | ||
| 582 | return; | 670 | return; |
| 583 | } | 671 | } |
| 584 | image.flags &= ~ImageFlagBits::GpuModified; | 672 | image.flags &= ~ImageFlagBits::GpuModified; |
| @@ -967,6 +1055,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA | |||
| 967 | std::vector<ImageId> overlap_ids; | 1055 | std::vector<ImageId> overlap_ids; |
| 968 | std::vector<ImageId> left_aliased_ids; | 1056 | std::vector<ImageId> left_aliased_ids; |
| 969 | std::vector<ImageId> right_aliased_ids; | 1057 | std::vector<ImageId> right_aliased_ids; |
| 1058 | std::vector<ImageId> bad_overlap_ids; | ||
| 970 | ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) { | 1059 | ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) { |
| 971 | if (info.type != overlap.info.type) { | 1060 | if (info.type != overlap.info.type) { |
| 972 | return; | 1061 | return; |
| @@ -992,9 +1081,14 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA | |||
| 992 | const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); | 1081 | const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); |
| 993 | if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) { | 1082 | if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) { |
| 994 | left_aliased_ids.push_back(overlap_id); | 1083 | left_aliased_ids.push_back(overlap_id); |
| 1084 | overlap.flags |= ImageFlagBits::Alias; | ||
| 995 | } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, | 1085 | } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, |
| 996 | broken_views, native_bgr)) { | 1086 | broken_views, native_bgr)) { |
| 997 | right_aliased_ids.push_back(overlap_id); | 1087 | right_aliased_ids.push_back(overlap_id); |
| 1088 | overlap.flags |= ImageFlagBits::Alias; | ||
| 1089 | } else { | ||
| 1090 | bad_overlap_ids.push_back(overlap_id); | ||
| 1091 | overlap.flags |= ImageFlagBits::BadOverlap; | ||
| 998 | } | 1092 | } |
| 999 | }); | 1093 | }); |
| 1000 | const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); | 1094 | const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); |
| @@ -1022,10 +1116,18 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA | |||
| 1022 | for (const ImageId aliased_id : right_aliased_ids) { | 1116 | for (const ImageId aliased_id : right_aliased_ids) { |
| 1023 | ImageBase& aliased = slot_images[aliased_id]; | 1117 | ImageBase& aliased = slot_images[aliased_id]; |
| 1024 | AddImageAlias(new_image_base, aliased, new_image_id, aliased_id); | 1118 | AddImageAlias(new_image_base, aliased, new_image_id, aliased_id); |
| 1119 | new_image.flags |= ImageFlagBits::Alias; | ||
| 1025 | } | 1120 | } |
| 1026 | for (const ImageId aliased_id : left_aliased_ids) { | 1121 | for (const ImageId aliased_id : left_aliased_ids) { |
| 1027 | ImageBase& aliased = slot_images[aliased_id]; | 1122 | ImageBase& aliased = slot_images[aliased_id]; |
| 1028 | AddImageAlias(aliased, new_image_base, aliased_id, new_image_id); | 1123 | AddImageAlias(aliased, new_image_base, aliased_id, new_image_id); |
| 1124 | new_image.flags |= ImageFlagBits::Alias; | ||
| 1125 | } | ||
| 1126 | for (const ImageId aliased_id : bad_overlap_ids) { | ||
| 1127 | ImageBase& aliased = slot_images[aliased_id]; | ||
| 1128 | aliased.overlapping_images.push_back(new_image_id); | ||
| 1129 | new_image.overlapping_images.push_back(aliased_id); | ||
| 1130 | new_image.flags |= ImageFlagBits::BadOverlap; | ||
| 1029 | } | 1131 | } |
| 1030 | RegisterImage(new_image_id); | 1132 | RegisterImage(new_image_id); |
| 1031 | return new_image_id; | 1133 | return new_image_id; |
| @@ -1195,6 +1297,13 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { | |||
| 1195 | image.flags |= ImageFlagBits::Registered; | 1297 | image.flags |= ImageFlagBits::Registered; |
| 1196 | ForEachPage(image.cpu_addr, image.guest_size_bytes, | 1298 | ForEachPage(image.cpu_addr, image.guest_size_bytes, |
| 1197 | [this, image_id](u64 page) { page_table[page].push_back(image_id); }); | 1299 | [this, image_id](u64 page) { page_table[page].push_back(image_id); }); |
| 1300 | u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); | ||
| 1301 | if ((IsPixelFormatASTC(image.info.format) && | ||
| 1302 | True(image.flags & ImageFlagBits::AcceleratedUpload)) || | ||
| 1303 | True(image.flags & ImageFlagBits::Converted)) { | ||
| 1304 | tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); | ||
| 1305 | } | ||
| 1306 | total_used_memory += Common::AlignUp(tentative_size, 1024); | ||
| 1198 | } | 1307 | } |
| 1199 | 1308 | ||
| 1200 | template <class P> | 1309 | template <class P> |
| @@ -1203,6 +1312,14 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) { | |||
| 1203 | ASSERT_MSG(True(image.flags & ImageFlagBits::Registered), | 1312 | ASSERT_MSG(True(image.flags & ImageFlagBits::Registered), |
| 1204 | "Trying to unregister an already registered image"); | 1313 | "Trying to unregister an already registered image"); |
| 1205 | image.flags &= ~ImageFlagBits::Registered; | 1314 | image.flags &= ~ImageFlagBits::Registered; |
| 1315 | image.flags &= ~ImageFlagBits::BadOverlap; | ||
| 1316 | u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); | ||
| 1317 | if ((IsPixelFormatASTC(image.info.format) && | ||
| 1318 | True(image.flags & ImageFlagBits::AcceleratedUpload)) || | ||
| 1319 | True(image.flags & ImageFlagBits::Converted)) { | ||
| 1320 | tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); | ||
| 1321 | } | ||
| 1322 | total_used_memory -= Common::AlignUp(tentative_size, 1024); | ||
| 1206 | ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { | 1323 | ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { |
| 1207 | const auto page_it = page_table.find(page); | 1324 | const auto page_it = page_table.find(page); |
| 1208 | if (page_it == page_table.end()) { | 1325 | if (page_it == page_table.end()) { |
| @@ -1276,9 +1393,19 @@ void TextureCache<P>::DeleteImage(ImageId image_id) { | |||
| 1276 | std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) { | 1393 | std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) { |
| 1277 | return other_alias.id == image_id; | 1394 | return other_alias.id == image_id; |
| 1278 | }); | 1395 | }); |
| 1396 | other_image.CheckAliasState(); | ||
| 1279 | ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}", | 1397 | ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}", |
| 1280 | num_removed_aliases); | 1398 | num_removed_aliases); |
| 1281 | } | 1399 | } |
| 1400 | for (const ImageId overlap_id : image.overlapping_images) { | ||
| 1401 | ImageBase& other_image = slot_images[overlap_id]; | ||
| 1402 | [[maybe_unused]] const size_t num_removed_overlaps = std::erase_if( | ||
| 1403 | other_image.overlapping_images, | ||
| 1404 | [image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; }); | ||
| 1405 | other_image.CheckBadOverlapState(); | ||
| 1406 | ASSERT_MSG(num_removed_overlaps == 1, "Invalid number of removed overlapps: {}", | ||
| 1407 | num_removed_overlaps); | ||
| 1408 | } | ||
| 1282 | for (const ImageViewId image_view_id : image_view_ids) { | 1409 | for (const ImageViewId image_view_id : image_view_ids) { |
| 1283 | sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); | 1410 | sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); |
| 1284 | slot_image_views.erase(image_view_id); | 1411 | slot_image_views.erase(image_view_id); |
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 6835fd747..4efe042b6 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp | |||
| @@ -581,6 +581,8 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr | |||
| 581 | 581 | ||
| 582 | for (s32 layer = 0; layer < info.resources.layers; ++layer) { | 582 | for (s32 layer = 0; layer < info.resources.layers; ++layer) { |
| 583 | const std::span<const u8> src = input.subspan(host_offset); | 583 | const std::span<const u8> src = input.subspan(host_offset); |
| 584 | gpu_memory.ReadBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes()); | ||
| 585 | |||
| 584 | SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, | 586 | SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, |
| 585 | num_tiles.depth, block.height, block.depth); | 587 | num_tiles.depth, block.height, block.depth); |
| 586 | 588 | ||
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 64206b3d2..707a8b8fb 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp | |||
| @@ -408,6 +408,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | |||
| 408 | } | 408 | } |
| 409 | logical = vk::Device::Create(physical, queue_cis, extensions, first_next, dld); | 409 | logical = vk::Device::Create(physical, queue_cis, extensions, first_next, dld); |
| 410 | 410 | ||
| 411 | CollectPhysicalMemoryInfo(); | ||
| 411 | CollectTelemetryParameters(); | 412 | CollectTelemetryParameters(); |
| 412 | CollectToolingInfo(); | 413 | CollectToolingInfo(); |
| 413 | 414 | ||
| @@ -818,6 +819,17 @@ void Device::CollectTelemetryParameters() { | |||
| 818 | } | 819 | } |
| 819 | } | 820 | } |
| 820 | 821 | ||
| 822 | void Device::CollectPhysicalMemoryInfo() { | ||
| 823 | const auto mem_properties = physical.GetMemoryProperties(); | ||
| 824 | const std::size_t num_properties = mem_properties.memoryHeapCount; | ||
| 825 | device_access_memory = 0; | ||
| 826 | for (std::size_t element = 0; element < num_properties; element++) { | ||
| 827 | if ((mem_properties.memoryHeaps[element].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) != 0) { | ||
| 828 | device_access_memory += mem_properties.memoryHeaps[element].size; | ||
| 829 | } | ||
| 830 | } | ||
| 831 | } | ||
| 832 | |||
| 821 | void Device::CollectToolingInfo() { | 833 | void Device::CollectToolingInfo() { |
| 822 | if (!ext_tooling_info) { | 834 | if (!ext_tooling_info) { |
| 823 | return; | 835 | return; |
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 67d70cd22..a1aba973b 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h | |||
| @@ -225,6 +225,10 @@ public: | |||
| 225 | return use_asynchronous_shaders; | 225 | return use_asynchronous_shaders; |
| 226 | } | 226 | } |
| 227 | 227 | ||
| 228 | u64 GetDeviceLocalMemory() const { | ||
| 229 | return device_access_memory; | ||
| 230 | } | ||
| 231 | |||
| 228 | private: | 232 | private: |
| 229 | /// Checks if the physical device is suitable. | 233 | /// Checks if the physical device is suitable. |
| 230 | void CheckSuitability(bool requires_swapchain) const; | 234 | void CheckSuitability(bool requires_swapchain) const; |
| @@ -244,6 +248,9 @@ private: | |||
| 244 | /// Collects information about attached tools. | 248 | /// Collects information about attached tools. |
| 245 | void CollectToolingInfo(); | 249 | void CollectToolingInfo(); |
| 246 | 250 | ||
| 251 | /// Collects information about the device's local memory. | ||
| 252 | void CollectPhysicalMemoryInfo(); | ||
| 253 | |||
| 247 | /// Returns a list of queue initialization descriptors. | 254 | /// Returns a list of queue initialization descriptors. |
| 248 | std::vector<VkDeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const; | 255 | std::vector<VkDeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const; |
| 249 | 256 | ||
| @@ -302,6 +309,8 @@ private: | |||
| 302 | 309 | ||
| 303 | /// Nsight Aftermath GPU crash tracker | 310 | /// Nsight Aftermath GPU crash tracker |
| 304 | std::unique_ptr<NsightAftermathTracker> nsight_aftermath_tracker; | 311 | std::unique_ptr<NsightAftermathTracker> nsight_aftermath_tracker; |
| 312 | |||
| 313 | u64 device_access_memory; | ||
| 305 | }; | 314 | }; |
| 306 | 315 | ||
| 307 | } // namespace Vulkan | 316 | } // namespace Vulkan |
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp index 5edd06ebc..aa173d19e 100644 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp | |||
| @@ -69,10 +69,10 @@ constexpr VkExportMemoryAllocateInfo EXPORT_ALLOCATE_INFO{ | |||
| 69 | 69 | ||
| 70 | class MemoryAllocation { | 70 | class MemoryAllocation { |
| 71 | public: | 71 | public: |
| 72 | explicit MemoryAllocation(vk::DeviceMemory memory_, VkMemoryPropertyFlags properties, | 72 | explicit MemoryAllocation(MemoryAllocator* const allocator_, vk::DeviceMemory memory_, |
| 73 | u64 allocation_size_, u32 type) | 73 | VkMemoryPropertyFlags properties, u64 allocation_size_, u32 type) |
| 74 | : memory{std::move(memory_)}, allocation_size{allocation_size_}, property_flags{properties}, | 74 | : allocator{allocator_}, memory{std::move(memory_)}, allocation_size{allocation_size_}, |
| 75 | shifted_memory_type{1U << type} {} | 75 | property_flags{properties}, shifted_memory_type{1U << type} {} |
| 76 | 76 | ||
| 77 | #if defined(_WIN32) || defined(__unix__) | 77 | #if defined(_WIN32) || defined(__unix__) |
| 78 | ~MemoryAllocation() { | 78 | ~MemoryAllocation() { |
| @@ -106,6 +106,10 @@ public: | |||
| 106 | const auto it = std::ranges::find(commits, begin, &Range::begin); | 106 | const auto it = std::ranges::find(commits, begin, &Range::begin); |
| 107 | ASSERT_MSG(it != commits.end(), "Invalid commit"); | 107 | ASSERT_MSG(it != commits.end(), "Invalid commit"); |
| 108 | commits.erase(it); | 108 | commits.erase(it); |
| 109 | if (commits.empty()) { | ||
| 110 | // Do not call any code involving 'this' after this call, the object will be destroyed | ||
| 111 | allocator->ReleaseMemory(this); | ||
| 112 | } | ||
| 109 | } | 113 | } |
| 110 | 114 | ||
| 111 | [[nodiscard]] std::span<u8> Map() { | 115 | [[nodiscard]] std::span<u8> Map() { |
| @@ -171,6 +175,7 @@ private: | |||
| 171 | return candidate; | 175 | return candidate; |
| 172 | } | 176 | } |
| 173 | 177 | ||
| 178 | MemoryAllocator* const allocator; ///< Parent memory allocation. | ||
| 174 | const vk::DeviceMemory memory; ///< Vulkan memory allocation handler. | 179 | const vk::DeviceMemory memory; ///< Vulkan memory allocation handler. |
| 175 | const u64 allocation_size; ///< Size of this allocation. | 180 | const u64 allocation_size; ///< Size of this allocation. |
| 176 | const VkMemoryPropertyFlags property_flags; ///< Vulkan memory property flags. | 181 | const VkMemoryPropertyFlags property_flags; ///< Vulkan memory property flags. |
| @@ -275,10 +280,17 @@ bool MemoryAllocator::TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, | |||
| 275 | return false; | 280 | return false; |
| 276 | } | 281 | } |
| 277 | } | 282 | } |
| 278 | allocations.push_back(std::make_unique<MemoryAllocation>(std::move(memory), flags, size, type)); | 283 | allocations.push_back( |
| 284 | std::make_unique<MemoryAllocation>(this, std::move(memory), flags, size, type)); | ||
| 279 | return true; | 285 | return true; |
| 280 | } | 286 | } |
| 281 | 287 | ||
| 288 | void MemoryAllocator::ReleaseMemory(MemoryAllocation* alloc) { | ||
| 289 | const auto it = std::ranges::find(allocations, alloc, &std::unique_ptr<MemoryAllocation>::get); | ||
| 290 | ASSERT(it != allocations.end()); | ||
| 291 | allocations.erase(it); | ||
| 292 | } | ||
| 293 | |||
| 282 | std::optional<MemoryCommit> MemoryAllocator::TryCommit(const VkMemoryRequirements& requirements, | 294 | std::optional<MemoryCommit> MemoryAllocator::TryCommit(const VkMemoryRequirements& requirements, |
| 283 | VkMemoryPropertyFlags flags) { | 295 | VkMemoryPropertyFlags flags) { |
| 284 | for (auto& allocation : allocations) { | 296 | for (auto& allocation : allocations) { |
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.h b/src/video_core/vulkan_common/vulkan_memory_allocator.h index db12d02f4..b61e931e0 100644 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.h +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.h | |||
| @@ -69,6 +69,8 @@ private: | |||
| 69 | /// Memory allocator container. | 69 | /// Memory allocator container. |
| 70 | /// Allocates and releases memory allocations on demand. | 70 | /// Allocates and releases memory allocations on demand. |
| 71 | class MemoryAllocator { | 71 | class MemoryAllocator { |
| 72 | friend MemoryAllocation; | ||
| 73 | |||
| 72 | public: | 74 | public: |
| 73 | /** | 75 | /** |
| 74 | * Construct memory allocator | 76 | * Construct memory allocator |
| @@ -104,6 +106,9 @@ private: | |||
| 104 | /// Tries to allocate a chunk of memory. | 106 | /// Tries to allocate a chunk of memory. |
| 105 | bool TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size); | 107 | bool TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size); |
| 106 | 108 | ||
| 109 | /// Releases a chunk of memory. | ||
| 110 | void ReleaseMemory(MemoryAllocation* alloc); | ||
| 111 | |||
| 107 | /// Tries to allocate a memory commit. | 112 | /// Tries to allocate a memory commit. |
| 108 | std::optional<MemoryCommit> TryCommit(const VkMemoryRequirements& requirements, | 113 | std::optional<MemoryCommit> TryCommit(const VkMemoryRequirements& requirements, |
| 109 | VkMemoryPropertyFlags flags); | 114 | VkMemoryPropertyFlags flags); |
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index a1e726dc7..62bafc453 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp | |||
| @@ -822,6 +822,7 @@ void Config::ReadRendererValues() { | |||
| 822 | QStringLiteral("use_asynchronous_shaders"), false); | 822 | QStringLiteral("use_asynchronous_shaders"), false); |
| 823 | ReadSettingGlobal(Settings::values.use_fast_gpu_time, QStringLiteral("use_fast_gpu_time"), | 823 | ReadSettingGlobal(Settings::values.use_fast_gpu_time, QStringLiteral("use_fast_gpu_time"), |
| 824 | true); | 824 | true); |
| 825 | ReadSettingGlobal(Settings::values.use_caches_gc, QStringLiteral("use_caches_gc"), false); | ||
| 825 | ReadSettingGlobal(Settings::values.bg_red, QStringLiteral("bg_red"), 0.0); | 826 | ReadSettingGlobal(Settings::values.bg_red, QStringLiteral("bg_red"), 0.0); |
| 826 | ReadSettingGlobal(Settings::values.bg_green, QStringLiteral("bg_green"), 0.0); | 827 | ReadSettingGlobal(Settings::values.bg_green, QStringLiteral("bg_green"), 0.0); |
| 827 | ReadSettingGlobal(Settings::values.bg_blue, QStringLiteral("bg_blue"), 0.0); | 828 | ReadSettingGlobal(Settings::values.bg_blue, QStringLiteral("bg_blue"), 0.0); |
| @@ -1410,6 +1411,7 @@ void Config::SaveRendererValues() { | |||
| 1410 | Settings::values.use_asynchronous_shaders, false); | 1411 | Settings::values.use_asynchronous_shaders, false); |
| 1411 | WriteSettingGlobal(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time, | 1412 | WriteSettingGlobal(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time, |
| 1412 | true); | 1413 | true); |
| 1414 | WriteSettingGlobal(QStringLiteral("use_caches_gc"), Settings::values.use_caches_gc, false); | ||
| 1413 | // Cast to double because Qt's written float values are not human-readable | 1415 | // Cast to double because Qt's written float values are not human-readable |
| 1414 | WriteSettingGlobal(QStringLiteral("bg_red"), Settings::values.bg_red, 0.0); | 1416 | WriteSettingGlobal(QStringLiteral("bg_red"), Settings::values.bg_red, 0.0); |
| 1415 | WriteSettingGlobal(QStringLiteral("bg_green"), Settings::values.bg_green, 0.0); | 1417 | WriteSettingGlobal(QStringLiteral("bg_green"), Settings::values.bg_green, 0.0); |
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp index 1af87e849..8d13c9857 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.cpp +++ b/src/yuzu/configuration/configure_graphics_advanced.cpp | |||
| @@ -31,6 +31,7 @@ void ConfigureGraphicsAdvanced::SetConfiguration() { | |||
| 31 | ui->disable_fps_limit->setChecked(Settings::values.disable_fps_limit.GetValue()); | 31 | ui->disable_fps_limit->setChecked(Settings::values.disable_fps_limit.GetValue()); |
| 32 | ui->use_assembly_shaders->setChecked(Settings::values.use_assembly_shaders.GetValue()); | 32 | ui->use_assembly_shaders->setChecked(Settings::values.use_assembly_shaders.GetValue()); |
| 33 | ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue()); | 33 | ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue()); |
| 34 | ui->use_caches_gc->setChecked(Settings::values.use_caches_gc.GetValue()); | ||
| 34 | ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue()); | 35 | ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue()); |
| 35 | 36 | ||
| 36 | if (Settings::IsConfiguringGlobal()) { | 37 | if (Settings::IsConfiguringGlobal()) { |
| @@ -65,6 +66,8 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() { | |||
| 65 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders, | 66 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders, |
| 66 | ui->use_asynchronous_shaders, | 67 | ui->use_asynchronous_shaders, |
| 67 | use_asynchronous_shaders); | 68 | use_asynchronous_shaders); |
| 69 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_caches_gc, ui->use_caches_gc, | ||
| 70 | use_caches_gc); | ||
| 68 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_fast_gpu_time, | 71 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_fast_gpu_time, |
| 69 | ui->use_fast_gpu_time, use_fast_gpu_time); | 72 | ui->use_fast_gpu_time, use_fast_gpu_time); |
| 70 | 73 | ||
| @@ -105,6 +108,7 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { | |||
| 105 | ui->use_asynchronous_shaders->setEnabled( | 108 | ui->use_asynchronous_shaders->setEnabled( |
| 106 | Settings::values.use_asynchronous_shaders.UsingGlobal()); | 109 | Settings::values.use_asynchronous_shaders.UsingGlobal()); |
| 107 | ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal()); | 110 | ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal()); |
| 111 | ui->use_caches_gc->setEnabled(Settings::values.use_caches_gc.UsingGlobal()); | ||
| 108 | ui->anisotropic_filtering_combobox->setEnabled( | 112 | ui->anisotropic_filtering_combobox->setEnabled( |
| 109 | Settings::values.max_anisotropy.UsingGlobal()); | 113 | Settings::values.max_anisotropy.UsingGlobal()); |
| 110 | 114 | ||
| @@ -121,6 +125,8 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { | |||
| 121 | use_asynchronous_shaders); | 125 | use_asynchronous_shaders); |
| 122 | ConfigurationShared::SetColoredTristate(ui->use_fast_gpu_time, | 126 | ConfigurationShared::SetColoredTristate(ui->use_fast_gpu_time, |
| 123 | Settings::values.use_fast_gpu_time, use_fast_gpu_time); | 127 | Settings::values.use_fast_gpu_time, use_fast_gpu_time); |
| 128 | ConfigurationShared::SetColoredTristate(ui->use_caches_gc, Settings::values.use_caches_gc, | ||
| 129 | use_caches_gc); | ||
| 124 | ConfigurationShared::SetColoredComboBox( | 130 | ConfigurationShared::SetColoredComboBox( |
| 125 | ui->gpu_accuracy, ui->label_gpu_accuracy, | 131 | ui->gpu_accuracy, ui->label_gpu_accuracy, |
| 126 | static_cast<int>(Settings::values.gpu_accuracy.GetValue(true))); | 132 | static_cast<int>(Settings::values.gpu_accuracy.GetValue(true))); |
diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h index c19c34851..6ac5f20ec 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.h +++ b/src/yuzu/configuration/configure_graphics_advanced.h | |||
| @@ -39,4 +39,5 @@ private: | |||
| 39 | ConfigurationShared::CheckState use_assembly_shaders; | 39 | ConfigurationShared::CheckState use_assembly_shaders; |
| 40 | ConfigurationShared::CheckState use_asynchronous_shaders; | 40 | ConfigurationShared::CheckState use_asynchronous_shaders; |
| 41 | ConfigurationShared::CheckState use_fast_gpu_time; | 41 | ConfigurationShared::CheckState use_fast_gpu_time; |
| 42 | ConfigurationShared::CheckState use_caches_gc; | ||
| 42 | }; | 43 | }; |
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index 824cb2fb2..18c43629e 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui | |||
| @@ -122,6 +122,16 @@ | |||
| 122 | </widget> | 122 | </widget> |
| 123 | </item> | 123 | </item> |
| 124 | <item> | 124 | <item> |
| 125 | <widget class="QCheckBox" name="use_caches_gc"> | ||
| 126 | <property name="toolTip"> | ||
| 127 | <string>Enables garbage collection for the GPU caches, this will try to keep VRAM within 3-4 GB by flushing the least used textures/buffers. May cause issues in a few games.</string> | ||
| 128 | </property> | ||
| 129 | <property name="text"> | ||
| 130 | <string>Enable GPU cache garbage collection (experimental)</string> | ||
| 131 | </property> | ||
| 132 | </widget> | ||
| 133 | </item> | ||
| 134 | <item> | ||
| 125 | <widget class="QWidget" name="af_layout" native="true"> | 135 | <widget class="QWidget" name="af_layout" native="true"> |
| 126 | <layout class="QHBoxLayout" name="horizontalLayout_1"> | 136 | <layout class="QHBoxLayout" name="horizontalLayout_1"> |
| 127 | <property name="leftMargin"> | 137 | <property name="leftMargin"> |
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index c960ccf89..cc9850aad 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h | |||
| @@ -227,6 +227,10 @@ use_asynchronous_gpu_emulation = | |||
| 227 | # 0: Off, 1 (default): On | 227 | # 0: Off, 1 (default): On |
| 228 | use_vsync = | 228 | use_vsync = |
| 229 | 229 | ||
| 230 | # Whether to use garbage collection or not for GPU caches. | ||
| 231 | # 0 (default): Off, 1: On | ||
| 232 | use_caches_gc = | ||
| 233 | |||
| 230 | # The clear color for the renderer. What shows up on the sides of the bottom screen. | 234 | # The clear color for the renderer. What shows up on the sides of the bottom screen. |
| 231 | # Must be in range of 0.0-1.0. Defaults to 1.0 for all. | 235 | # Must be in range of 0.0-1.0. Defaults to 1.0 for all. |
| 232 | bg_red = | 236 | bg_red = |