diff options
20 files changed, 259 insertions, 43 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 200d792dd..21bfb76a4 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -76,8 +76,9 @@ class BufferCache { | |||
| 76 | 76 | ||
| 77 | static constexpr BufferId NULL_BUFFER_ID{0}; | 77 | static constexpr BufferId NULL_BUFFER_ID{0}; |
| 78 | 78 | ||
| 79 | static constexpr u64 EXPECTED_MEMORY = 512_MiB; | 79 | static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB; |
| 80 | static constexpr u64 CRITICAL_MEMORY = 1_GiB; | 80 | static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB; |
| 81 | static constexpr s64 TARGET_THRESHOLD = 4_GiB; | ||
| 81 | 82 | ||
| 82 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 83 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 83 | 84 | ||
| @@ -436,6 +437,8 @@ private: | |||
| 436 | Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache; | 437 | Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache; |
| 437 | u64 frame_tick = 0; | 438 | u64 frame_tick = 0; |
| 438 | u64 total_used_memory = 0; | 439 | u64 total_used_memory = 0; |
| 440 | u64 minimum_memory = 0; | ||
| 441 | u64 critical_memory = 0; | ||
| 439 | 442 | ||
| 440 | std::array<BufferId, ((1ULL << 39) >> PAGE_BITS)> page_table; | 443 | std::array<BufferId, ((1ULL << 39) >> PAGE_BITS)> page_table; |
| 441 | }; | 444 | }; |
| @@ -451,11 +454,30 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, | |||
| 451 | // Ensure the first slot is used for the null buffer | 454 | // Ensure the first slot is used for the null buffer |
| 452 | void(slot_buffers.insert(runtime, NullBufferParams{})); | 455 | void(slot_buffers.insert(runtime, NullBufferParams{})); |
| 453 | common_ranges.clear(); | 456 | common_ranges.clear(); |
| 457 | |||
| 458 | if (!runtime.CanReportMemoryUsage()) { | ||
| 459 | minimum_memory = DEFAULT_EXPECTED_MEMORY; | ||
| 460 | critical_memory = DEFAULT_CRITICAL_MEMORY; | ||
| 461 | return; | ||
| 462 | } | ||
| 463 | |||
| 464 | const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory()); | ||
| 465 | const s64 min_spacing_expected = device_memory - 1_GiB - 512_MiB; | ||
| 466 | const s64 min_spacing_critical = device_memory - 1_GiB; | ||
| 467 | const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD); | ||
| 468 | const s64 min_vacancy_expected = (6 * mem_threshold) / 10; | ||
| 469 | const s64 min_vacancy_critical = (3 * mem_threshold) / 10; | ||
| 470 | minimum_memory = static_cast<u64>( | ||
| 471 | std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected), | ||
| 472 | DEFAULT_EXPECTED_MEMORY)); | ||
| 473 | critical_memory = static_cast<u64>( | ||
| 474 | std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical), | ||
| 475 | DEFAULT_CRITICAL_MEMORY)); | ||
| 454 | } | 476 | } |
| 455 | 477 | ||
| 456 | template <class P> | 478 | template <class P> |
| 457 | void BufferCache<P>::RunGarbageCollector() { | 479 | void BufferCache<P>::RunGarbageCollector() { |
| 458 | const bool aggressive_gc = total_used_memory >= CRITICAL_MEMORY; | 480 | const bool aggressive_gc = total_used_memory >= critical_memory; |
| 459 | const u64 ticks_to_destroy = aggressive_gc ? 60 : 120; | 481 | const u64 ticks_to_destroy = aggressive_gc ? 60 : 120; |
| 460 | int num_iterations = aggressive_gc ? 64 : 32; | 482 | int num_iterations = aggressive_gc ? 64 : 32; |
| 461 | const auto clean_up = [this, &num_iterations](BufferId buffer_id) { | 483 | const auto clean_up = [this, &num_iterations](BufferId buffer_id) { |
| @@ -486,7 +508,11 @@ void BufferCache<P>::TickFrame() { | |||
| 486 | const bool skip_preferred = hits * 256 < shots * 251; | 508 | const bool skip_preferred = hits * 256 < shots * 251; |
| 487 | uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; | 509 | uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; |
| 488 | 510 | ||
| 489 | if (total_used_memory >= EXPECTED_MEMORY) { | 511 | // If we can obtain the memory info, use it instead of the estimate. |
| 512 | if (runtime.CanReportMemoryUsage()) { | ||
| 513 | total_used_memory = runtime.GetDeviceMemoryUsage(); | ||
| 514 | } | ||
| 515 | if (total_used_memory >= minimum_memory) { | ||
| 490 | RunGarbageCollector(); | 516 | RunGarbageCollector(); |
| 491 | } | 517 | } |
| 492 | ++frame_tick; | 518 | ++frame_tick; |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index d4dd10bb6..f1f7b384b 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -135,6 +135,20 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_) | |||
| 135 | buffer.Create(); | 135 | buffer.Create(); |
| 136 | glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY); | 136 | glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY); |
| 137 | } | 137 | } |
| 138 | |||
| 139 | device_access_memory = [this]() -> u64 { | ||
| 140 | if (device.CanReportMemoryUsage()) { | ||
| 141 | return device.GetCurrentDedicatedVideoMemory() + 512_MiB; | ||
| 142 | } | ||
| 143 | return 2_GiB; // Return minimum requirements | ||
| 144 | }(); | ||
| 145 | } | ||
| 146 | |||
| 147 | u64 BufferCacheRuntime::GetDeviceMemoryUsage() const { | ||
| 148 | if (device.CanReportMemoryUsage()) { | ||
| 149 | return device_access_memory - device.GetCurrentDedicatedVideoMemory(); | ||
| 150 | } | ||
| 151 | return 2_GiB; | ||
| 138 | } | 152 | } |
| 139 | 153 | ||
| 140 | void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, | 154 | void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 7287731b6..a8699f28c 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -89,6 +89,8 @@ public: | |||
| 89 | void BindImageBuffer(Buffer& buffer, u32 offset, u32 size, | 89 | void BindImageBuffer(Buffer& buffer, u32 offset, u32 size, |
| 90 | VideoCore::Surface::PixelFormat format); | 90 | VideoCore::Surface::PixelFormat format); |
| 91 | 91 | ||
| 92 | u64 GetDeviceMemoryUsage() const; | ||
| 93 | |||
| 92 | void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) { | 94 | void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) { |
| 93 | const GLuint handle = fast_uniforms[stage][binding_index].handle; | 95 | const GLuint handle = fast_uniforms[stage][binding_index].handle; |
| 94 | const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size); | 96 | const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size); |
| @@ -151,6 +153,14 @@ public: | |||
| 151 | use_storage_buffers = use_storage_buffers_; | 153 | use_storage_buffers = use_storage_buffers_; |
| 152 | } | 154 | } |
| 153 | 155 | ||
| 156 | u64 GetDeviceLocalMemory() const { | ||
| 157 | return device_access_memory; | ||
| 158 | } | ||
| 159 | |||
| 160 | bool CanReportMemoryUsage() const { | ||
| 161 | return device.CanReportMemoryUsage(); | ||
| 162 | } | ||
| 163 | |||
| 154 | private: | 164 | private: |
| 155 | static constexpr std::array PABO_LUT{ | 165 | static constexpr std::array PABO_LUT{ |
| 156 | GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, | 166 | GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, |
| @@ -184,6 +194,8 @@ private: | |||
| 184 | std::array<OGLBuffer, VideoCommon::NUM_COMPUTE_UNIFORM_BUFFERS> copy_compute_uniforms; | 194 | std::array<OGLBuffer, VideoCommon::NUM_COMPUTE_UNIFORM_BUFFERS> copy_compute_uniforms; |
| 185 | 195 | ||
| 186 | u32 index_buffer_offset = 0; | 196 | u32 index_buffer_offset = 0; |
| 197 | |||
| 198 | u64 device_access_memory; | ||
| 187 | }; | 199 | }; |
| 188 | 200 | ||
| 189 | struct BufferCacheParams { | 201 | struct BufferCacheParams { |
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 715cd3a48..656dd7eb0 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -13,12 +13,15 @@ | |||
| 13 | 13 | ||
| 14 | #include <glad/glad.h> | 14 | #include <glad/glad.h> |
| 15 | 15 | ||
| 16 | #include "common/literals.h" | ||
| 16 | #include "common/logging/log.h" | 17 | #include "common/logging/log.h" |
| 17 | #include "common/settings.h" | 18 | #include "common/settings.h" |
| 18 | #include "shader_recompiler/stage.h" | 19 | #include "shader_recompiler/stage.h" |
| 19 | #include "video_core/renderer_opengl/gl_device.h" | 20 | #include "video_core/renderer_opengl/gl_device.h" |
| 20 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 21 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 21 | 22 | ||
| 23 | using namespace Common::Literals; | ||
| 24 | |||
| 22 | namespace OpenGL { | 25 | namespace OpenGL { |
| 23 | namespace { | 26 | namespace { |
| 24 | constexpr std::array LIMIT_UBOS = { | 27 | constexpr std::array LIMIT_UBOS = { |
| @@ -165,6 +168,7 @@ Device::Device() { | |||
| 165 | has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2; | 168 | has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2; |
| 166 | warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel; | 169 | warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel; |
| 167 | need_fastmath_off = is_nvidia; | 170 | need_fastmath_off = is_nvidia; |
| 171 | can_report_memory = GLAD_GL_NVX_gpu_memory_info; | ||
| 168 | 172 | ||
| 169 | // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive | 173 | // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive |
| 170 | // uniform buffers as "push constants" | 174 | // uniform buffers as "push constants" |
| @@ -276,4 +280,10 @@ void main() { | |||
| 276 | })"); | 280 | })"); |
| 277 | } | 281 | } |
| 278 | 282 | ||
| 283 | u64 Device::GetCurrentDedicatedVideoMemory() const { | ||
| 284 | GLint cur_avail_mem_kb = 0; | ||
| 285 | glGetIntegerv(GL_GPU_MEMORY_INFO_DEDICATED_VIDMEM_NVX, &cur_avail_mem_kb); | ||
| 286 | return static_cast<u64>(cur_avail_mem_kb) * 1_KiB; | ||
| 287 | } | ||
| 288 | |||
| 279 | } // namespace OpenGL | 289 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 95c2e8d38..9bb0b9148 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -20,6 +20,8 @@ public: | |||
| 20 | 20 | ||
| 21 | [[nodiscard]] std::string GetVendorName() const; | 21 | [[nodiscard]] std::string GetVendorName() const; |
| 22 | 22 | ||
| 23 | u64 GetCurrentDedicatedVideoMemory() const; | ||
| 24 | |||
| 23 | u32 GetMaxUniformBuffers(Shader::Stage stage) const noexcept { | 25 | u32 GetMaxUniformBuffers(Shader::Stage stage) const noexcept { |
| 24 | return max_uniform_buffers[static_cast<size_t>(stage)]; | 26 | return max_uniform_buffers[static_cast<size_t>(stage)]; |
| 25 | } | 27 | } |
| @@ -168,6 +170,10 @@ public: | |||
| 168 | return vendor_name == "ATI Technologies Inc."; | 170 | return vendor_name == "ATI Technologies Inc."; |
| 169 | } | 171 | } |
| 170 | 172 | ||
| 173 | bool CanReportMemoryUsage() const { | ||
| 174 | return can_report_memory; | ||
| 175 | } | ||
| 176 | |||
| 171 | private: | 177 | private: |
| 172 | static bool TestVariableAoffi(); | 178 | static bool TestVariableAoffi(); |
| 173 | static bool TestPreciseBug(); | 179 | static bool TestPreciseBug(); |
| @@ -210,6 +216,7 @@ private: | |||
| 210 | bool need_fastmath_off{}; | 216 | bool need_fastmath_off{}; |
| 211 | bool has_cbuf_ftou_bug{}; | 217 | bool has_cbuf_ftou_bug{}; |
| 212 | bool has_bool_ref_bug{}; | 218 | bool has_bool_ref_bug{}; |
| 219 | bool can_report_memory{}; | ||
| 213 | 220 | ||
| 214 | std::string vendor_name; | 221 | std::string vendor_name; |
| 215 | }; | 222 | }; |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 3c1f79a27..8f9a65beb 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -484,6 +484,13 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& | |||
| 484 | rescale_read_fbos[i].Create(); | 484 | rescale_read_fbos[i].Create(); |
| 485 | } | 485 | } |
| 486 | } | 486 | } |
| 487 | |||
| 488 | device_access_memory = [this]() -> u64 { | ||
| 489 | if (device.CanReportMemoryUsage()) { | ||
| 490 | return device.GetCurrentDedicatedVideoMemory() + 512_MiB; | ||
| 491 | } | ||
| 492 | return 2_GiB; // Return minimum requirements | ||
| 493 | }(); | ||
| 487 | } | 494 | } |
| 488 | 495 | ||
| 489 | TextureCacheRuntime::~TextureCacheRuntime() = default; | 496 | TextureCacheRuntime::~TextureCacheRuntime() = default; |
| @@ -500,13 +507,11 @@ ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) { | |||
| 500 | return download_buffers.RequestMap(size, false); | 507 | return download_buffers.RequestMap(size, false); |
| 501 | } | 508 | } |
| 502 | 509 | ||
| 503 | u64 TextureCacheRuntime::GetDeviceLocalMemory() const { | 510 | u64 TextureCacheRuntime::GetDeviceMemoryUsage() const { |
| 504 | if (GLAD_GL_NVX_gpu_memory_info) { | 511 | if (device.CanReportMemoryUsage()) { |
| 505 | GLint cur_avail_mem_kb = 0; | 512 | return device_access_memory - device.GetCurrentDedicatedVideoMemory(); |
| 506 | glGetIntegerv(GL_GPU_MEMORY_INFO_CURRENT_AVAILABLE_VIDMEM_NVX, &cur_avail_mem_kb); | ||
| 507 | return static_cast<u64>(cur_avail_mem_kb) * 1_KiB; | ||
| 508 | } | 513 | } |
| 509 | return 2_GiB; // Return minimum requirements | 514 | return 2_GiB; |
| 510 | } | 515 | } |
| 511 | 516 | ||
| 512 | void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image, | 517 | void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image, |
| @@ -686,6 +691,7 @@ Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, | |||
| 686 | } | 691 | } |
| 687 | if (IsConverted(runtime->device, info.format, info.type)) { | 692 | if (IsConverted(runtime->device, info.format, info.type)) { |
| 688 | flags |= ImageFlagBits::Converted; | 693 | flags |= ImageFlagBits::Converted; |
| 694 | flags |= ImageFlagBits::CostlyLoad; | ||
| 689 | gl_internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; | 695 | gl_internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; |
| 690 | gl_format = GL_RGBA; | 696 | gl_format = GL_RGBA; |
| 691 | gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; | 697 | gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 7f425631f..53088b66e 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include <glad/glad.h> | 10 | #include <glad/glad.h> |
| 11 | 11 | ||
| 12 | #include "shader_recompiler/shader_info.h" | 12 | #include "shader_recompiler/shader_info.h" |
| 13 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 13 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 14 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 14 | #include "video_core/renderer_opengl/util_shaders.h" | 15 | #include "video_core/renderer_opengl/util_shaders.h" |
| 15 | #include "video_core/texture_cache/image_view_base.h" | 16 | #include "video_core/texture_cache/image_view_base.h" |
| @@ -21,7 +22,6 @@ struct ResolutionScalingInfo; | |||
| 21 | 22 | ||
| 22 | namespace OpenGL { | 23 | namespace OpenGL { |
| 23 | 24 | ||
| 24 | class Device; | ||
| 25 | class ProgramManager; | 25 | class ProgramManager; |
| 26 | class StateTracker; | 26 | class StateTracker; |
| 27 | 27 | ||
| @@ -83,7 +83,15 @@ public: | |||
| 83 | 83 | ||
| 84 | ImageBufferMap DownloadStagingBuffer(size_t size); | 84 | ImageBufferMap DownloadStagingBuffer(size_t size); |
| 85 | 85 | ||
| 86 | u64 GetDeviceLocalMemory() const; | 86 | u64 GetDeviceLocalMemory() const { |
| 87 | return device_access_memory; | ||
| 88 | } | ||
| 89 | |||
| 90 | u64 GetDeviceMemoryUsage() const; | ||
| 91 | |||
| 92 | bool CanReportMemoryUsage() const { | ||
| 93 | return device.CanReportMemoryUsage(); | ||
| 94 | } | ||
| 87 | 95 | ||
| 88 | bool ShouldReinterpret([[maybe_unused]] Image& dst, [[maybe_unused]] Image& src) { | 96 | bool ShouldReinterpret([[maybe_unused]] Image& dst, [[maybe_unused]] Image& src) { |
| 89 | return true; | 97 | return true; |
| @@ -172,6 +180,7 @@ private: | |||
| 172 | std::array<OGLFramebuffer, 4> rescale_draw_fbos; | 180 | std::array<OGLFramebuffer, 4> rescale_draw_fbos; |
| 173 | std::array<OGLFramebuffer, 4> rescale_read_fbos; | 181 | std::array<OGLFramebuffer, 4> rescale_read_fbos; |
| 174 | const Settings::ResolutionScalingInfo& resolution; | 182 | const Settings::ResolutionScalingInfo& resolution; |
| 183 | u64 device_access_memory; | ||
| 175 | }; | 184 | }; |
| 176 | 185 | ||
| 177 | class Image : public VideoCommon::ImageBase { | 186 | class Image : public VideoCommon::ImageBase { |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 5ffd93499..def838c34 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp | |||
| @@ -141,6 +141,18 @@ StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size) { | |||
| 141 | return staging_pool.Request(size, MemoryUsage::Download); | 141 | return staging_pool.Request(size, MemoryUsage::Download); |
| 142 | } | 142 | } |
| 143 | 143 | ||
| 144 | u64 BufferCacheRuntime::GetDeviceLocalMemory() const { | ||
| 145 | return device.GetDeviceLocalMemory(); | ||
| 146 | } | ||
| 147 | |||
| 148 | u64 BufferCacheRuntime::GetDeviceMemoryUsage() const { | ||
| 149 | return device.GetDeviceMemoryUsage(); | ||
| 150 | } | ||
| 151 | |||
| 152 | bool BufferCacheRuntime::CanReportMemoryUsage() const { | ||
| 153 | return device.CanReportMemoryUsage(); | ||
| 154 | } | ||
| 155 | |||
| 144 | void BufferCacheRuntime::Finish() { | 156 | void BufferCacheRuntime::Finish() { |
| 145 | scheduler.Finish(); | 157 | scheduler.Finish(); |
| 146 | } | 158 | } |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 1ee0d8420..d7fdd18ff 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h | |||
| @@ -65,6 +65,12 @@ public: | |||
| 65 | 65 | ||
| 66 | void Finish(); | 66 | void Finish(); |
| 67 | 67 | ||
| 68 | u64 GetDeviceLocalMemory() const; | ||
| 69 | |||
| 70 | u64 GetDeviceMemoryUsage() const; | ||
| 71 | |||
| 72 | bool CanReportMemoryUsage() const; | ||
| 73 | |||
| 68 | [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); | 74 | [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); |
| 69 | 75 | ||
| 70 | [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); | 76 | [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); |
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 5d5329abf..64a58304b 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp | |||
| @@ -118,7 +118,7 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem | |||
| 118 | .image = nullptr, | 118 | .image = nullptr, |
| 119 | .buffer = *stream_buffer, | 119 | .buffer = *stream_buffer, |
| 120 | }; | 120 | }; |
| 121 | const auto memory_properties = device.GetPhysical().GetMemoryProperties(); | 121 | const auto memory_properties = device.GetPhysical().GetMemoryProperties().memoryProperties; |
| 122 | VkMemoryAllocateInfo stream_memory_info{ | 122 | VkMemoryAllocateInfo stream_memory_info{ |
| 123 | .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, | 123 | .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, |
| 124 | .pNext = make_dedicated ? &dedicated_info : nullptr, | 124 | .pNext = make_dedicated ? &dedicated_info : nullptr, |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 83a23b66a..f2890d263 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp | |||
| @@ -1189,6 +1189,14 @@ u64 TextureCacheRuntime::GetDeviceLocalMemory() const { | |||
| 1189 | return device.GetDeviceLocalMemory(); | 1189 | return device.GetDeviceLocalMemory(); |
| 1190 | } | 1190 | } |
| 1191 | 1191 | ||
| 1192 | u64 TextureCacheRuntime::GetDeviceMemoryUsage() const { | ||
| 1193 | return device.GetDeviceMemoryUsage(); | ||
| 1194 | } | ||
| 1195 | |||
| 1196 | bool TextureCacheRuntime::CanReportMemoryUsage() const { | ||
| 1197 | return device.CanReportMemoryUsage(); | ||
| 1198 | } | ||
| 1199 | |||
| 1192 | void TextureCacheRuntime::TickFrame() {} | 1200 | void TextureCacheRuntime::TickFrame() {} |
| 1193 | 1201 | ||
| 1194 | Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_, | 1202 | Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_, |
| @@ -1203,6 +1211,7 @@ Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu | |||
| 1203 | } else { | 1211 | } else { |
| 1204 | flags |= VideoCommon::ImageFlagBits::Converted; | 1212 | flags |= VideoCommon::ImageFlagBits::Converted; |
| 1205 | } | 1213 | } |
| 1214 | flags |= VideoCommon::ImageFlagBits::CostlyLoad; | ||
| 1206 | } | 1215 | } |
| 1207 | if (runtime->device.HasDebuggingToolAttached()) { | 1216 | if (runtime->device.HasDebuggingToolAttached()) { |
| 1208 | original_image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); | 1217 | original_image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index c81130dd2..cb15b4a1c 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h | |||
| @@ -55,6 +55,10 @@ public: | |||
| 55 | 55 | ||
| 56 | u64 GetDeviceLocalMemory() const; | 56 | u64 GetDeviceLocalMemory() const; |
| 57 | 57 | ||
| 58 | u64 GetDeviceMemoryUsage() const; | ||
| 59 | |||
| 60 | bool CanReportMemoryUsage() const; | ||
| 61 | |||
| 58 | void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, | 62 | void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, |
| 59 | const Region2D& dst_region, const Region2D& src_region, | 63 | const Region2D& dst_region, const Region2D& src_region, |
| 60 | Tegra::Engines::Fermi2D::Filter filter, | 64 | Tegra::Engines::Fermi2D::Filter filter, |
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 89c111c00..dd0106432 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h | |||
| @@ -33,11 +33,12 @@ enum class ImageFlagBits : u32 { | |||
| 33 | ///< garbage collection priority | 33 | ///< garbage collection priority |
| 34 | Alias = 1 << 11, ///< This image has aliases and has priority on garbage | 34 | Alias = 1 << 11, ///< This image has aliases and has priority on garbage |
| 35 | ///< collection | 35 | ///< collection |
| 36 | CostlyLoad = 1 << 12, ///< Protected from low-tier GC as it is costly to load back. | ||
| 36 | 37 | ||
| 37 | // Rescaler | 38 | // Rescaler |
| 38 | Rescaled = 1 << 12, | 39 | Rescaled = 1 << 13, |
| 39 | CheckingRescalable = 1 << 13, | 40 | CheckingRescalable = 1 << 14, |
| 40 | IsRescalable = 1 << 14, | 41 | IsRescalable = 1 << 15, |
| 41 | }; | 42 | }; |
| 42 | DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) | 43 | DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) |
| 43 | 44 | ||
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 72eeb8bbd..efc1c4525 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -50,14 +50,20 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& | |||
| 50 | void(slot_samplers.insert(runtime, sampler_descriptor)); | 50 | void(slot_samplers.insert(runtime, sampler_descriptor)); |
| 51 | 51 | ||
| 52 | if constexpr (HAS_DEVICE_MEMORY_INFO) { | 52 | if constexpr (HAS_DEVICE_MEMORY_INFO) { |
| 53 | const auto device_memory = runtime.GetDeviceLocalMemory(); | 53 | const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory()); |
| 54 | const u64 possible_expected_memory = (device_memory * 4) / 10; | 54 | const s64 min_spacing_expected = device_memory - 1_GiB - 512_MiB; |
| 55 | const u64 possible_critical_memory = (device_memory * 7) / 10; | 55 | const s64 min_spacing_critical = device_memory - 1_GiB; |
| 56 | expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY - 256_MiB); | 56 | const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD); |
| 57 | critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY - 512_MiB); | 57 | const s64 min_vacancy_expected = (6 * mem_threshold) / 10; |
| 58 | minimum_memory = 0; | 58 | const s64 min_vacancy_critical = (3 * mem_threshold) / 10; |
| 59 | expected_memory = static_cast<u64>( | ||
| 60 | std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected), | ||
| 61 | DEFAULT_EXPECTED_MEMORY)); | ||
| 62 | critical_memory = static_cast<u64>( | ||
| 63 | std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical), | ||
| 64 | DEFAULT_CRITICAL_MEMORY)); | ||
| 65 | minimum_memory = static_cast<u64>((device_memory - mem_threshold) / 2); | ||
| 59 | } else { | 66 | } else { |
| 60 | // On OpenGL we can be more conservatives as the driver takes care. | ||
| 61 | expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; | 67 | expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; |
| 62 | critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; | 68 | critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; |
| 63 | minimum_memory = 0; | 69 | minimum_memory = 0; |
| @@ -66,18 +72,21 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& | |||
| 66 | 72 | ||
| 67 | template <class P> | 73 | template <class P> |
| 68 | void TextureCache<P>::RunGarbageCollector() { | 74 | void TextureCache<P>::RunGarbageCollector() { |
| 69 | const bool high_priority_mode = total_used_memory >= expected_memory; | 75 | bool high_priority_mode = total_used_memory >= expected_memory; |
| 70 | const bool aggressive_mode = total_used_memory >= critical_memory; | 76 | bool aggressive_mode = total_used_memory >= critical_memory; |
| 71 | const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 100ULL; | 77 | const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 50ULL; |
| 72 | size_t num_iterations = aggressive_mode ? 300 : (high_priority_mode ? 50 : 10); | 78 | size_t num_iterations = aggressive_mode ? 40 : (high_priority_mode ? 20 : 10); |
| 73 | const auto clean_up = [this, &num_iterations, high_priority_mode](ImageId image_id) { | 79 | const auto clean_up = [this, &num_iterations, &high_priority_mode, |
| 80 | &aggressive_mode](ImageId image_id) { | ||
| 74 | if (num_iterations == 0) { | 81 | if (num_iterations == 0) { |
| 75 | return true; | 82 | return true; |
| 76 | } | 83 | } |
| 77 | --num_iterations; | 84 | --num_iterations; |
| 78 | auto& image = slot_images[image_id]; | 85 | auto& image = slot_images[image_id]; |
| 79 | const bool must_download = image.IsSafeDownload(); | 86 | const bool must_download = |
| 80 | if (!high_priority_mode && must_download) { | 87 | image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap); |
| 88 | if (!high_priority_mode && | ||
| 89 | (must_download || True(image.flags & ImageFlagBits::CostlyLoad))) { | ||
| 81 | return false; | 90 | return false; |
| 82 | } | 91 | } |
| 83 | if (must_download) { | 92 | if (must_download) { |
| @@ -92,6 +101,18 @@ void TextureCache<P>::RunGarbageCollector() { | |||
| 92 | } | 101 | } |
| 93 | UnregisterImage(image_id); | 102 | UnregisterImage(image_id); |
| 94 | DeleteImage(image_id, image.scale_tick > frame_tick + 5); | 103 | DeleteImage(image_id, image.scale_tick > frame_tick + 5); |
| 104 | if (total_used_memory < critical_memory) { | ||
| 105 | if (aggressive_mode) { | ||
| 106 | // Sink the aggressiveness. | ||
| 107 | num_iterations >>= 2; | ||
| 108 | aggressive_mode = false; | ||
| 109 | return false; | ||
| 110 | } | ||
| 111 | if (high_priority_mode && total_used_memory < expected_memory) { | ||
| 112 | num_iterations >>= 1; | ||
| 113 | high_priority_mode = false; | ||
| 114 | } | ||
| 115 | } | ||
| 95 | return false; | 116 | return false; |
| 96 | }; | 117 | }; |
| 97 | lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up); | 118 | lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up); |
| @@ -99,6 +120,10 @@ void TextureCache<P>::RunGarbageCollector() { | |||
| 99 | 120 | ||
| 100 | template <class P> | 121 | template <class P> |
| 101 | void TextureCache<P>::TickFrame() { | 122 | void TextureCache<P>::TickFrame() { |
| 123 | // If we can obtain the memory info, use it instead of the estimate. | ||
| 124 | if (runtime.CanReportMemoryUsage()) { | ||
| 125 | total_used_memory = runtime.GetDeviceMemoryUsage(); | ||
| 126 | } | ||
| 102 | if (total_used_memory > minimum_memory) { | 127 | if (total_used_memory > minimum_memory) { |
| 103 | RunGarbageCollector(); | 128 | RunGarbageCollector(); |
| 104 | } | 129 | } |
| @@ -106,6 +131,7 @@ void TextureCache<P>::TickFrame() { | |||
| 106 | sentenced_framebuffers.Tick(); | 131 | sentenced_framebuffers.Tick(); |
| 107 | sentenced_image_view.Tick(); | 132 | sentenced_image_view.Tick(); |
| 108 | runtime.TickFrame(); | 133 | runtime.TickFrame(); |
| 134 | critical_gc = 0; | ||
| 109 | ++frame_tick; | 135 | ++frame_tick; |
| 110 | } | 136 | } |
| 111 | 137 | ||
| @@ -1052,6 +1078,11 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA | |||
| 1052 | 1078 | ||
| 1053 | for (const ImageId overlap_id : overlap_ids) { | 1079 | for (const ImageId overlap_id : overlap_ids) { |
| 1054 | Image& overlap = slot_images[overlap_id]; | 1080 | Image& overlap = slot_images[overlap_id]; |
| 1081 | if (True(overlap.flags & ImageFlagBits::GpuModified)) { | ||
| 1082 | new_image.flags |= ImageFlagBits::GpuModified; | ||
| 1083 | new_image.modification_tick = | ||
| 1084 | std::max(overlap.modification_tick, new_image.modification_tick); | ||
| 1085 | } | ||
| 1055 | if (overlap.info.num_samples != new_image.info.num_samples) { | 1086 | if (overlap.info.num_samples != new_image.info.num_samples) { |
| 1056 | LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); | 1087 | LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); |
| 1057 | } else { | 1088 | } else { |
| @@ -1414,6 +1445,10 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { | |||
| 1414 | tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); | 1445 | tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); |
| 1415 | } | 1446 | } |
| 1416 | total_used_memory += Common::AlignUp(tentative_size, 1024); | 1447 | total_used_memory += Common::AlignUp(tentative_size, 1024); |
| 1448 | if (total_used_memory > critical_memory && critical_gc < GC_EMERGENCY_COUNTS) { | ||
| 1449 | RunGarbageCollector(); | ||
| 1450 | critical_gc++; | ||
| 1451 | } | ||
| 1417 | image.lru_index = lru_cache.Insert(image_id, frame_tick); | 1452 | image.lru_index = lru_cache.Insert(image_id, frame_tick); |
| 1418 | 1453 | ||
| 1419 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, | 1454 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, |
| @@ -1704,6 +1739,9 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) { | |||
| 1704 | most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); | 1739 | most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); |
| 1705 | aliased_images.push_back(&aliased); | 1740 | aliased_images.push_back(&aliased); |
| 1706 | any_rescaled |= True(aliased_image.flags & ImageFlagBits::Rescaled); | 1741 | any_rescaled |= True(aliased_image.flags & ImageFlagBits::Rescaled); |
| 1742 | if (True(aliased_image.flags & ImageFlagBits::GpuModified)) { | ||
| 1743 | image.flags |= ImageFlagBits::GpuModified; | ||
| 1744 | } | ||
| 1707 | } | 1745 | } |
| 1708 | } | 1746 | } |
| 1709 | if (aliased_images.empty()) { | 1747 | if (aliased_images.empty()) { |
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 647ca0730..b1324edf3 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h | |||
| @@ -59,8 +59,10 @@ class TextureCache { | |||
| 59 | /// True when the API can provide info about the memory of the device. | 59 | /// True when the API can provide info about the memory of the device. |
| 60 | static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; | 60 | static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; |
| 61 | 61 | ||
| 62 | static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB; | 62 | static constexpr s64 TARGET_THRESHOLD = 4_GiB; |
| 63 | static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB; | 63 | static constexpr s64 DEFAULT_EXPECTED_MEMORY = 1_GiB + 125_MiB; |
| 64 | static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB + 625_MiB; | ||
| 65 | static constexpr size_t GC_EMERGENCY_COUNTS = 2; | ||
| 64 | 66 | ||
| 65 | using Runtime = typename P::Runtime; | 67 | using Runtime = typename P::Runtime; |
| 66 | using Image = typename P::Image; | 68 | using Image = typename P::Image; |
| @@ -372,6 +374,7 @@ private: | |||
| 372 | u64 minimum_memory; | 374 | u64 minimum_memory; |
| 373 | u64 expected_memory; | 375 | u64 expected_memory; |
| 374 | u64 critical_memory; | 376 | u64 critical_memory; |
| 377 | size_t critical_gc; | ||
| 375 | 378 | ||
| 376 | SlotVector<Image> slot_images; | 379 | SlotVector<Image> slot_images; |
| 377 | SlotVector<ImageMapView> slot_map_views; | 380 | SlotVector<ImageMapView> slot_map_views; |
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 32c10d675..e142bee35 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp | |||
| @@ -12,12 +12,14 @@ | |||
| 12 | #include <vector> | 12 | #include <vector> |
| 13 | 13 | ||
| 14 | #include "common/assert.h" | 14 | #include "common/assert.h" |
| 15 | #include "common/literals.h" | ||
| 15 | #include "common/settings.h" | 16 | #include "common/settings.h" |
| 16 | #include "video_core/vulkan_common/nsight_aftermath_tracker.h" | 17 | #include "video_core/vulkan_common/nsight_aftermath_tracker.h" |
| 17 | #include "video_core/vulkan_common/vulkan_device.h" | 18 | #include "video_core/vulkan_common/vulkan_device.h" |
| 18 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 19 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 19 | 20 | ||
| 20 | namespace Vulkan { | 21 | namespace Vulkan { |
| 22 | using namespace Common::Literals; | ||
| 21 | namespace { | 23 | namespace { |
| 22 | namespace Alternatives { | 24 | namespace Alternatives { |
| 23 | constexpr std::array STENCIL8_UINT{ | 25 | constexpr std::array STENCIL8_UINT{ |
| @@ -596,6 +598,11 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | |||
| 596 | } | 598 | } |
| 597 | logical = vk::Device::Create(physical, queue_cis, extensions, first_next, dld); | 599 | logical = vk::Device::Create(physical, queue_cis, extensions, first_next, dld); |
| 598 | 600 | ||
| 601 | is_integrated = properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU; | ||
| 602 | is_virtual = properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU; | ||
| 603 | is_non_gpu = properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_OTHER || | ||
| 604 | properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_CPU; | ||
| 605 | |||
| 599 | CollectPhysicalMemoryInfo(); | 606 | CollectPhysicalMemoryInfo(); |
| 600 | CollectTelemetryParameters(); | 607 | CollectTelemetryParameters(); |
| 601 | CollectToolingInfo(); | 608 | CollectToolingInfo(); |
| @@ -985,6 +992,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { | |||
| 985 | test(has_khr_swapchain_mutable_format, VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME, | 992 | test(has_khr_swapchain_mutable_format, VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME, |
| 986 | false); | 993 | false); |
| 987 | test(has_ext_line_rasterization, VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME, false); | 994 | test(has_ext_line_rasterization, VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME, false); |
| 995 | test(ext_memory_budget, VK_EXT_MEMORY_BUDGET_EXTENSION_NAME, true); | ||
| 988 | if (Settings::values.enable_nsight_aftermath) { | 996 | if (Settings::values.enable_nsight_aftermath) { |
| 989 | test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, | 997 | test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, |
| 990 | true); | 998 | true); |
| @@ -997,7 +1005,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { | |||
| 997 | VkPhysicalDeviceFeatures2KHR features{}; | 1005 | VkPhysicalDeviceFeatures2KHR features{}; |
| 998 | features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; | 1006 | features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; |
| 999 | 1007 | ||
| 1000 | VkPhysicalDeviceProperties2KHR physical_properties; | 1008 | VkPhysicalDeviceProperties2KHR physical_properties{}; |
| 1001 | physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; | 1009 | physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; |
| 1002 | 1010 | ||
| 1003 | if (has_khr_shader_float16_int8) { | 1011 | if (has_khr_shader_float16_int8) { |
| @@ -1267,15 +1275,50 @@ void Device::CollectTelemetryParameters() { | |||
| 1267 | vendor_name = driver.driverName; | 1275 | vendor_name = driver.driverName; |
| 1268 | } | 1276 | } |
| 1269 | 1277 | ||
| 1278 | u64 Device::GetDeviceMemoryUsage() const { | ||
| 1279 | VkPhysicalDeviceMemoryBudgetPropertiesEXT budget; | ||
| 1280 | budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT; | ||
| 1281 | budget.pNext = nullptr; | ||
| 1282 | physical.GetMemoryProperties(&budget); | ||
| 1283 | u64 result{}; | ||
| 1284 | for (const size_t heap : valid_heap_memory) { | ||
| 1285 | result += budget.heapUsage[heap]; | ||
| 1286 | } | ||
| 1287 | return result; | ||
| 1288 | } | ||
| 1289 | |||
| 1270 | void Device::CollectPhysicalMemoryInfo() { | 1290 | void Device::CollectPhysicalMemoryInfo() { |
| 1271 | const auto mem_properties = physical.GetMemoryProperties(); | 1291 | VkPhysicalDeviceMemoryBudgetPropertiesEXT budget{}; |
| 1292 | budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT; | ||
| 1293 | const auto mem_info = physical.GetMemoryProperties(ext_memory_budget ? &budget : nullptr); | ||
| 1294 | const auto& mem_properties = mem_info.memoryProperties; | ||
| 1272 | const size_t num_properties = mem_properties.memoryHeapCount; | 1295 | const size_t num_properties = mem_properties.memoryHeapCount; |
| 1273 | device_access_memory = 0; | 1296 | device_access_memory = 0; |
| 1297 | u64 device_initial_usage = 0; | ||
| 1298 | u64 local_memory = 0; | ||
| 1274 | for (size_t element = 0; element < num_properties; ++element) { | 1299 | for (size_t element = 0; element < num_properties; ++element) { |
| 1275 | if ((mem_properties.memoryHeaps[element].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) != 0) { | 1300 | const bool is_heap_local = |
| 1276 | device_access_memory += mem_properties.memoryHeaps[element].size; | 1301 | (mem_properties.memoryHeaps[element].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) != 0; |
| 1302 | if (!is_integrated && !is_heap_local) { | ||
| 1303 | continue; | ||
| 1277 | } | 1304 | } |
| 1305 | valid_heap_memory.push_back(element); | ||
| 1306 | if (is_heap_local) { | ||
| 1307 | local_memory += mem_properties.memoryHeaps[element].size; | ||
| 1308 | } | ||
| 1309 | if (ext_memory_budget) { | ||
| 1310 | device_initial_usage += budget.heapUsage[element]; | ||
| 1311 | device_access_memory += budget.heapBudget[element]; | ||
| 1312 | continue; | ||
| 1313 | } | ||
| 1314 | device_access_memory += mem_properties.memoryHeaps[element].size; | ||
| 1315 | } | ||
| 1316 | if (!is_integrated) { | ||
| 1317 | return; | ||
| 1278 | } | 1318 | } |
| 1319 | const s64 available_memory = static_cast<s64>(device_access_memory - device_initial_usage); | ||
| 1320 | device_access_memory = static_cast<u64>(std::max<s64>( | ||
| 1321 | std::min<s64>(available_memory - 8_GiB, 4_GiB), static_cast<s64>(local_memory))); | ||
| 1279 | } | 1322 | } |
| 1280 | 1323 | ||
| 1281 | void Device::CollectToolingInfo() { | 1324 | void Device::CollectToolingInfo() { |
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 1c7c18bcf..2d709d069 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h | |||
| @@ -341,6 +341,12 @@ public: | |||
| 341 | return device_access_memory; | 341 | return device_access_memory; |
| 342 | } | 342 | } |
| 343 | 343 | ||
| 344 | bool CanReportMemoryUsage() const { | ||
| 345 | return ext_memory_budget; | ||
| 346 | } | ||
| 347 | |||
| 348 | u64 GetDeviceMemoryUsage() const; | ||
| 349 | |||
| 344 | u32 GetSetsPerPool() const { | 350 | u32 GetSetsPerPool() const { |
| 345 | return sets_per_pool; | 351 | return sets_per_pool; |
| 346 | } | 352 | } |
| @@ -421,6 +427,9 @@ private: | |||
| 421 | bool is_topology_list_restart_supported{}; ///< Support for primitive restart with list | 427 | bool is_topology_list_restart_supported{}; ///< Support for primitive restart with list |
| 422 | ///< topologies. | 428 | ///< topologies. |
| 423 | bool is_patch_list_restart_supported{}; ///< Support for primitive restart with list patch. | 429 | bool is_patch_list_restart_supported{}; ///< Support for primitive restart with list patch. |
| 430 | bool is_integrated{}; ///< Is GPU an iGPU. | ||
| 431 | bool is_virtual{}; ///< Is GPU a virtual GPU. | ||
| 432 | bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device. | ||
| 424 | bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. | 433 | bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. |
| 425 | bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2. | 434 | bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2. |
| 426 | bool nv_geometry_shader_passthrough{}; ///< Support for VK_NV_geometry_shader_passthrough. | 435 | bool nv_geometry_shader_passthrough{}; ///< Support for VK_NV_geometry_shader_passthrough. |
| @@ -445,6 +454,7 @@ private: | |||
| 445 | bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64. | 454 | bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64. |
| 446 | bool ext_conservative_rasterization{}; ///< Support for VK_EXT_conservative_rasterization. | 455 | bool ext_conservative_rasterization{}; ///< Support for VK_EXT_conservative_rasterization. |
| 447 | bool ext_provoking_vertex{}; ///< Support for VK_EXT_provoking_vertex. | 456 | bool ext_provoking_vertex{}; ///< Support for VK_EXT_provoking_vertex. |
| 457 | bool ext_memory_budget{}; ///< Support for VK_EXT_memory_budget. | ||
| 448 | bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. | 458 | bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. |
| 449 | bool has_broken_cube_compatibility{}; ///< Has broken cube compatiblity bit | 459 | bool has_broken_cube_compatibility{}; ///< Has broken cube compatiblity bit |
| 450 | bool has_renderdoc{}; ///< Has RenderDoc attached | 460 | bool has_renderdoc{}; ///< Has RenderDoc attached |
| @@ -456,6 +466,7 @@ private: | |||
| 456 | // Telemetry parameters | 466 | // Telemetry parameters |
| 457 | std::string vendor_name; ///< Device's driver name. | 467 | std::string vendor_name; ///< Device's driver name. |
| 458 | std::vector<std::string> supported_extensions; ///< Reported Vulkan extensions. | 468 | std::vector<std::string> supported_extensions; ///< Reported Vulkan extensions. |
| 469 | std::vector<size_t> valid_heap_memory; ///< Heaps used. | ||
| 459 | 470 | ||
| 460 | /// Format properties dictionary. | 471 | /// Format properties dictionary. |
| 461 | std::unordered_map<VkFormat, VkFormatProperties> format_properties; | 472 | std::unordered_map<VkFormat, VkFormatProperties> format_properties; |
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp index 300a61205..e6e97b332 100644 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp | |||
| @@ -227,7 +227,7 @@ void MemoryCommit::Release() { | |||
| 227 | } | 227 | } |
| 228 | 228 | ||
| 229 | MemoryAllocator::MemoryAllocator(const Device& device_, bool export_allocations_) | 229 | MemoryAllocator::MemoryAllocator(const Device& device_, bool export_allocations_) |
| 230 | : device{device_}, properties{device_.GetPhysical().GetMemoryProperties()}, | 230 | : device{device_}, properties{device_.GetPhysical().GetMemoryProperties().memoryProperties}, |
| 231 | export_allocations{export_allocations_}, | 231 | export_allocations{export_allocations_}, |
| 232 | buffer_image_granularity{ | 232 | buffer_image_granularity{ |
| 233 | device_.GetPhysical().GetProperties().limits.bufferImageGranularity} {} | 233 | device_.GetPhysical().GetProperties().limits.bufferImageGranularity} {} |
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index a794f16dd..742cc39da 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp | |||
| @@ -237,8 +237,8 @@ bool Load(VkInstance instance, InstanceDispatch& dld) noexcept { | |||
| 237 | return X(vkCreateDevice) && X(vkDestroyDevice) && X(vkDestroyDevice) && | 237 | return X(vkCreateDevice) && X(vkDestroyDevice) && X(vkDestroyDevice) && |
| 238 | X(vkEnumerateDeviceExtensionProperties) && X(vkEnumeratePhysicalDevices) && | 238 | X(vkEnumerateDeviceExtensionProperties) && X(vkEnumeratePhysicalDevices) && |
| 239 | X(vkGetDeviceProcAddr) && X(vkGetPhysicalDeviceFormatProperties) && | 239 | X(vkGetDeviceProcAddr) && X(vkGetPhysicalDeviceFormatProperties) && |
| 240 | X(vkGetPhysicalDeviceMemoryProperties) && X(vkGetPhysicalDeviceProperties) && | 240 | X(vkGetPhysicalDeviceMemoryProperties) && X(vkGetPhysicalDeviceMemoryProperties2) && |
| 241 | X(vkGetPhysicalDeviceQueueFamilyProperties); | 241 | X(vkGetPhysicalDeviceProperties) && X(vkGetPhysicalDeviceQueueFamilyProperties); |
| 242 | #undef X | 242 | #undef X |
| 243 | } | 243 | } |
| 244 | 244 | ||
| @@ -926,9 +926,12 @@ std::vector<VkPresentModeKHR> PhysicalDevice::GetSurfacePresentModesKHR( | |||
| 926 | return modes; | 926 | return modes; |
| 927 | } | 927 | } |
| 928 | 928 | ||
| 929 | VkPhysicalDeviceMemoryProperties PhysicalDevice::GetMemoryProperties() const noexcept { | 929 | VkPhysicalDeviceMemoryProperties2 PhysicalDevice::GetMemoryProperties( |
| 930 | VkPhysicalDeviceMemoryProperties properties; | 930 | void* next_structures) const noexcept { |
| 931 | dld->vkGetPhysicalDeviceMemoryProperties(physical_device, &properties); | 931 | VkPhysicalDeviceMemoryProperties2 properties{}; |
| 932 | properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2; | ||
| 933 | properties.pNext = next_structures; | ||
| 934 | dld->vkGetPhysicalDeviceMemoryProperties2(physical_device, &properties); | ||
| 932 | return properties; | 935 | return properties; |
| 933 | } | 936 | } |
| 934 | 937 | ||
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 53bac627f..0a5f9931c 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h | |||
| @@ -172,6 +172,7 @@ struct InstanceDispatch { | |||
| 172 | PFN_vkGetPhysicalDeviceFeatures2KHR vkGetPhysicalDeviceFeatures2KHR{}; | 172 | PFN_vkGetPhysicalDeviceFeatures2KHR vkGetPhysicalDeviceFeatures2KHR{}; |
| 173 | PFN_vkGetPhysicalDeviceFormatProperties vkGetPhysicalDeviceFormatProperties{}; | 173 | PFN_vkGetPhysicalDeviceFormatProperties vkGetPhysicalDeviceFormatProperties{}; |
| 174 | PFN_vkGetPhysicalDeviceMemoryProperties vkGetPhysicalDeviceMemoryProperties{}; | 174 | PFN_vkGetPhysicalDeviceMemoryProperties vkGetPhysicalDeviceMemoryProperties{}; |
| 175 | PFN_vkGetPhysicalDeviceMemoryProperties2 vkGetPhysicalDeviceMemoryProperties2{}; | ||
| 175 | PFN_vkGetPhysicalDeviceProperties vkGetPhysicalDeviceProperties{}; | 176 | PFN_vkGetPhysicalDeviceProperties vkGetPhysicalDeviceProperties{}; |
| 176 | PFN_vkGetPhysicalDeviceProperties2KHR vkGetPhysicalDeviceProperties2KHR{}; | 177 | PFN_vkGetPhysicalDeviceProperties2KHR vkGetPhysicalDeviceProperties2KHR{}; |
| 177 | PFN_vkGetPhysicalDeviceQueueFamilyProperties vkGetPhysicalDeviceQueueFamilyProperties{}; | 178 | PFN_vkGetPhysicalDeviceQueueFamilyProperties vkGetPhysicalDeviceQueueFamilyProperties{}; |
| @@ -950,7 +951,8 @@ public: | |||
| 950 | 951 | ||
| 951 | std::vector<VkPresentModeKHR> GetSurfacePresentModesKHR(VkSurfaceKHR) const; | 952 | std::vector<VkPresentModeKHR> GetSurfacePresentModesKHR(VkSurfaceKHR) const; |
| 952 | 953 | ||
| 953 | VkPhysicalDeviceMemoryProperties GetMemoryProperties() const noexcept; | 954 | VkPhysicalDeviceMemoryProperties2 GetMemoryProperties( |
| 955 | void* next_structures = nullptr) const noexcept; | ||
| 954 | 956 | ||
| 955 | private: | 957 | private: |
| 956 | VkPhysicalDevice physical_device = nullptr; | 958 | VkPhysicalDevice physical_device = nullptr; |