author      2022-10-06 21:29:53 +0200
committer   2022-10-06 21:29:53 +0200
commit      1effa578f12f79d7816e3543291f302f126cc1d2 (patch)
tree        14803b31b6817294d40d57446f6fa94c5ff3fe9a /src/video_core/buffer_cache
parent      Merge pull request #9025 from FernandoS27/slava-ukrayini (diff)
parent      vulkan_blitter: Fix pool allocation double free. (diff)
Merge pull request #8467 from FernandoS27/yfc-rel-1
Project yuzu Fried Chicken (Y.F.C.) Part 1
Diffstat (limited to 'src/video_core/buffer_cache')
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.h | 166
1 file changed, 84 insertions, 82 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index f015dae56..8e26b3f95 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -5,7 +5,6 @@
 
 #include <algorithm>
 #include <array>
-#include <deque>
 #include <memory>
 #include <mutex>
 #include <numeric>
@@ -23,6 +22,7 @@
 #include "common/settings.h"
 #include "core/memory.h"
 #include "video_core/buffer_cache/buffer_base.h"
+#include "video_core/control/channel_state_cache.h"
 #include "video_core/delayed_destruction_ring.h"
 #include "video_core/dirty_flags.h"
 #include "video_core/engines/kepler_compute.h"
@@ -56,7 +56,7 @@ using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFE
 using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>;
 
 template <typename P>
-class BufferCache {
+class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
 
     // Page size for caching purposes.
    // This is unrelated to the CPU page size and it can be changed as it seems optimal.
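The new base class above is the heart of this merge: BufferCache stops holding engine references fixed at construction and instead inherits per-channel state, which is why every `maxwell3d.`, `kepler_compute.`, and `gpu_memory.` access later in this diff becomes a `->` through a pointer. A minimal sketch of that pattern, assuming the base class simply stores pointers for the currently bound channel; the real ChannelSetupCaches API in video_core/control/channel_state_cache.h is richer, and names beyond those visible in the diff are illustrative:

```cpp
// Sketch only: per-channel engine state, rebound when the active GPU
// channel changes, instead of references fixed at construction time.
namespace Tegra {
class MemoryManager;
namespace Engines {
class Maxwell3D;
class KeplerCompute;
} // namespace Engines
} // namespace Tegra

namespace VideoCommon {

struct ChannelInfo {
    Tegra::Engines::Maxwell3D* maxwell3d{};
    Tegra::Engines::KeplerCompute* kepler_compute{};
    Tegra::MemoryManager* gpu_memory{};
};

template <class Info>
class ChannelSetupCaches {
public:
    // Bind the cache to a channel; subsequent cache operations read that
    // channel's engines through the protected pointers below.
    void BindToChannel(const Info& info) {
        maxwell3d = info.maxwell3d;
        kepler_compute = info.kepler_compute;
        gpu_memory = info.gpu_memory;
    }

protected:
    Tegra::Engines::Maxwell3D* maxwell3d{};
    Tegra::Engines::KeplerCompute* kepler_compute{};
    Tegra::MemoryManager* gpu_memory{};
};

} // namespace VideoCommon
```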
@@ -116,10 +116,7 @@ public:
     static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB);
 
     explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
-                         Tegra::Engines::Maxwell3D& maxwell3d_,
-                         Tegra::Engines::KeplerCompute& kepler_compute_,
-                         Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
-                         Runtime& runtime_);
+                         Core::Memory::Memory& cpu_memory_, Runtime& runtime_);
 
     void TickFrame();
 
@@ -129,7 +126,7 @@ public:
 
     void DownloadMemory(VAddr cpu_addr, u64 size);
 
-    bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<u8> inlined_buffer);
+    bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer);
 
     void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size);
 
@@ -353,7 +350,7 @@ private:
 
     void NotifyBufferDeletion();
 
-    [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr) const;
+    [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr, bool is_written = false) const;
 
     [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size,
                                                                PixelFormat format);
@@ -367,9 +364,6 @@ private:
     void ClearDownload(IntervalType subtract_interval);
 
     VideoCore::RasterizerInterface& rasterizer;
-    Tegra::Engines::Maxwell3D& maxwell3d;
-    Tegra::Engines::KeplerCompute& kepler_compute;
-    Tegra::MemoryManager& gpu_memory;
     Core::Memory::Memory& cpu_memory;
 
     SlotVector<Buffer> slot_buffers;
@@ -444,12 +438,8 @@ private:
 
 template <class P>
 BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
-                            Tegra::Engines::Maxwell3D& maxwell3d_,
-                            Tegra::Engines::KeplerCompute& kepler_compute_,
-                            Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
-                            Runtime& runtime_)
-    : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
-      kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} {
+                            Core::Memory::Memory& cpu_memory_, Runtime& runtime_)
+    : runtime{runtime_}, rasterizer{rasterizer_}, cpu_memory{cpu_memory_} {
     // Ensure the first slot is used for the null buffer
     void(slot_buffers.insert(runtime, NullBufferParams{}));
     common_ranges.clear();
@@ -552,8 +542,8 @@ void BufferCache<P>::ClearDownload(IntervalType subtract_interval) {
 
 template <class P>
 bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) {
-    const std::optional<VAddr> cpu_src_address = gpu_memory.GpuToCpuAddress(src_address);
-    const std::optional<VAddr> cpu_dest_address = gpu_memory.GpuToCpuAddress(dest_address);
+    const std::optional<VAddr> cpu_src_address = gpu_memory->GpuToCpuAddress(src_address);
+    const std::optional<VAddr> cpu_dest_address = gpu_memory->GpuToCpuAddress(dest_address);
     if (!cpu_src_address || !cpu_dest_address) {
         return false;
     }
@@ -611,7 +601,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
 
 template <class P>
 bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
-    const std::optional<VAddr> cpu_dst_address = gpu_memory.GpuToCpuAddress(dst_address);
+    const std::optional<VAddr> cpu_dst_address = gpu_memory->GpuToCpuAddress(dst_address);
     if (!cpu_dst_address) {
         return false;
     }
@@ -635,7 +625,7 @@ bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
 template <class P>
 void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
                                                u32 size) {
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
     const Binding binding{
         .cpu_addr = *cpu_addr,
         .size = size,
@@ -673,7 +663,7 @@ void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) {
     if (is_indexed) {
         BindHostIndexBuffer();
     } else if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
-        const auto& regs = maxwell3d.regs;
+        const auto& regs = maxwell3d->regs;
         if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
             runtime.BindQuadArrayIndexBuffer(regs.vertex_buffer.first, regs.vertex_buffer.count);
         }
@@ -733,9 +723,9 @@ void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index,
     enabled_storage_buffers[stage] |= 1U << ssbo_index;
     written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index;
 
-    const auto& cbufs = maxwell3d.state.shader_stages[stage];
+    const auto& cbufs = maxwell3d->state.shader_stages[stage];
     const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset;
-    storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr);
+    storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr, is_written);
 }
 
 template <class P>
@@ -770,12 +760,12 @@ void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index,
     enabled_compute_storage_buffers |= 1U << ssbo_index;
     written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index;
 
-    const auto& launch_desc = kepler_compute.launch_description;
+    const auto& launch_desc = kepler_compute->launch_description;
     ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0);
 
     const auto& cbufs = launch_desc.const_buffer_config;
     const GPUVAddr ssbo_addr = cbufs[cbuf_index].Address() + cbuf_offset;
-    compute_storage_buffers[ssbo_index] = StorageBufferBinding(ssbo_addr);
+    compute_storage_buffers[ssbo_index] = StorageBufferBinding(ssbo_addr, is_written);
 }
 
 template <class P>
@@ -836,6 +826,19 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
     const bool is_accuracy_normal =
         Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal;
 
+    auto it = committed_ranges.begin();
+    while (it != committed_ranges.end()) {
+        auto& current_intervals = *it;
+        auto next_it = std::next(it);
+        while (next_it != committed_ranges.end()) {
+            for (auto& interval : *next_it) {
+                current_intervals.subtract(interval);
+            }
+            next_it++;
+        }
+        it++;
+    }
+
     boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads;
     u64 total_size_bytes = 0;
     u64 largest_copy = 0;
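The new pass above de-duplicates pending download ranges: every interval that appears in a later entry of committed_ranges is subtracted from each earlier entry, so no byte range is downloaded twice. A self-contained sketch of the underlying interval arithmetic, assuming the interval aliases wrap boost::icl, which is consistent with the .subtract() calls in the hunk even though the typedefs sit outside this diff:

```cpp
#include <boost/icl/interval_set.hpp>
#include <cstdint>
#include <iostream>

using IntervalSet = boost::icl::interval_set<std::uint64_t>;
using Interval = IntervalSet::interval_type;

int main() {
    // An earlier commit covers [0x1000, 0x3000); a later one covers [0x2000, 0x4000).
    IntervalSet earlier(Interval::right_open(0x1000, 0x3000));
    const IntervalSet later(Interval::right_open(0x2000, 0x4000));

    // Subtract each interval of the later set from the earlier set, as the
    // deduplication loop does for every pair of committed_ranges entries.
    for (const auto& interval : later) {
        earlier.subtract(interval);
    }

    // Only [0x1000, 0x2000) remains for the earlier entry to download.
    std::cout << earlier << '\n'; // prints {[4096,8192)}
}
```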
@@ -991,19 +994,19 @@ void BufferCache<P>::BindHostIndexBuffer() {
     const u32 size = index_buffer.size;
     SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
     if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
-        const u32 new_offset = offset + maxwell3d.regs.index_array.first *
-                                            maxwell3d.regs.index_array.FormatSizeInBytes();
+        const u32 new_offset = offset + maxwell3d->regs.index_array.first *
+                                            maxwell3d->regs.index_array.FormatSizeInBytes();
         runtime.BindIndexBuffer(buffer, new_offset, size);
     } else {
-        runtime.BindIndexBuffer(maxwell3d.regs.draw.topology, maxwell3d.regs.index_array.format,
-                                maxwell3d.regs.index_array.first, maxwell3d.regs.index_array.count,
-                                buffer, offset, size);
+        runtime.BindIndexBuffer(maxwell3d->regs.draw.topology, maxwell3d->regs.index_array.format,
+                                maxwell3d->regs.index_array.first,
+                                maxwell3d->regs.index_array.count, buffer, offset, size);
     }
 }
 
 template <class P>
 void BufferCache<P>::BindHostVertexBuffers() {
-    auto& flags = maxwell3d.dirty.flags;
+    auto& flags = maxwell3d->dirty.flags;
     for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
         const Binding& binding = vertex_buffers[index];
         Buffer& buffer = slot_buffers[binding.buffer_id];
@@ -1014,7 +1017,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
         }
         flags[Dirty::VertexBuffer0 + index] = false;
 
-        const u32 stride = maxwell3d.regs.vertex_array[index].stride;
+        const u32 stride = maxwell3d->regs.vertex_array[index].stride;
         const u32 offset = buffer.Offset(binding.cpu_addr);
         runtime.BindVertexBuffer(index, buffer, offset, binding.size, stride);
     }
@@ -1154,7 +1157,7 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
 
 template <class P>
 void BufferCache<P>::BindHostTransformFeedbackBuffers() {
-    if (maxwell3d.regs.tfb_enabled == 0) {
+    if (maxwell3d->regs.tfb_enabled == 0) {
         return;
     }
     for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
@@ -1239,16 +1242,19 @@ void BufferCache<P>::BindHostComputeTextureBuffers() {
 
 template <class P>
 void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
-    if (is_indexed) {
-        UpdateIndexBuffer();
-    }
-    UpdateVertexBuffers();
-    UpdateTransformFeedbackBuffers();
-    for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
-        UpdateUniformBuffers(stage);
-        UpdateStorageBuffers(stage);
-        UpdateTextureBuffers(stage);
-    }
+    do {
+        has_deleted_buffers = false;
+        if (is_indexed) {
+            UpdateIndexBuffer();
+        }
+        UpdateVertexBuffers();
+        UpdateTransformFeedbackBuffers();
+        for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
+            UpdateUniformBuffers(stage);
+            UpdateStorageBuffers(stage);
+            UpdateTextureBuffers(stage);
+        }
+    } while (has_deleted_buffers);
 }
 
 template <class P>
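The do/while matters because any of the Update* calls can delete and recreate a buffer, invalidating bindings computed earlier in the same pass; the loop simply reruns the whole pass until it completes without a deletion. A generic, self-contained sketch of this stabilize-by-retry pattern; the names below are illustrative, not from the diff:

```cpp
#include <cstdio>

// Illustrative stand-in for one binding update; returns true when it had to
// delete and recreate backing storage, invalidating earlier bindings.
bool UpdateBinding(int index, int pass) {
    // Pretend index 2 forces a reallocation on the first pass only.
    return pass == 0 && index == 2;
}

int main() {
    bool has_deleted_buffers = false;
    int pass = 0;
    do {
        has_deleted_buffers = false;
        for (int index = 0; index < 4; ++index) {
            has_deleted_buffers |= UpdateBinding(index, pass);
        }
        ++pass;
    } while (has_deleted_buffers); // rerun until a full pass deletes nothing
    std::printf("stable after %d passes\n", pass); // prints 2
}
```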
@@ -1262,8 +1268,8 @@ template <class P>
 void BufferCache<P>::UpdateIndexBuffer() {
     // We have to check for the dirty flags and index count
     // The index count is currently changed without updating the dirty flags
-    const auto& index_array = maxwell3d.regs.index_array;
-    auto& flags = maxwell3d.dirty.flags;
+    const auto& index_array = maxwell3d->regs.index_array;
+    auto& flags = maxwell3d->dirty.flags;
     if (!flags[Dirty::IndexBuffer] && last_index_count == index_array.count) {
         return;
     }
@@ -1272,7 +1278,7 @@ void BufferCache<P>::UpdateIndexBuffer() {
 
     const GPUVAddr gpu_addr_begin = index_array.StartAddress();
     const GPUVAddr gpu_addr_end = index_array.EndAddress();
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
     const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
     const u32 draw_size = (index_array.count + index_array.first) * index_array.FormatSizeInBytes();
     const u32 size = std::min(address_size, draw_size);
@@ -1289,8 +1295,8 @@ void BufferCache<P>::UpdateIndexBuffer() {
 
 template <class P>
 void BufferCache<P>::UpdateVertexBuffers() {
-    auto& flags = maxwell3d.dirty.flags;
-    if (!maxwell3d.dirty.flags[Dirty::VertexBuffers]) {
+    auto& flags = maxwell3d->dirty.flags;
+    if (!maxwell3d->dirty.flags[Dirty::VertexBuffers]) {
         return;
     }
     flags[Dirty::VertexBuffers] = false;
@@ -1302,33 +1308,25 @@ void BufferCache<P>::UpdateVertexBuffers() {
 
 template <class P>
 void BufferCache<P>::UpdateVertexBuffer(u32 index) {
-    if (!maxwell3d.dirty.flags[Dirty::VertexBuffer0 + index]) {
+    if (!maxwell3d->dirty.flags[Dirty::VertexBuffer0 + index]) {
         return;
     }
-    const auto& array = maxwell3d.regs.vertex_array[index];
-    const auto& limit = maxwell3d.regs.vertex_array_limit[index];
+    const auto& array = maxwell3d->regs.vertex_array[index];
+    const auto& limit = maxwell3d->regs.vertex_array_limit[index];
     const GPUVAddr gpu_addr_begin = array.StartAddress();
     const GPUVAddr gpu_addr_end = limit.LimitAddress() + 1;
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
-    u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
-    if (address_size >= 64_MiB) {
-        // Reported vertex buffer size is very large, cap to mapped buffer size
-        GPUVAddr submapped_addr_end = gpu_addr_begin;
-
-        const auto ranges{gpu_memory.GetSubmappedRange(gpu_addr_begin, address_size)};
-        if (ranges.size() > 0) {
-            const auto& [addr, size] = *ranges.begin();
-            submapped_addr_end = addr + size;
-        }
-
-        address_size =
-            std::min(address_size, static_cast<u32>(submapped_addr_end - gpu_addr_begin));
-    }
-    const u32 size = address_size; // TODO: Analyze stride and number of vertices
-    if (array.enable == 0 || size == 0 || !cpu_addr) {
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
+    u32 address_size = static_cast<u32>(
+        std::min(gpu_addr_end - gpu_addr_begin, static_cast<u64>(std::numeric_limits<u32>::max())));
+    if (array.enable == 0 || address_size == 0 || !cpu_addr) {
         vertex_buffers[index] = NULL_BINDING;
         return;
     }
+    if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) {
+        address_size =
+            static_cast<u32>(gpu_memory->MaxContinousRange(gpu_addr_begin, address_size));
+    }
+    const u32 size = address_size; // TODO: Analyze stride and number of vertices
     vertex_buffers[index] = Binding{
         .cpu_addr = *cpu_addr,
         .size = size,
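The rewritten size computation clamps the 64-bit address span to the u32 range before the cast, so an absurdly large reported vertex buffer can no longer wrap around in address_size; validating how much of the range is actually mapped is then left to the GPU memory manager. A small self-contained illustration of the clamp, with made-up addresses:

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <limits>

int main() {
    const std::uint64_t gpu_addr_begin = 0x1'0000'0000;
    const std::uint64_t gpu_addr_end = 0x2'4000'0000; // 5 GiB span, overflows u32
    // A naive static_cast<u32> of the span would silently truncate;
    // clamping against the u32 maximum first keeps the value well-defined.
    const std::uint32_t address_size = static_cast<std::uint32_t>(
        std::min(gpu_addr_end - gpu_addr_begin,
                 static_cast<std::uint64_t>(std::numeric_limits<std::uint32_t>::max())));
    std::cout << address_size << '\n'; // prints 4294967295 (0xFFFFFFFF)
}
```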
@@ -1382,7 +1380,7 @@ void BufferCache<P>::UpdateTextureBuffers(size_t stage) {
 
 template <class P>
 void BufferCache<P>::UpdateTransformFeedbackBuffers() {
-    if (maxwell3d.regs.tfb_enabled == 0) {
+    if (maxwell3d->regs.tfb_enabled == 0) {
         return;
     }
     for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
@@ -1392,10 +1390,10 @@ void BufferCache<P>::UpdateTransformFeedbackBuffers() {
 
 template <class P>
 void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
-    const auto& binding = maxwell3d.regs.tfb_bindings[index];
+    const auto& binding = maxwell3d->regs.tfb_bindings[index];
     const GPUVAddr gpu_addr = binding.Address() + binding.buffer_offset;
     const u32 size = binding.buffer_size;
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
     if (binding.buffer_enable == 0 || size == 0 || !cpu_addr) {
         transform_feedback_buffers[index] = NULL_BINDING;
         return;
@@ -1414,10 +1412,10 @@ void BufferCache<P>::UpdateComputeUniformBuffers() {
     ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
         Binding& binding = compute_uniform_buffers[index];
         binding = NULL_BINDING;
-        const auto& launch_desc = kepler_compute.launch_description;
+        const auto& launch_desc = kepler_compute->launch_description;
         if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) {
             const auto& cbuf = launch_desc.const_buffer_config[index];
-            const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(cbuf.Address());
+            const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(cbuf.Address());
             if (cpu_addr) {
                 binding.cpu_addr = *cpu_addr;
                 binding.size = cbuf.size;
@@ -1567,6 +1565,8 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
     const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size);
     const u32 size = static_cast<u32>(overlap.end - overlap.begin);
     const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
+    auto& new_buffer = slot_buffers[new_buffer_id];
+    runtime.ClearBuffer(new_buffer, 0, new_buffer.SizeBytes(), 0);
     for (const BufferId overlap_id : overlap.ids) {
         JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
     }
@@ -1695,7 +1695,7 @@ void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes,
 
 template <class P>
 bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
-                                  std::span<u8> inlined_buffer) {
+                                  std::span<const u8> inlined_buffer) {
     const bool is_dirty = IsRegionRegistered(dest_address, copy_size);
     if (!is_dirty) {
         return false;
@@ -1831,7 +1831,7 @@ void BufferCache<P>::NotifyBufferDeletion() {
         dirty_uniform_buffers.fill(~u32{0});
         uniform_buffer_binding_sizes.fill({});
     }
-    auto& flags = maxwell3d.dirty.flags;
+    auto& flags = maxwell3d->dirty.flags;
     flags[Dirty::IndexBuffer] = true;
     flags[Dirty::VertexBuffers] = true;
     for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
@@ -1841,16 +1841,18 @@ void BufferCache<P>::NotifyBufferDeletion() {
 }
 
 template <class P>
-typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr) const {
-    const GPUVAddr gpu_addr = gpu_memory.Read<u64>(ssbo_addr);
-    const u32 size = gpu_memory.Read<u32>(ssbo_addr + 8);
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr,
+                                                                      bool is_written) const {
+    const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr);
+    const u32 size = gpu_memory->Read<u32>(ssbo_addr + 8);
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
     if (!cpu_addr || size == 0) {
         return NULL_BINDING;
     }
+    const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE);
     const Binding binding{
         .cpu_addr = *cpu_addr,
-        .size = size,
+        .size = is_written ? size : static_cast<u32>(cpu_end - *cpu_addr),
         .buffer_id = BufferId{},
     };
     return binding;
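Read-only storage buffer bindings are now padded up to the next page boundary, while written buffers keep their exact declared size so writes cannot be flushed past the SSBO's range. A worked example of the page-align arithmetic; the diff relies on Common::AlignUp and Core::Memory::YUZU_PAGESIZE from elsewhere in the tree, so the local helper and the 4 KiB page size below are assumptions for illustration:

```cpp
#include <cstdint>
#include <iostream>

// Assumed helper: round value up to the next multiple of a power-of-two alignment.
constexpr std::uint64_t AlignUp(std::uint64_t value, std::uint64_t align) {
    return (value + align - 1) & ~(align - 1);
}

int main() {
    constexpr std::uint64_t page_size = 0x1000; // assumed 4 KiB YUZU_PAGESIZE
    const std::uint64_t cpu_addr = 0x8000'1200;
    const std::uint32_t size = 0x0900; // SSBO size read from the const buffer
    const std::uint64_t cpu_end = AlignUp(cpu_addr + size, page_size);
    // Read-only binding spans 0x8000'1200..0x8000'2000, i.e. 0xE00 bytes.
    std::cout << std::hex << cpu_end - cpu_addr << '\n'; // prints e00
}
```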
@@ -1859,7 +1861,7 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s
 template <class P>
 typename BufferCache<P>::TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(
     GPUVAddr gpu_addr, u32 size, PixelFormat format) {
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
     TextureBufferBinding binding;
     if (!cpu_addr || size == 0) {
         binding.cpu_addr = 0;