author    Fernando S 2022-10-06 21:29:53 +0200
committer GitHub     2022-10-06 21:29:53 +0200
commit    1effa578f12f79d7816e3543291f302f126cc1d2
tree      14803b31b6817294d40d57446f6fa94c5ff3fe9a /src/video_core/buffer_cache
parent    Merge pull request #9025 from FernandoS27/slava-ukrayini
parent    vulkan_blitter: Fix pool allocation double free.
Merge pull request #8467 from FernandoS27/yfc-rel-1
Project yuzu Fried Chicken (Y.F.C.) Part 1
Diffstat (limited to 'src/video_core/buffer_cache')
-rw-r--r-- src/video_core/buffer_cache/buffer_cache.h | 166
1 file changed, 84 insertions(+), 82 deletions(-)
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index f015dae56..8e26b3f95 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -5,7 +5,6 @@
 
 #include <algorithm>
 #include <array>
-#include <deque>
 #include <memory>
 #include <mutex>
 #include <numeric>
@@ -23,6 +22,7 @@
 #include "common/settings.h"
 #include "core/memory.h"
 #include "video_core/buffer_cache/buffer_base.h"
+#include "video_core/control/channel_state_cache.h"
 #include "video_core/delayed_destruction_ring.h"
 #include "video_core/dirty_flags.h"
 #include "video_core/engines/kepler_compute.h"
@@ -56,7 +56,7 @@ using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFE
 using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>;
 
 template <typename P>
-class BufferCache {
+class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
 
     // Page size for caching purposes.
     // This is unrelated to the CPU page size and it can be changed as it seems optimal.
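
ChannelSetupCaches comes from the new video_core/control layer this merge introduces; it lets one buffer cache serve multiple GPU channels instead of holding fixed engine references. A rough sketch of the shape implied by this diff; the real interface lives in video_core/control/channel_state_cache.h, and the method name here is an assumption inferred from the pointer dereferences appearing throughout the hunks below:

namespace Tegra {
class MemoryManager;
namespace Engines {
class Maxwell3D;
class KeplerCompute;
} // namespace Engines
} // namespace Tegra

template <class ChannelInfo>
class ChannelSetupCachesSketch {
public:
    // Hypothetical hook invoked on a channel switch: engine state that used to
    // be value references in BufferCache becomes per-channel pointers.
    void BindChannel(Tegra::Engines::Maxwell3D* m3d, Tegra::Engines::KeplerCompute* kc,
                     Tegra::MemoryManager* mem) {
        maxwell3d = m3d;
        kepler_compute = kc;
        gpu_memory = mem;
    }

protected:
    Tegra::Engines::Maxwell3D* maxwell3d = nullptr;
    Tegra::Engines::KeplerCompute* kepler_compute = nullptr;
    Tegra::MemoryManager* gpu_memory = nullptr;
};

This is why the constructor below drops its Maxwell3D, KeplerCompute, and MemoryManager parameters, and why every `maxwell3d.`, `kepler_compute.`, and `gpu_memory.` access in the rest of the diff becomes a `->` dereference.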
@@ -116,10 +116,7 @@ public:
     static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB);
 
     explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
-                         Tegra::Engines::Maxwell3D& maxwell3d_,
-                         Tegra::Engines::KeplerCompute& kepler_compute_,
-                         Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
-                         Runtime& runtime_);
+                         Core::Memory::Memory& cpu_memory_, Runtime& runtime_);
 
     void TickFrame();
 
@@ -129,7 +126,7 @@ public:
 
     void DownloadMemory(VAddr cpu_addr, u64 size);
 
-    bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<u8> inlined_buffer);
+    bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer);
 
     void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size);
 
@@ -353,7 +350,7 @@ private:
 
     void NotifyBufferDeletion();
 
-    [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr) const;
+    [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr, bool is_written = false) const;
 
     [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size,
                                                                PixelFormat format);
@@ -367,9 +364,6 @@ private:
 
     void ClearDownload(IntervalType subtract_interval);
 
     VideoCore::RasterizerInterface& rasterizer;
-    Tegra::Engines::Maxwell3D& maxwell3d;
-    Tegra::Engines::KeplerCompute& kepler_compute;
-    Tegra::MemoryManager& gpu_memory;
     Core::Memory::Memory& cpu_memory;
 
     SlotVector<Buffer> slot_buffers;
@@ -444,12 +438,8 @@ private:
 
 template <class P>
 BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
-                            Tegra::Engines::Maxwell3D& maxwell3d_,
-                            Tegra::Engines::KeplerCompute& kepler_compute_,
-                            Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
-                            Runtime& runtime_)
-    : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
-      kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} {
+                            Core::Memory::Memory& cpu_memory_, Runtime& runtime_)
+    : runtime{runtime_}, rasterizer{rasterizer_}, cpu_memory{cpu_memory_} {
     // Ensure the first slot is used for the null buffer
     void(slot_buffers.insert(runtime, NullBufferParams{}));
     common_ranges.clear();
@@ -552,8 +542,8 @@ void BufferCache<P>::ClearDownload(IntervalType subtract_interval) {
 
 template <class P>
 bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) {
-    const std::optional<VAddr> cpu_src_address = gpu_memory.GpuToCpuAddress(src_address);
-    const std::optional<VAddr> cpu_dest_address = gpu_memory.GpuToCpuAddress(dest_address);
+    const std::optional<VAddr> cpu_src_address = gpu_memory->GpuToCpuAddress(src_address);
+    const std::optional<VAddr> cpu_dest_address = gpu_memory->GpuToCpuAddress(dest_address);
     if (!cpu_src_address || !cpu_dest_address) {
         return false;
     }
@@ -611,7 +601,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
 
 template <class P>
 bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
-    const std::optional<VAddr> cpu_dst_address = gpu_memory.GpuToCpuAddress(dst_address);
+    const std::optional<VAddr> cpu_dst_address = gpu_memory->GpuToCpuAddress(dst_address);
     if (!cpu_dst_address) {
         return false;
     }
@@ -635,7 +625,7 @@ bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
 template <class P>
 void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
                                                u32 size) {
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
     const Binding binding{
         .cpu_addr = *cpu_addr,
         .size = size,
@@ -673,7 +663,7 @@ void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) {
     if (is_indexed) {
         BindHostIndexBuffer();
     } else if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
-        const auto& regs = maxwell3d.regs;
+        const auto& regs = maxwell3d->regs;
         if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
             runtime.BindQuadArrayIndexBuffer(regs.vertex_buffer.first, regs.vertex_buffer.count);
         }
@@ -733,9 +723,9 @@ void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index,
     enabled_storage_buffers[stage] |= 1U << ssbo_index;
     written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index;
 
-    const auto& cbufs = maxwell3d.state.shader_stages[stage];
+    const auto& cbufs = maxwell3d->state.shader_stages[stage];
     const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset;
-    storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr);
+    storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr, is_written);
 }
 
 template <class P>
@@ -770,12 +760,12 @@ void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index,
     enabled_compute_storage_buffers |= 1U << ssbo_index;
     written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index;
 
-    const auto& launch_desc = kepler_compute.launch_description;
+    const auto& launch_desc = kepler_compute->launch_description;
     ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0);
 
     const auto& cbufs = launch_desc.const_buffer_config;
     const GPUVAddr ssbo_addr = cbufs[cbuf_index].Address() + cbuf_offset;
-    compute_storage_buffers[ssbo_index] = StorageBufferBinding(ssbo_addr);
+    compute_storage_buffers[ssbo_index] = StorageBufferBinding(ssbo_addr, is_written);
 }
 
 template <class P>
@@ -836,6 +826,19 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
     const bool is_accuracy_normal =
         Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal;
 
+    auto it = committed_ranges.begin();
+    while (it != committed_ranges.end()) {
+        auto& current_intervals = *it;
+        auto next_it = std::next(it);
+        while (next_it != committed_ranges.end()) {
+            for (auto& interval : *next_it) {
+                current_intervals.subtract(interval);
+            }
+            next_it++;
+        }
+        it++;
+    }
+
     boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads;
     u64 total_size_bytes = 0;
     u64 largest_copy = 0;
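
The added loop makes each committed interval set subtract every interval committed after it, so an address range pending download in several commits is only flushed once. For reference, a standalone sketch of the same pass, assuming committed_ranges behaves like a deque of boost::icl::interval_set over guest addresses (yuzu's concrete IntervalSet and container types differ in detail):

#include <cstdint>
#include <deque>
#include <iterator>
#include <boost/icl/interval_set.hpp>

using IntervalSet = boost::icl::interval_set<std::uint64_t>;

void DeduplicateCommittedRanges(std::deque<IntervalSet>& committed_ranges) {
    for (auto it = committed_ranges.begin(); it != committed_ranges.end(); ++it) {
        for (auto next_it = std::next(it); next_it != committed_ranges.end(); ++next_it) {
            for (const auto& interval : *next_it) {
                it->subtract(interval); // drop spans already queued by a later commit
            }
        }
    }
}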
@@ -991,19 +994,19 @@ void BufferCache<P>::BindHostIndexBuffer() {
     const u32 size = index_buffer.size;
     SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
     if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
-        const u32 new_offset = offset + maxwell3d.regs.index_array.first *
-                                            maxwell3d.regs.index_array.FormatSizeInBytes();
+        const u32 new_offset = offset + maxwell3d->regs.index_array.first *
+                                            maxwell3d->regs.index_array.FormatSizeInBytes();
         runtime.BindIndexBuffer(buffer, new_offset, size);
     } else {
-        runtime.BindIndexBuffer(maxwell3d.regs.draw.topology, maxwell3d.regs.index_array.format,
-                                maxwell3d.regs.index_array.first, maxwell3d.regs.index_array.count,
-                                buffer, offset, size);
+        runtime.BindIndexBuffer(maxwell3d->regs.draw.topology, maxwell3d->regs.index_array.format,
+                                maxwell3d->regs.index_array.first,
+                                maxwell3d->regs.index_array.count, buffer, offset, size);
     }
 }
 
 template <class P>
 void BufferCache<P>::BindHostVertexBuffers() {
-    auto& flags = maxwell3d.dirty.flags;
+    auto& flags = maxwell3d->dirty.flags;
     for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
         const Binding& binding = vertex_buffers[index];
         Buffer& buffer = slot_buffers[binding.buffer_id];
@@ -1014,7 +1017,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
         }
         flags[Dirty::VertexBuffer0 + index] = false;
 
-        const u32 stride = maxwell3d.regs.vertex_array[index].stride;
+        const u32 stride = maxwell3d->regs.vertex_array[index].stride;
         const u32 offset = buffer.Offset(binding.cpu_addr);
         runtime.BindVertexBuffer(index, buffer, offset, binding.size, stride);
     }
@@ -1154,7 +1157,7 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
 
 template <class P>
 void BufferCache<P>::BindHostTransformFeedbackBuffers() {
-    if (maxwell3d.regs.tfb_enabled == 0) {
+    if (maxwell3d->regs.tfb_enabled == 0) {
         return;
     }
     for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
@@ -1239,16 +1242,19 @@ void BufferCache<P>::BindHostComputeTextureBuffers() {
 
 template <class P>
 void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
-    if (is_indexed) {
-        UpdateIndexBuffer();
-    }
-    UpdateVertexBuffers();
-    UpdateTransformFeedbackBuffers();
-    for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
-        UpdateUniformBuffers(stage);
-        UpdateStorageBuffers(stage);
-        UpdateTextureBuffers(stage);
-    }
+    do {
+        has_deleted_buffers = false;
+        if (is_indexed) {
+            UpdateIndexBuffer();
+        }
+        UpdateVertexBuffers();
+        UpdateTransformFeedbackBuffers();
+        for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
+            UpdateUniformBuffers(stage);
+            UpdateStorageBuffers(stage);
+            UpdateTextureBuffers(stage);
+        }
+    } while (has_deleted_buffers);
 }
 
 template <class P>
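
The do/while turns the binding update into a fixed-point pass: any Update* step may delete an overlapping cached buffer, which invalidates bindings resolved earlier in the same iteration, so the whole pass repeats until it completes without a deletion. A self-contained sketch of the pattern, with stand-in step names:

namespace {
bool has_deleted_buffers = false; // a member of BufferCache in the real code

// Stand-ins for the Update* chain; a real step sets the flag when it frees a
// buffer whose binding was already resolved this pass.
void UpdateIndexBuffer() { /* may set has_deleted_buffers = true */ }
void UpdateVertexBuffers() { /* may set has_deleted_buffers = true */ }
} // namespace

void DoUpdateGraphicsBuffersSketch(bool is_indexed) {
    do {
        has_deleted_buffers = false;
        if (is_indexed) {
            UpdateIndexBuffer();
        }
        UpdateVertexBuffers();
        // If any step deleted a buffer, earlier bindings may dangle; re-run
        // the whole pass until it is stable.
    } while (has_deleted_buffers);
}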
@@ -1262,8 +1268,8 @@ template <class P>
 void BufferCache<P>::UpdateIndexBuffer() {
     // We have to check for the dirty flags and index count
     // The index count is currently changed without updating the dirty flags
-    const auto& index_array = maxwell3d.regs.index_array;
-    auto& flags = maxwell3d.dirty.flags;
+    const auto& index_array = maxwell3d->regs.index_array;
+    auto& flags = maxwell3d->dirty.flags;
     if (!flags[Dirty::IndexBuffer] && last_index_count == index_array.count) {
         return;
     }
@@ -1272,7 +1278,7 @@ void BufferCache<P>::UpdateIndexBuffer() {
 
     const GPUVAddr gpu_addr_begin = index_array.StartAddress();
     const GPUVAddr gpu_addr_end = index_array.EndAddress();
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
     const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
     const u32 draw_size = (index_array.count + index_array.first) * index_array.FormatSizeInBytes();
     const u32 size = std::min(address_size, draw_size);
@@ -1289,8 +1295,8 @@ void BufferCache<P>::UpdateIndexBuffer() {
 
 template <class P>
 void BufferCache<P>::UpdateVertexBuffers() {
-    auto& flags = maxwell3d.dirty.flags;
-    if (!maxwell3d.dirty.flags[Dirty::VertexBuffers]) {
+    auto& flags = maxwell3d->dirty.flags;
+    if (!maxwell3d->dirty.flags[Dirty::VertexBuffers]) {
         return;
     }
     flags[Dirty::VertexBuffers] = false;
@@ -1302,33 +1308,25 @@ void BufferCache<P>::UpdateVertexBuffers() {
 
 template <class P>
 void BufferCache<P>::UpdateVertexBuffer(u32 index) {
-    if (!maxwell3d.dirty.flags[Dirty::VertexBuffer0 + index]) {
+    if (!maxwell3d->dirty.flags[Dirty::VertexBuffer0 + index]) {
         return;
     }
-    const auto& array = maxwell3d.regs.vertex_array[index];
-    const auto& limit = maxwell3d.regs.vertex_array_limit[index];
+    const auto& array = maxwell3d->regs.vertex_array[index];
+    const auto& limit = maxwell3d->regs.vertex_array_limit[index];
     const GPUVAddr gpu_addr_begin = array.StartAddress();
     const GPUVAddr gpu_addr_end = limit.LimitAddress() + 1;
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
-    u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
-    if (address_size >= 64_MiB) {
-        // Reported vertex buffer size is very large, cap to mapped buffer size
-        GPUVAddr submapped_addr_end = gpu_addr_begin;
-
-        const auto ranges{gpu_memory.GetSubmappedRange(gpu_addr_begin, address_size)};
-        if (ranges.size() > 0) {
-            const auto& [addr, size] = *ranges.begin();
-            submapped_addr_end = addr + size;
-        }
-
-        address_size =
-            std::min(address_size, static_cast<u32>(submapped_addr_end - gpu_addr_begin));
-    }
-    const u32 size = address_size; // TODO: Analyze stride and number of vertices
-    if (array.enable == 0 || size == 0 || !cpu_addr) {
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
+    u32 address_size = static_cast<u32>(
+        std::min(gpu_addr_end - gpu_addr_begin, static_cast<u64>(std::numeric_limits<u32>::max())));
+    if (array.enable == 0 || address_size == 0 || !cpu_addr) {
         vertex_buffers[index] = NULL_BINDING;
         return;
     }
+    if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) {
+        address_size =
+            static_cast<u32>(gpu_memory->MaxContinousRange(gpu_addr_begin, address_size));
+    }
+    const u32 size = address_size; // TODO: Analyze stride and number of vertices
     vertex_buffers[index] = Binding{
         .cpu_addr = *cpu_addr,
         .size = size,
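
The old 64_MiB heuristic is replaced by two explicit guards: the reported span is first clamped to what fits in a u32, and a range that runs past mapped GPU memory is shrunk to the largest continuous mapping, via the memory manager's new IsWithinGPUAddressRange and MaxContinousRange calls. The arithmetic in isolation, with the memory-manager queries passed in as plain values:

#include <algorithm>
#include <cstdint>
#include <limits>

using GPUVAddr = std::uint64_t;

// end_is_mapped stands in for gpu_memory->IsWithinGPUAddressRange(end), and
// max_continuous for gpu_memory->MaxContinousRange(begin, size): the longest
// contiguously mapped span starting at begin.
std::uint32_t ClampVertexBufferSize(GPUVAddr begin, GPUVAddr end, bool end_is_mapped,
                                    std::uint64_t max_continuous) {
    // Guest limit registers can put end - begin past 32 bits; clamp first.
    const std::uint64_t raw = end - begin;
    std::uint32_t size = static_cast<std::uint32_t>(
        std::min<std::uint64_t>(raw, std::numeric_limits<std::uint32_t>::max()));
    if (!end_is_mapped) {
        // Shrink to the contiguously mapped prefix of the range.
        size = static_cast<std::uint32_t>(std::min<std::uint64_t>(size, max_continuous));
    }
    return size;
}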
@@ -1382,7 +1380,7 @@ void BufferCache<P>::UpdateTextureBuffers(size_t stage) {
 
 template <class P>
 void BufferCache<P>::UpdateTransformFeedbackBuffers() {
-    if (maxwell3d.regs.tfb_enabled == 0) {
+    if (maxwell3d->regs.tfb_enabled == 0) {
         return;
     }
     for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
@@ -1392,10 +1390,10 @@ void BufferCache<P>::UpdateTransformFeedbackBuffers() {
 
 template <class P>
 void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
-    const auto& binding = maxwell3d.regs.tfb_bindings[index];
+    const auto& binding = maxwell3d->regs.tfb_bindings[index];
     const GPUVAddr gpu_addr = binding.Address() + binding.buffer_offset;
     const u32 size = binding.buffer_size;
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
     if (binding.buffer_enable == 0 || size == 0 || !cpu_addr) {
         transform_feedback_buffers[index] = NULL_BINDING;
         return;
@@ -1414,10 +1412,10 @@ void BufferCache<P>::UpdateComputeUniformBuffers() {
     ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
         Binding& binding = compute_uniform_buffers[index];
         binding = NULL_BINDING;
-        const auto& launch_desc = kepler_compute.launch_description;
+        const auto& launch_desc = kepler_compute->launch_description;
         if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) {
             const auto& cbuf = launch_desc.const_buffer_config[index];
-            const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(cbuf.Address());
+            const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(cbuf.Address());
             if (cpu_addr) {
                 binding.cpu_addr = *cpu_addr;
                 binding.size = cbuf.size;
@@ -1567,6 +1565,8 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
     const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size);
     const u32 size = static_cast<u32>(overlap.end - overlap.begin);
     const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
+    auto& new_buffer = slot_buffers[new_buffer_id];
+    runtime.ClearBuffer(new_buffer, 0, new_buffer.SizeBytes(), 0);
     for (const BufferId overlap_id : overlap.ids) {
         JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
     }
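
Freshly created buffers are now zero-cleared before any overlapping contents are joined in, so regions the guest never wrote read back as zeros rather than stale device memory. One plausible Vulkan-level backing for runtime.ClearBuffer, shown only as an assumption about the backend (the real call goes through the runtime abstraction):

#include <vulkan/vulkan.h>

// Records a fill of the whole allocation with zeros. VK_WHOLE_SIZE fills from
// the offset to the end of the buffer and sidesteps the multiple-of-4 size
// requirement on explicit sizes.
void ClearNewBuffer(VkCommandBuffer cmdbuf, VkBuffer buffer) {
    vkCmdFillBuffer(cmdbuf, buffer, /*dstOffset=*/0, VK_WHOLE_SIZE, /*data=*/0u);
}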
@@ -1695,7 +1695,7 @@ void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes,
 
 template <class P>
 bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
-                                  std::span<u8> inlined_buffer) {
+                                  std::span<const u8> inlined_buffer) {
     const bool is_dirty = IsRegionRegistered(dest_address, copy_size);
     if (!is_dirty) {
         return false;
@@ -1831,7 +1831,7 @@ void BufferCache<P>::NotifyBufferDeletion() {
         dirty_uniform_buffers.fill(~u32{0});
         uniform_buffer_binding_sizes.fill({});
     }
-    auto& flags = maxwell3d.dirty.flags;
+    auto& flags = maxwell3d->dirty.flags;
     flags[Dirty::IndexBuffer] = true;
     flags[Dirty::VertexBuffers] = true;
     for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
@@ -1841,16 +1841,18 @@ void BufferCache<P>::NotifyBufferDeletion() {
 }
 
 template <class P>
-typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr) const {
-    const GPUVAddr gpu_addr = gpu_memory.Read<u64>(ssbo_addr);
-    const u32 size = gpu_memory.Read<u32>(ssbo_addr + 8);
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr,
+                                                                      bool is_written) const {
+    const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr);
+    const u32 size = gpu_memory->Read<u32>(ssbo_addr + 8);
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
     if (!cpu_addr || size == 0) {
         return NULL_BINDING;
     }
+    const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE);
     const Binding binding{
         .cpu_addr = *cpu_addr,
-        .size = size,
+        .size = is_written ? size : static_cast<u32>(cpu_end - *cpu_addr),
         .buffer_id = BufferId{},
     };
     return binding;
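
Read-only storage buffers are now padded out to the end of their last guest page, while written buffers keep their exact guest-reported size, presumably so writeback ranges stay tight. A minimal sketch of the rounding, assuming the 4 KiB page size behind Core::Memory::YUZU_PAGESIZE:

#include <cstdint>

constexpr std::uint64_t PAGE_SIZE = 0x1000; // assumed YUZU_PAGESIZE

constexpr std::uint64_t AlignUp(std::uint64_t value, std::uint64_t align) {
    return (value + align - 1) & ~(align - 1); // align must be a power of two
}

// Mirrors the new .size computation above.
std::uint32_t BindingSize(std::uint64_t cpu_addr, std::uint32_t size, bool is_written) {
    const std::uint64_t cpu_end = AlignUp(cpu_addr + size, PAGE_SIZE);
    return is_written ? size : static_cast<std::uint32_t>(cpu_end - cpu_addr);
}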
@@ -1859,7 +1861,7 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s
 template <class P>
 typename BufferCache<P>::TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(
     GPUVAddr gpu_addr, u32 size, PixelFormat format) {
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
     TextureBufferBinding binding;
     if (!cpu_addr || size == 0) {
         binding.cpu_addr = 0;